/**
 * Reads the first line of the given file and splits it into field names.
 * <p>
 * Delimiter, enclosure and encoding are taken from the step meta after variable substitution. The file is opened
 * through KettleVFS and wrapped in a {@link BOMInputStream} configured for UTF-8, UTF-16LE and UTF-16BE byte order
 * marks. When no encoding is configured the platform default charset is used by the reader.
 *
 * @param fileName     name of the file whose first line holds the field names
 * @param csvInputMeta step meta supplying delimiter, enclosure, encoding and escape character
 * @return the field names found on the first line, with enclosures removed and trimmed
 * @throws KettleException a {@link KettleFileException} wrapping any {@link IOException} raised while reading
 */
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws KettleException {
  String delimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
  String enclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
  String realEncoding = environmentSubstitute( csvInputMeta.getEncoding() );

  try ( FileObject fileObject = KettleVFS.getFileObject( fileName, getTransMeta() );
        BOMInputStream inputStream = new BOMInputStream( KettleVFS.getInputStream( fileObject ),
          ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE ) ) {
    // The reader is not closed separately: closing the underlying stream is handled by try-with-resources.
    InputStreamReader reader = Utils.isEmpty( realEncoding )
      ? new InputStreamReader( inputStream )
      : new InputStreamReader( inputStream, realEncoding );

    EncodingType encodingType = EncodingType.guessEncodingType( reader.getEncoding() );
    String line = TextFileInput.getLine( log, reader, encodingType, TextFileInputMeta.FILE_FORMAT_UNIX,
      new StringBuilder( 1000 ) );
    String[] fieldNames =
      CsvInput.guessStringsFromLine( log, line, delimiter, enclosure, csvInputMeta.getEscapeCharacter() );

    // Strip the same (variable-substituted) enclosure that was used to split the line; the raw meta value
    // may still contain an unresolved variable expression.
    if ( !Utils.isEmpty( enclosure ) ) {
      removeEnclosure( fieldNames, enclosure );
    }
    trimFieldNames( fieldNames );
    return fieldNames;
  } catch ( IOException e ) {
    throw new KettleFileException( BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), e );
  }
}
/**
 * Prepares a fresh {@link CsvInput} step for reading the specified file.
 * <p>
 * The meta's filename is set from the URL of the resolved file before the step is initialized; the row listener
 * is attached once initialization has been invoked.
 */
protected void init( String file ) throws Exception {
  String resolvedPath = getFile( file ).getURL().getFile();
  meta.setFilename( resolvedPath );

  step = new CsvInput( stepMeta, null, 1, transMeta, trans );
  step.init( meta, data );
  step.addRowListener( rowListener );
}
/**
 * Runs the step held by the given combi until {@code processRow} returns {@code false}, then disposes it.
 * <p>
 * The whole combi is accepted to keep the parameter list short; its step, data and meta are expected to be
 * {@link CsvInput}, {@link CsvInputData} and {@link CsvInputMeta} instances.
 *
 * @param combi holder of the step, its data and its meta
 * @return number of {@code rowWrittenEvent} notifications received while processing
 */
private int processRows( StepMetaDataCombi combi ) throws Exception {
  final CsvInput input = (CsvInput) combi.step;
  final CsvInputData inputData = (CsvInputData) combi.data;
  final CsvInputMeta inputMeta = (CsvInputMeta) combi.meta;

  // single-element array so that the anonymous listener can update the counter
  final int[] rowCounter = new int[] { 0 };
  RowAdapter countingListener = new RowAdapter() {
    @Override
    public void rowWrittenEvent( RowMetaInterface rowMeta, Object[] row ) throws KettleStepException {
      rowCounter[ 0 ] += 1;
    }
  };
  input.addRowListener( countingListener );

  while ( input.processRow( inputMeta, inputData ) ) {
    // keep going until the step reports it is done
  }

  input.dispose( inputMeta, inputData );
  return rowCounter[ 0 ];
}
private void getFilenamesFromPreviousSteps() throws KettleException { List<String> filenames = new ArrayList<String>(); boolean firstRow = true; int index = -1; Object[] row = getRow(); while ( row != null ) { if ( firstRow ) { firstRow = false; // Get the filename field index... // String filenameField = environmentSubstitute( meta.getFilenameField() ); index = getInputRowMeta().indexOfValue( filenameField ); if ( index < 0 ) { throw new KettleException( BaseMessages.getString( PKG, "CsvInput.Exception.FilenameFieldNotFound", filenameField ) ); } } String filename = getInputRowMeta().getString( row, index ); filenames.add( filename ); // add it to the list... row = getRow(); // Grab another row... } data.filenames = filenames.toArray( new String[ filenames.size() ] ); logBasic( BaseMessages.getString( PKG, "CsvInput.Log.ReadingFromNrFiles", Integer .toString( data.filenames.length ) ) ); }
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore ); getFilenamesFromPreviousSteps(); prepareToRunInParallel(); if ( !openNextFile() ) { setOutputDone(); return false; // nothing to see here, move along... setOutputDone(); // stop reading return false; Object[] outputRowData = readOneRow( false, false ); // get row, set busy! if ( openNextFile() ) { return true; // try again on the next loop... } else { setOutputDone(); // last file, end here return false; putRow( data.outputRowMeta, outputRowData ); // copy row to possible alternate rowset(s). if ( checkFeedback( getLinesInput() ) ) { if ( log.isBasic() ) { logBasic( BaseMessages.getString( PKG, "CsvInput.Log.LineNumber", Long.toString( getLinesInput() ) ) ); if ( getStepMeta().isDoingErrorHandling() ) { StringBuilder errorDescriptions = new StringBuilder( 100 ); StringBuilder errorFields = new StringBuilder( 50 );
/**
 * Writes the test data to a file in the given encoding, runs a CsvInput step over it and checks the outcome:
 * every field of every written row must equal "Value" and exactly two lines must have been written.
 */
private void doTest( final String fileEncoding, final String stepEncoding, final String testData,
                     final String delimiter, final boolean useHeader ) throws Exception {
  String testFilePath = createTestFile( fileEncoding, testData ).getAbsolutePath();

  CsvInputMeta meta = createStepMeta( testFilePath, stepEncoding, delimiter, useHeader );
  CsvInputData data = new CsvInputData();

  CsvInput csvInput = new CsvInput(
    stepMockHelper.stepMeta, stepMockHelper.stepDataInterface, 0, stepMockHelper.transMeta,
    stepMockHelper.trans );
  csvInput.init( meta, data );

  RowAdapter valueChecker = new RowAdapter() {
    @Override
    public void rowWrittenEvent( RowMetaInterface rowMeta, Object[] row ) throws KettleStepException {
      int column = 0;
      while ( column < rowMeta.size() ) {
        Assert.assertEquals( "Value", row[ column ] );
        column++;
      }
    }
  };
  csvInput.addRowListener( valueChecker );

  while ( csvInput.processRow( meta, data ) ) {
    // process until the step signals completion
  }

  csvInput.dispose( meta, data );
  Assert.assertEquals( 2, csvInput.getLinesWritten() );
}
data.fieldsMapping = createFieldMapping( data.filenames[data.filenr], meta ); FileObject fileObject = KettleVFS.getFileObject( data.filenames[ data.filenr ], getTransMeta() ); if ( !( fileObject instanceof LocalFile ) ) { if ( needToSkipRow() ) { new ResultFile( ResultFile.FILE_TYPE_GENERAL, fileObject, getTransMeta().getName(), toString() ); resultFile.setComment( "File was read by a Csv input step" ); addResultFile( resultFile ); readOneRow( true, false ); // skip this row. logBasic( BaseMessages.getString( PKG, "CsvInput.Log.HeaderRowSkipped", data.filenames[ data.filenr - 1 ] ) ); if ( data.fieldsMapping.size() == 0 ) { return false;
String realEncoding = environmentSubstitute( meta.getEncoding() ); data.preferredBufferSize = Integer.parseInt( environmentSubstitute( meta.getBufferSize() ) ); if ( getTransMeta().findNrPrevSteps( getStepMeta() ) == 0 ) { String filename = environmentSubstitute( meta.getFilename() ); logError( BaseMessages.getString( PKG, "CsvInput.MissingFilename.Message" ) ); return false; data.delimiter = data.encodingType.getBytes( environmentSubstitute( meta.getDelimiter() ), realEncoding ); data.enclosure = data.encodingType.getBytes( environmentSubstitute( meta.getEnclosure() ), realEncoding ); logError( BaseMessages.getString( PKG, "CsvInput.BadEncoding.Message" ), e ); return false; data.stepNumber = getUniqueStepNrAcrossSlaves(); data.totalNumberOfSteps = getUniqueStepCountAcrossSlaves();
/**
 * Runs the step once over a temporary UTF-8 file holding {@code content}, using the given enclosure, and
 * verifies that exactly one row ("value1", "value2") is produced.
 */
public void doTest( String content, String enclosure ) throws Exception {
  RowSet producedRows = new QueueRowSet();
  File testFile = createTestFile( "utf-8", content );
  try {
    CsvInputMeta meta = createMeta( testFile, createInputFileFields( "f1", "f2" ), enclosure );
    CsvInputData data = new CsvInputData();
    csvInput.init( meta, data );
    csvInput.addRowSetToOutputRowSets( producedRows );

    try {
      csvInput.processRow( meta, data );
    } finally {
      csvInput.dispose( meta, data );
    }
  } finally {
    testFile.delete();
  }

  Object[] firstRow = producedRows.getRowImmediate();
  assertNotNull( firstRow );

  String[] expectedValues = { "value1", "value2" };
  for ( int column = 0; column < expectedValues.length; column++ ) {
    assertEquals( expectedValues[ column ], firstRow[ column ] );
  }

  // nothing else may have been emitted
  assertNull( producedRows.getRowImmediate() );
}
/**
 * Creates the {@link CsvInput} step instance for the supplied step meta, data, copy number and transformation.
 */
@Override
public StepInterface getStep( StepMeta stepMeta, StepDataInterface stepDataInterface, int cnr, TransMeta tr,
                              Trans trans ) {
  StepInterface csvInputStep = new CsvInput( stepMeta, stepDataInterface, cnr, tr, trans );
  return csvInputStep;
}
new StringBuilder( 1000 ) ); if ( !StringUtils.isBlank( line ) ) { fieldNames = CsvInput.guessStringsFromLine( getLogChannel(), line, delimiter, enclosure, meta.getEscapeCharacter() );
/**
 * Executes the step over the given file with input fields "a" and "b" and compares the produced rows against
 * the expected ones: ("a", "b", 1) and ("a", null, 2).
 */
public void doTest( File file ) throws Exception {
  CsvInputData data = new CsvInputData();
  CsvInputMeta meta = createMeta( file, createInputFileFields( "a", "b" ) );

  Object[] firstExpected = new Object[] { "a", "b", 1L };
  Object[] secondExpected = new Object[] { "a", null, 2L };
  List<Object[]> expected = Arrays.asList( firstExpected, secondExpected );

  List<Object[]> produced;
  try {
    csvInput.init( meta, data );
    produced = TransTestingUtil.execute( csvInput, meta, data, 2, false );
  } finally {
    // always release the step, even when init or execution fails
    csvInput.dispose( meta, data );
  }

  TransTestingUtil.assertResult( expected, produced );
}
/**
 * Writes the test data to a file in the given encoding, runs a CsvInput step configured with the step encoding
 * over it and checks that every field of every written row equals "Value" and that two lines were written.
 */
private void doTest( final String fileEncoding, final String stepEncoding, final String testData )
  throws Exception {
  String testFilePath = createTestFile( fileEncoding, testData ).getAbsolutePath();

  CsvInputMeta meta = createStepMeta( testFilePath, stepEncoding );
  CsvInputData data = new CsvInputData();

  CsvInput csvInput = new CsvInput(
    stepMockHelper.stepMeta, stepMockHelper.stepDataInterface, 0, stepMockHelper.transMeta,
    stepMockHelper.trans );
  csvInput.init( meta, data );

  RowAdapter valueChecker = new RowAdapter() {
    @Override
    public void rowWrittenEvent( RowMetaInterface rowMeta, Object[] row ) throws KettleStepException {
      int column = 0;
      while ( column < rowMeta.size() ) {
        assertEquals( "Value", row[ column ] );
        column++;
      }
    }
  };
  csvInput.addRowListener( valueChecker );

  while ( csvInput.processRow( meta, data ) ) {
    // process until the step signals completion
  }

  csvInput.dispose( meta, data );
  assertEquals( 2, csvInput.getLinesWritten() );
}
/**
 * Runs the step once over a temporary file holding {@code content} (written with the test's encoding) and
 * verifies that exactly one row is produced whose first three fields match {@code expected}.
 */
public void doTest( String content, String[] expected ) throws Exception {
  RowSet producedRows = new QueueRowSet();
  File testFile = createTestFile( encoding, content );
  try {
    CsvInputMeta meta = createMeta( testFile, createInputFileFields( "f1", "f2", "f3" ) );
    CsvInputData data = new CsvInputData();
    csvInput.init( meta, data );
    csvInput.addRowSetToOutputRowSets( producedRows );

    try {
      csvInput.processRow( meta, data );
    } finally {
      csvInput.dispose( meta, data );
    }
  } finally {
    testFile.delete();
  }

  Object[] firstRow = producedRows.getRowImmediate();
  assertNotNull( firstRow );

  // compare the three configured fields one by one
  for ( int column = 0; column < 3; column++ ) {
    assertEquals( expected[ column ], firstRow[ column ] );
  }

  // nothing else may have been emitted
  assertNull( producedRows.getRowImmediate() );
}
/**
 * Builds a {@link CsvInput} step (copy number 0) wired to the objects held by the step mock helper.
 */
private CsvInput createCsvInput() {
  CsvInput created = new CsvInput(
    stepMockHelper.stepMeta,
    stepMockHelper.stepDataInterface,
    0,
    stepMockHelper.transMeta,
    stepMockHelper.trans );
  return created;
}
/**
 * Calls {@code guessStringsFromLine} with a {@code null} line and expects a {@link NullPointerException}.
 * <p>
 * NOTE(review): if the call returns {@code null} rather than throwing, the expected exception comes from the
 * dereference of the result at the end of this method - confirm against guessStringsFromLine.
 */
@Test( expected = NullPointerException.class )
public void guessStringsFromLineWithEmptyLine() throws Exception {
  line = null;

  String delimiter = csvInputMeta.getDelimiter();
  String enclosure = csvInputMeta.getEnclosure();
  String escapeCharacter = csvInputMeta.getEscapeCharacter();
  String[] saData = guessStringsFromLine( logChannelInterface, line, delimiter, enclosure, escapeCharacter );

  assertNull( saData );

  // Touching the array length throws when saData is null.
  if ( saData.length > 0 ) {
    return;
  }
}
/**
 * Runs the step once over a temporary UTF-8 file holding {@code content} and verifies that exactly one row
 * ("value1", "value2", "value3") is produced.
 */
private void doTest( String content ) throws Exception {
  RowSet producedRows = new QueueRowSet();
  File testFile = createTestFile( "utf-8", content );
  try {
    CsvInputMeta meta = createMeta( testFile, createInputFileFields( "f1", "f2", "f3" ) );
    CsvInputData data = new CsvInputData();
    csvInput.init( meta, data );
    csvInput.addRowSetToOutputRowSets( producedRows );

    try {
      csvInput.processRow( meta, data );
    } finally {
      csvInput.dispose( meta, data );
    }
  } finally {
    testFile.delete();
  }

  Object[] firstRow = producedRows.getRowImmediate();
  assertNotNull( firstRow );

  String[] expectedValues = { "value1", "value2", "value3" };
  for ( int column = 0; column < expectedValues.length; column++ ) {
    assertEquals( expectedValues[ column ], firstRow[ column ] );
  }

  // nothing else may have been emitted
  assertNull( producedRows.getRowImmediate() );
}
/**
 * Sets the KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL system property to "Y", then creates the step mock helper and
 * the CsvInput step under test.
 */
@Before
public void setUp() throws Exception {
  System.setProperty( Const.KETTLE_EMPTY_STRING_DIFFERS_FROM_NULL, "Y" );

  this.stepMockHelper = StepMockUtil.getStepMockHelper( CsvInputMeta.class, "Pdi15270Test" );
  this.csvInput = new CsvInput(
    this.stepMockHelper.stepMeta,
    this.stepMockHelper.stepDataInterface,
    0,
    this.stepMockHelper.transMeta,
    this.stepMockHelper.trans );
}
/**
 * Creates the step mock helper named "CsvInputRowNumberTest" and the CsvInput step under test.
 */
@Before
public void setUp() throws Exception {
  this.stepMockHelper = StepMockUtil.getStepMockHelper( CsvInputMeta.class, "CsvInputRowNumberTest" );
  this.csvInput = new CsvInput(
    this.stepMockHelper.stepMeta,
    this.stepMockHelper.stepDataInterface,
    0,
    this.stepMockHelper.transMeta,
    this.stepMockHelper.trans );
}
/**
 * Creates the step mock helper named "CsvInputEnclosureTest" and the CsvInput step under test.
 */
@Before
public void setUp() throws Exception {
  this.stepMockHelper = StepMockUtil.getStepMockHelper( CsvInputMeta.class, "CsvInputEnclosureTest" );
  this.csvInput = new CsvInput(
    this.stepMockHelper.stepMeta,
    this.stepMockHelper.stepDataInterface,
    0,
    this.stepMockHelper.transMeta,
    this.stepMockHelper.trans );
}