/**
 * Default constructor.
 *
 * Note: in the collapsed one-line form of this source, the trailing line comment
 * swallowed the {@code allocate( 0 )} call and the closing brace; the original
 * multi-line structure is restored here so the allocation actually executes.
 */
public CsvInputMeta() {
  super(); // allocate BaseStepMeta
  // Start with zero input fields; callers re-allocate when fields are known.
  allocate( 0 );
}
/**
 * Builds the mapping between the columns found in the file and the fields
 * configured on the step.
 *
 * @param fileName     path of the CSV file to inspect
 * @param csvInputMeta step configuration (header flag, configured fields)
 * @return a name-based mapping when a header row is present, otherwise a
 *         positional mapping sized to the configured field count
 * @throws KettleException if the header line cannot be read
 */
FieldsMapping createFieldMapping( String fileName, CsvInputMeta csvInputMeta ) throws KettleException {
  if ( csvInputMeta.isHeaderPresent() ) {
    // Header row present: match file columns to configured fields by name.
    String[] actualFieldNames = readFieldNamesFromFile( fileName, csvInputMeta );
    return NamedFieldsMapping.mapping( actualFieldNames, fieldNames( csvInputMeta ) );
  }
  // No header row: fall back to a positional mapping.
  int fieldsCount = csvInputMeta.getInputFields() == null ? 0 : csvInputMeta.getInputFields().length;
  return UnnamedFieldsMapping.mapping( fieldsCount );
}
/**
 * Builds a CsvInputMeta for the given file using the enclosing class's
 * delimiter/encoding/enclosure settings; no header row is assumed.
 *
 * @param file   the CSV file to read
 * @param fields the field layout to apply
 * @return a fully configured step metadata instance
 */
private CsvInputMeta createMeta( File file, TextFileInputField[] fields ) {
  CsvInputMeta inputMeta = new CsvInputMeta();
  inputMeta.setFilename( file.getAbsolutePath() );
  inputMeta.setHeaderPresent( false );
  inputMeta.setDelimiter( delimiter );
  inputMeta.setEnclosure( enclosure );
  inputMeta.setEncoding( encoding );
  // Small read buffer is sufficient for these inputs.
  inputMeta.setBufferSize( "1024" );
  inputMeta.setInputFields( fields );
  return inputMeta;
}
}
/**
 * Builds a CsvInputMeta for the given file with fixed defaults: comma
 * delimiter, UTF-8 encoding, no header row, and a "rownum" row-number field.
 *
 * @param file   the CSV file to read
 * @param fields the field layout to apply
 * @return a fully configured step metadata instance
 */
private CsvInputMeta createMeta( File file, TextFileInputField[] fields ) {
  CsvInputMeta csvMeta = new CsvInputMeta();
  csvMeta.setFilename( file.getAbsolutePath() );
  csvMeta.setHeaderPresent( false );
  csvMeta.setDelimiter( "," );
  csvMeta.setEncoding( "utf-8" );
  // Small read buffer is sufficient for these inputs.
  csvMeta.setBufferSize( "1024" );
  csvMeta.setInputFields( fields );
  csvMeta.setRowNumField( "rownum" );
  return csvMeta;
}
}
// Fragment of the CSV input dialog's "save UI state into the meta" routine.
// NOTE(review): the enclosing method signature, the if-branch opening, and the
// for-loop over table rows (declaring i, item, colnr, nrNonEmptyFields) are
// outside this chunk, so braces here are intentionally unbalanced.
inputMeta.setFilenameField( wFilenameField.getText() );
inputMeta.setIncludingFilename( wIncludeFilename.getSelection() );
} else {
// Plain-file branch: copy every widget value into the step metadata.
inputMeta.setFilename( wFilename.getText() );
inputMeta.setDelimiter( wDelimiter.getText() );
inputMeta.setEnclosure( wEnclosure.getText() );
inputMeta.setBufferSize( wBufferSize.getText() );
inputMeta.setLazyConversionActive( wLazyConversion.getSelection() );
inputMeta.setHeaderPresent( wHeaderPresent.getSelection() );
inputMeta.setRowNumField( wRowNumField.getText() );
inputMeta.setAddResultFile( wAddResult.getSelection() );
inputMeta.setRunningInParallel( wRunningInParallel.getSelection() );
inputMeta.setNewlinePossibleInFields( wNewlinePossible.getSelection() );
inputMeta.setEncoding( wEncoding.getText() );
// Re-allocate the field array, then fill one field per non-empty table row.
inputMeta.allocate( nrNonEmptyFields );
inputMeta.getInputFields()[i] = new TextFileInputField();
// colnr++ walks the table columns left to right; order must match the table layout.
inputMeta.getInputFields()[i].setName( item.getText( colnr++ ) );
inputMeta.getInputFields()[i].setType( ValueMetaFactory.getIdForValueMeta( item.getText( colnr++ ) ) );
inputMeta.getInputFields()[i].setFormat( item.getText( colnr++ ) );
// -1 means "not specified" for length and precision.
inputMeta.getInputFields()[i].setLength( Const.toInt( item.getText( colnr++ ), -1 ) );
inputMeta.getInputFields()[i].setPrecision( Const.toInt( item.getText( colnr++ ), -1 ) );
inputMeta.getInputFields()[i].setCurrencySymbol( item.getText( colnr++ ) );
inputMeta.getInputFields()[i].setDecimalSymbol( item.getText( colnr++ ) );
inputMeta.getInputFields()[i].setGroupSymbol( item.getText( colnr++ ) );
inputMeta.getInputFields()[i].setTrimType( ValueMetaString.getTrimTypeByDesc( item.getText( colnr++ ) ) );
// Builds the CSV-input step for the generated transformation.
// NOTE(review): this chunk is cut off — the trailing statements and the return
// of the created StepMeta are outside the visible source.
protected StepMeta createInputStep( TransMeta transMeta ) {
CsvInputMeta csvInputMeta = new CsvInputMeta();
CsvFileInfo fileInfo = getModelInfo().getFileInfo();
csvInputMeta.setAddResultFile( false );
// Larger buffer than the dialog default; presumably sized for model import — TODO confirm.
csvInputMeta.setBufferSize( "5000" ); //$NON-NLS-1$
// Parsing options come straight from the analyzed file info.
csvInputMeta.setDelimiter( fileInfo.getDelimiter() );
csvInputMeta.setEnclosure( fileInfo.getEnclosure() );
csvInputMeta.setEncoding( fileInfo.getEncoding() );
csvInputMeta.setFilename( filename );
csvInputMeta.setFilenameField( null );
// Header flag derived from the detected header-row count.
csvInputMeta.setHeaderPresent( fileInfo.getHeaderRows() > 0 );
csvInputMeta.setIncludingFilename( false );
csvInputMeta.setInputFields( inputFields );
csvInputMeta.setLazyConversionActive( true );
csvInputMeta.setRowNumField( "" ); //$NON-NLS-1$
csvInputMeta.setRunningInParallel( false );
// Fragment of the CSV input dialog's "load meta into UI" routine (the inverse
// of getInfo). NOTE(review): the method signature and the if-branch opening are
// outside this chunk, so braces here are intentionally unbalanced.
wFilenameField.setText( Const.NVL( inputMeta.getFilenameField(), "" ) );
wIncludeFilename.setSelection( inputMeta.isIncludingFilename() );
} else {
// Plain-file branch: push every meta value into its widget, defaulting
// null strings to "" so the text widgets never receive null.
wFilename.setText( Const.NVL( inputMeta.getFilename(), "" ) );
wDelimiter.setText( Const.NVL( inputMeta.getDelimiter(), "" ) );
wEnclosure.setText( Const.NVL( inputMeta.getEnclosure(), "" ) );
wBufferSize.setText( Const.NVL( inputMeta.getBufferSize(), "" ) );
wLazyConversion.setSelection( inputMeta.isLazyConversionActive() );
wHeaderPresent.setSelection( inputMeta.isHeaderPresent() );
wRunningInParallel.setSelection( inputMeta.isRunningInParallel() );
wNewlinePossible.setSelection( inputMeta.isNewlinePossibleInFields() );
wRowNumField.setText( Const.NVL( inputMeta.getRowNumField(), "" ) );
wAddResult.setSelection( inputMeta.isAddResultFile() );
wEncoding.setText( Const.NVL( inputMeta.getEncoding(), "" ) );
// Populate the fields table, one row per configured input field.
for ( int i = 0; i < inputMeta.getInputFields().length; i++ ) {
TextFileInputField field = inputMeta.getInputFields()[i];
final TableItem item = getTableItem( field.getName() );
@Test public void testClone() { final CsvInputMeta original = new CsvInputMeta(); original.setDelimiter( ";" ); original.setEnclosure( "'" ); final TextFileInputField[] originalFields = new TextFileInputField[ 1 ]; final TextFileInputField originalField = new TextFileInputField(); originalField.setName( "field" ); originalFields[ 0 ] = originalField; original.setInputFields( originalFields ); final CsvInputMeta clone = (CsvInputMeta) original.clone(); // verify that the clone and its input fields are "equal" to the originals, but not the same objects Assert.assertNotSame( original, clone ); Assert.assertEquals( original.getDelimiter(), clone.getDelimiter() ); Assert.assertEquals( original.getEnclosure(), clone.getEnclosure() ); Assert.assertNotSame( original.getInputFields(), clone.getInputFields() ); Assert.assertNotSame( original.getInputFields()[ 0 ], clone.getInputFields()[ 0 ] ); Assert.assertEquals( original.getInputFields()[ 0 ].getName(), clone.getInputFields()[ 0 ].getName() ); } }
protected void doInputWiring( Query query, TransMeta transMeta ) { // // CSV FILE LOCATION AND FIELDS // InlineEtlPhysicalModel physicalModel = (InlineEtlPhysicalModel) query.getLogicalModel().getPhysicalModel(); CsvInputMeta csvinput = (CsvInputMeta) getStepMeta( transMeta, "CSV file input" ).getStepMetaInterface(); //$NON-NLS-1$ // the file name might need to be translated to the correct location here if ( csvFileLoc != null ) { csvinput.setFilename( csvFileLoc + physicalModel.getFileLocation() ); } else { csvinput.setFilename( physicalModel.getFileLocation() ); } csvinput.setDelimiter( physicalModel.getDelimiter() ); csvinput.setEnclosure( physicalModel.getEnclosure() ); csvinput.setHeaderPresent( physicalModel.getHeaderPresent() ); // update fields LogicalTable table = query.getLogicalModel().getLogicalTables().get( 0 ); csvinput.allocate( table.getLogicalColumns().size() ); for ( int i = 0; i < csvinput.getInputFields().length; i++ ) { // Update csv input LogicalColumn col = table.getLogicalColumns().get( i ); csvinput.getInputFields()[i] = new TextFileInputField(); String fieldName = (String) col.getProperty( InlineEtlPhysicalColumn.FIELD_NAME ); if ( logger.isDebugEnabled() ) { logger.debug( "FROM CSV: " + fieldName ); //$NON-NLS-1$ } csvinput.getInputFields()[i].setName( fieldName ); csvinput.getInputFields()[i].setType( convertType( col.getDataType() ) ); } }
// Fragment of the CsvInput step's initialization: resolves variables in the
// configuration and converts delimiter/enclosure to bytes in the target encoding.
// NOTE(review): the enclosing method and several closing braces are outside
// this chunk, so braces here are intentionally unbalanced.
String realEncoding = environmentSubstitute( meta.getEncoding() );
data.preferredBufferSize = Integer.parseInt( environmentSubstitute( meta.getBufferSize() ) );
String filename = environmentSubstitute( meta.getFilename() );
// Delimiter bytes depend on the file's encoding, not the JVM default.
data.delimiter = data.encodingType.getBytes( environmentSubstitute( meta.getDelimiter() ), realEncoding );
if ( Utils.isEmpty( meta.getEnclosure() ) ) {
// No enclosure configured: null signals "no enclosure handling".
data.enclosure = null;
} else {
data.enclosure = data.encodingType.getBytes( environmentSubstitute( meta.getEnclosure() ), realEncoding );
// A non-empty row-number field name turns on row numbering.
data.isAddingRowNumber = !Utils.isEmpty( meta.getRowNumField() );
if ( meta.isRunningInParallel() ) {
// Parallel read: each copy needs its number and the total count across slaves.
data.stepNumber = getUniqueStepNrAcrossSlaves();
data.totalNumberOfSteps = getUniqueStepCountAcrossSlaves();
// Fragment computing output row layout and the indexes of the optional
// filename/rownum columns appended after the data fields.
// NOTE(review): closing braces of the if-blocks are outside this chunk.
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
// Only truly parallel when more than one step copy exists across slaves.
data.parallel = meta.isRunningInParallel() && data.totalNumberOfSteps > 1;
if ( !Utils.isEmpty( meta.getFilenameField() ) && meta.isIncludingFilename() ) {
// Filename column sits right after the configured input fields.
data.filenameFieldIndex = meta.getInputFields().length;
if ( !Utils.isEmpty( meta.getRowNumField() ) ) {
data.rownumFieldIndex = meta.getInputFields().length;
if ( data.filenameFieldIndex >= 0 ) {
// Rownum comes after the filename column when both are present.
data.rownumFieldIndex++;
// Fragment of the dialog's preview logic: collects the current UI state into a
// fresh meta and bails out early when no file or no fields are configured.
// NOTE(review): the enclosing method and the closing braces of the guard
// if-statements are outside this chunk.
previewBusy.set( true );
CsvInputMeta meta = new CsvInputMeta();
getInfo( meta );
// Nothing to preview without a file name.
if ( Utils.isEmpty( meta.getFilename() ) ) {
return;
// ...or without any configured fields.
if ( Utils.isEmpty( meta.getInputFields() ) ) {
return;
meta.getFields( rowMeta, stepname, null, null, transMeta, repository, metaStore );
/**
 * Reads the header line of the given file and returns the field names found
 * there, with any configured enclosure stripped and names trimmed.
 *
 * The BOM (UTF-8/UTF-16LE/UTF-16BE) is consumed by the wrapping BOMInputStream
 * so it never leaks into the first field name.
 *
 * Improvement over the original: the InputStreamReader is now a resource of
 * the try-with-resources statement, so it is closed explicitly instead of
 * relying on the underlying stream's close.
 *
 * @param fileName     path of the CSV file (VFS-resolvable)
 * @param csvInputMeta step configuration supplying delimiter/enclosure/encoding
 * @return the sanitized field names of the header row
 * @throws KettleException if the file cannot be opened or the line cannot be read
 */
String[] readFieldNamesFromFile( String fileName, CsvInputMeta csvInputMeta ) throws KettleException {
  String delimiter = environmentSubstitute( csvInputMeta.getDelimiter() );
  String enclosure = environmentSubstitute( csvInputMeta.getEnclosure() );
  String realEncoding = environmentSubstitute( csvInputMeta.getEncoding() );
  try ( FileObject fileObject = KettleVFS.getFileObject( fileName, getTransMeta() );
      BOMInputStream inputStream =
        new BOMInputStream( KettleVFS.getInputStream( fileObject ),
          ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE );
      // Fall back to the platform default charset when no encoding is configured.
      InputStreamReader reader = Utils.isEmpty( realEncoding )
        ? new InputStreamReader( inputStream )
        : new InputStreamReader( inputStream, realEncoding ) ) {
    EncodingType encodingType = EncodingType.guessEncodingType( reader.getEncoding() );
    String line =
      TextFileInput.getLine( log, reader, encodingType, TextFileInputMeta.FILE_FORMAT_UNIX,
        new StringBuilder( 1000 ) );
    String[] fieldNames =
      CsvInput.guessStringsFromLine( log, line, delimiter, enclosure, csvInputMeta.getEscapeCharacter() );
    if ( !Utils.isEmpty( csvInputMeta.getEnclosure() ) ) {
      removeEnclosure( fieldNames, csvInputMeta.getEnclosure() );
    }
    trimFieldNames( fieldNames );
    return fieldNames;
  } catch ( IOException e ) {
    throw new KettleFileException( BaseMessages.getString( PKG, "CsvInput.Exception.CreateFieldMappingError" ), e );
  }
}
@Test
public void lazyConversionTurnedOff() throws KettleException {
  KettleEnvironment.init();

  // Build a transformation with both step types configured for lazy conversion.
  TransMeta transMeta = new TransMeta();
  CsvInputMeta csvInputMeta = new CsvInputMeta();
  csvInputMeta.setLazyConversionActive( true );
  transMeta.addStep( new StepMeta( "Csv", csvInputMeta ) );
  TableInputMeta tableInputMeta = new TableInputMeta();
  tableInputMeta.setLazyConversionActive( true );
  transMeta.addStep( new StepMeta( "Table", tableInputMeta ) );

  // Convert, then parse the serialized XML back into a TransMeta.
  Transformation trans = TransMetaConverter.convert( transMeta );
  String transMetaXml = (String) trans.getConfig().get( TransMetaConverter.TRANS_META_CONF_KEY );
  TransMeta roundTripped;
  try {
    Document doc = XMLHandler.loadXMLString( transMetaXml );
    Node stepNode = XMLHandler.getSubNode( doc, "transformation" );
    roundTripped = new TransMeta( stepNode, null );
  } catch ( KettleXMLException | KettleMissingPluginsException e ) {
    throw new RuntimeException( e );
  }

  // The converter must have switched lazy conversion off on both steps.
  assertThat( ( (CsvInputMeta) roundTripped.findStep( "Csv" ).getStepMetaInterface() ).isLazyConversionActive(),
    is( false ) );
  assertThat( ( (TableInputMeta) roundTripped.findStep( "Table" ).getStepMetaInterface() ).isLazyConversionActive(),
    is( false ) );
}
/**
 * guessStringsFromLine must throw NullPointerException for a null line.
 *
 * The original body followed the call with assertNull and a loop over the
 * result; both were unreachable when the expected NPE was thrown, and the loop
 * itself would raise an NPE on a null result — letting the test pass even if
 * the method returned null instead of throwing. That self-defeating tail is
 * removed so the test only passes when the method really throws.
 */
@Test ( expected = NullPointerException.class )
public void guessStringsFromLineWithEmptyLine() throws Exception {
  line = null;
  guessStringsFromLine( logChannelInterface, line, csvInputMeta.getDelimiter(),
    csvInputMeta.getEnclosure(), csvInputMeta.getEscapeCharacter() );
}
// Loads this step's configuration from the repository.
// NOTE(review): this chunk is cut off — the for-loop header over field index i,
// the remaining per-field reads, and the closing catch/brace are outside the
// visible source, so braces here are intentionally unbalanced.
@Override
public void readRep( Repository rep, IMetaStore metaStore, ObjectId id_step, List<DatabaseMeta> databases ) throws KettleException {
try {
filename = rep.getStepAttributeString( id_step, getRepCode( "FILENAME" ) );
filenameField = rep.getStepAttributeString( id_step, getRepCode( "FILENAME_FIELD" ) );
rowNumField = rep.getStepAttributeString( id_step, getRepCode( "ROW_NUM_FIELD" ) );
includingFilename = rep.getStepAttributeBoolean( id_step, getRepCode( "INCLUDE_FILENAME" ) );
delimiter = rep.getStepAttributeString( id_step, getRepCode( "DELIMITER" ) );
enclosure = rep.getStepAttributeString( id_step, getRepCode( "ENCLOSURE" ) );
headerPresent = rep.getStepAttributeBoolean( id_step, getRepCode( "HEADER_PRESENT" ) );
bufferSize = rep.getStepAttributeString( id_step, getRepCode( "BUFFERSIZE" ) );
lazyConversionActive = rep.getStepAttributeBoolean( id_step, getRepCode( "LAZY_CONVERSION" ) );
isaddresult = rep.getStepAttributeBoolean( id_step, getRepCode( "ADD_FILENAME_RESULT" ) );
runningInParallel = rep.getStepAttributeBoolean( id_step, getRepCode( "PARALLEL" ) );
// Older repositories lack NEWLINE_POSSIBLE; default depends on the parallel flag.
newlinePossibleInFields = rep.getStepAttributeBoolean( id_step, 0, getRepCode( "NEWLINE_POSSIBLE" ), !runningInParallel );
encoding = rep.getStepAttributeString( id_step, getRepCode( "ENCODING" ) );
// Field count is derived from the number of stored FIELD_NAME attributes.
int nrfields = rep.countNrStepAttributes( id_step, getRepCode( "FIELD_NAME" ) );
allocate( nrfields );
// Per-field reads below run inside a loop over i (header not in this chunk).
inputFields[i].setName( rep.getStepAttributeString( id_step, i, getRepCode( "FIELD_NAME" ) ) );
inputFields[i].setType( ValueMetaFactory.getIdForValueMeta( rep.getStepAttributeString( id_step, i, getRepCode( "FIELD_TYPE" ) ) ) );
inputFields[i].setFormat( rep.getStepAttributeString( id_step, i, getRepCode( "FIELD_FORMAT" ) ) );
inputFields[i]
.setCurrencySymbol( rep.getStepAttributeString( id_step, i, getRepCode( "FIELD_CURRENCY" ) ) );
inputFields[i].setDecimalSymbol( rep.getStepAttributeString( id_step, i, getRepCode( "FIELD_DECIMAL" ) ) );
inputFields[i].setGroupSymbol( rep.getStepAttributeString(
id_step, i, getRepCode( "FIELD_GROUP" ) ) );
// Parses this step's configuration out of its XML node.
// NOTE(review): this chunk is cut off — the bodies of the NEWLINE_POSSIBLE
// defaulting logic, the per-field loop, and the closing catch/brace are
// outside the visible source, so braces here are intentionally unbalanced.
private void readData( Node stepnode ) throws KettleXMLException {
try {
filename = XMLHandler.getTagValue( stepnode, getXmlCode( "FILENAME" ) );
filenameField = XMLHandler.getTagValue( stepnode, getXmlCode( "FILENAME_FIELD" ) );
rowNumField = XMLHandler.getTagValue( stepnode, getXmlCode( "ROW_NUM_FIELD" ) );
// Boolean flags are stored as "Y"/"N" strings in the XML.
includingFilename = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, getXmlCode( "INCLUDE_FILENAME" ) ) );
delimiter = XMLHandler.getTagValue( stepnode, getXmlCode( "DELIMITER" ) );
enclosure = XMLHandler.getTagValue( stepnode, getXmlCode( "ENCLOSURE" ) );
bufferSize = XMLHandler.getTagValue( stepnode, getXmlCode( "BUFFERSIZE" ) );
headerPresent = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, getXmlCode( "HEADER_PRESENT" ) ) );
lazyConversionActive = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, getXmlCode( "LAZY_CONVERSION" ) ) );
isaddresult = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, getXmlCode( "ADD_FILENAME_RESULT" ) ) );
runningInParallel = "Y".equalsIgnoreCase( XMLHandler.getTagValue( stepnode, getXmlCode( "PARALLEL" ) ) );
// NEWLINE_POSSIBLE may be absent in older files; defaulting logic continues
// outside this chunk.
String nlp = XMLHandler.getTagValue( stepnode, getXmlCode( "NEWLINE_POSSIBLE" ) );
if ( Utils.isEmpty( nlp ) ) {
if ( runningInParallel ) {
encoding = XMLHandler.getTagValue( stepnode, getXmlCode( "ENCODING" ) );
// Field definitions live under the FIELDS subnode, one FIELD node each.
Node fields = XMLHandler.getSubNode( stepnode, getXmlCode( "FIELDS" ) );
int nrfields = XMLHandler.countNodes( fields, getXmlCode( "FIELD" ) );
allocate( nrfields );
Node fnode = XMLHandler.getSubNodeByNr( fields, getXmlCode( "FIELD" ), i );
inputFields[i].setName( XMLHandler.getTagValue( fnode, getXmlCode( "FIELD_NAME" ) ) );
inputFields[i].setType(
@Test
public void testMultiCharDelimOptions() throws Exception {
  // A delimiter longer than one character ("|||") must still split columns correctly.
  meta.setDelimiter( "|||" );
  init( "multi_delim.csv" );
  setFields(
    new TextFileInputField( "Field 1", -1, -1 ),
    new TextFileInputField( "Field 2", -1, -1 ),
    new TextFileInputField( "Field 3", -1, -1 ) );

  process();

  // Last row contains non-latin characters to cover encoding handling.
  Object[][] expectedRows = {
    { "first", "1", "1.1" },
    { "second", "2", "2.2" },
    { "third", "3", "3.3" },
    { "\u043d\u0435-\u043b\u0430\u0446\u0456\u043d\u043a\u0430(non-latin)", "4", "4" } };
  check( expectedRows );
}
static String[] fieldNames( CsvInputMeta csvInputMeta ) { TextFileInputField[] fields = csvInputMeta.getInputFields(); String[] fieldNames = new String[fields.length]; for ( int i = 0; i < fields.length; i++ ) { // We need to sanitize field names because existing ktr files may contain field names with leading BOM fieldNames[i] = EncodingType.removeBOMIfPresent( fields[i].getName() ); } return fieldNames; }
/**
 * Switches lazy conversion off on every CSV-input and table-input step of the
 * given transformation. Steps are identified by their step ID.
 *
 * @param transMeta the transformation whose steps are adjusted in place
 */
private static void disableLazyConversion( TransMeta transMeta ) {
  transMeta.getSteps().stream()
    .filter( step -> "CsvInput".equals( step.getStepID() ) )
    .map( step -> (CsvInputMeta) step.getStepMetaInterface() )
    .forEach( stepMeta -> stepMeta.setLazyConversionActive( false ) );
  transMeta.getSteps().stream()
    .filter( step -> "TableInput".equals( step.getStepID() ) )
    .map( step -> (TableInputMeta) step.getStepMetaInterface() )
    .forEach( stepMeta -> stepMeta.setLazyConversionActive( false ) );
}