/**
 * Guesses the charset of a file from its leading bytes, falling back on the charset returned by
 * <code>getDefaultSystemCharset()</code> as the toolkit's default.
 *
 * @param f
 *          the file to inspect.
 * @param bufferLength
 *          size, in bytes, of the buffer handed to the <code>CharsetToolkit</code>.
 * @return the <code>Charset</code> returned by <code>CharsetToolkit.guessEncoding()</code> for the bytes read.
 * @throws FileNotFoundException
 *           if the file cannot be opened for reading.
 * @throws IOException
 *           if reading from or closing the file fails.
 */
public static Charset guessEncoding( File f, int bufferLength ) throws FileNotFoundException, IOException {
  byte[] buffer;
  // try-with-resources guarantees the stream is closed even when read() throws;
  // previously a failing read left the file handle open.
  try ( FileInputStream fis = new FileInputStream( f ) ) {
    buffer = new byte[bufferLength];
    // A single read may fill fewer than bufferLength bytes (short file or short read);
    // the unread tail of the array keeps its initial zero bytes.
    fis.read( buffer );
  }
  CharsetToolkit toolkit = new CharsetToolkit( buffer );
  toolkit.setDefaultCharset( getDefaultSystemCharset() );
  return toolkit.guessEncoding();
}
/**
 * Guesses the charset of a file from its leading bytes, using the supplied charset as the toolkit's default.
 *
 * @param f
 *          the file to inspect.
 * @param bufferLength
 *          size, in bytes, of the buffer handed to the <code>CharsetToolkit</code>.
 * @param defaultCharset
 *          the charset passed to <code>CharsetToolkit.setDefaultCharset()</code> before guessing.
 * @return the <code>Charset</code> returned by <code>CharsetToolkit.guessEncoding()</code> for the bytes read.
 * @throws FileNotFoundException
 *           if the file cannot be opened for reading.
 * @throws IOException
 *           if reading from or closing the file fails.
 */
public static Charset guessEncoding( File f, int bufferLength, Charset defaultCharset )
  throws FileNotFoundException, IOException {
  byte[] buffer;
  // try-with-resources guarantees the stream is closed even when read() throws;
  // previously a failing read left the file handle open.
  try ( FileInputStream fis = new FileInputStream( f ) ) {
    buffer = new byte[bufferLength];
    // A single read may fill fewer than bufferLength bytes (short file or short read);
    // the unread tail of the array keeps its initial zero bytes.
    fis.read( buffer );
  }
  CharsetToolkit toolkit = new CharsetToolkit( buffer );
  toolkit.setDefaultCharset( defaultCharset );
  return toolkit.guessEncoding();
}
if ( hasUTF8Bom( buffer ) ) { return Charset.forName( "UTF-8" ); if ( hasUTF16LEBom( buffer ) ) { return Charset.forName( "UTF-16LE" ); if ( hasUTF16BEBom( buffer ) ) { return Charset.forName( "UTF-16BE" ); if ( isTwoBytesSequence( b0 ) ) { if ( !isContinuationChar( b1 ) ) { validU8Char = false; } else { i++; } else if ( isThreeBytesSequence( b0 ) ) { if ( !( isContinuationChar( b1 ) && isContinuationChar( b2 ) ) ) { validU8Char = false; } else { i += 2; } else if ( isFourBytesSequence( b0 ) ) { if ( !( isContinuationChar( b1 ) && isContinuationChar( b2 ) && isContinuationChar( b3 ) ) ) { validU8Char = false; } else { i += 3;
/**
 * Creates a toolkit that will analyse the given bytes. The default charset is initialised from
 * <code>getDefaultSystemCharset()</code>.
 *
 * @param buffer
 *          the bytes whose encoding is to be determined; the array reference is stored as-is.
 */
public CharsetToolkit( byte[] buffer ) {
  this.defaultCharset = getDefaultSystemCharset();
  this.buffer = buffer;
}
/**
 * Returns the display name of the charset guessed for a file, using a 4096-byte buffer.
 *
 * @param file
 *          the file to inspect.
 * @return <code>displayName()</code> of the charset returned by <code>guessEncoding( file, 4096 )</code>.
 * @throws FileNotFoundException
 *           propagated from <code>guessEncoding</code>.
 * @throws IOException
 *           propagated from <code>guessEncoding</code>.
 */
public static String guessEncodingName( File file ) throws FileNotFoundException, IOException {
  Charset guessed = guessEncoding( file, 4096 );
  return guessed.displayName();
}
/**
 * Returns the guessed encoding name for a <code>FileObject</code> by wrapping its decoded path in a
 * <code>java.io.File</code> and delegating to <code>guessEncodingName( File )</code>.
 *
 * @param file
 *          the file object whose <code>getName().getPathDecoded()</code> value locates the file.
 * @return the encoding name returned by <code>guessEncodingName( File )</code>.
 * @throws FileNotFoundException
 *           propagated from the delegate.
 * @throws IOException
 *           propagated from the delegate or from resolving the path.
 */
public static String guessEncodingName( FileObject file ) throws FileNotFoundException, IOException {
  String decodedPath = file.getName().getPathDecoded();
  File localFile = new File( decodedPath );
  return guessEncodingName( localFile );
}
/**
 * Sets the <code>Charset</code> kept as this toolkit's default.
 *
 * @param defaultCharset
 *          the charset to store as the default; when <code>null</code>, the value of
 *          <code>getDefaultSystemCharset()</code> is stored instead.
 */
public void setDefaultCharset( Charset defaultCharset ) {
  this.defaultCharset = ( defaultCharset == null ) ? getDefaultSystemCharset() : defaultCharset;
}
/**
 * Guesses the charset of a <code>FileObject</code> by wrapping its decoded path in a
 * <code>java.io.File</code> and delegating to <code>guessEncoding( File, int )</code>.
 *
 * @param file
 *          the file object whose <code>getName().getPathDecoded()</code> value locates the file.
 * @param bufferLength
 *          buffer size, in bytes, forwarded to the delegate.
 * @return the <code>Charset</code> returned by the delegate.
 * @throws FileNotFoundException
 *           propagated from the delegate.
 * @throws IOException
 *           propagated from the delegate or from resolving the path.
 */
public static Charset guessEncoding( FileObject file, int bufferLength )
  throws FileNotFoundException, IOException {
  String decodedPath = file.getName().getPathDecoded();
  File localFile = new File( decodedPath );
  return guessEncoding( localFile, bufferLength );
}
file = KettleVFS.getFileObject( metaA.getString( dataA ) ); throwsErrorOnFileNotFound( file ); encoding = CharsetToolkit.guessEncodingName( file ); } catch ( KettleFileNotFoundException e ) { if ( failIfNoFile ) {
public synchronized boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException { meta = (TextFileOutputMeta) smi; data = (TextFileOutputData) sdi; if ( ( meta.getEncoding() == null ) || ( meta.getEncoding().isEmpty() ) ) { meta.setEncoding( CharsetToolkit.getDefaultSystemCharset().name() ); } Object[] row = getRow(); // This also waits for a row to be finished. if ( row != null && first ) { data.outputRowMeta = getInputRowMeta().clone(); } if ( first ) { initBinaryDataFields(); if ( data.outputRowMeta != null ) { initFieldNumbers( data.outputRowMeta, meta.getOutputFields() ); if ( row != null ) { meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore ); } } } return writeRowTo( row ); }