/**
 * Opens this tap for reading.
 *
 * @param flowProcess of type FlowProcess
 * @param input       of type RecordReader
 * @return TupleEntryIterator over the tuples read from this tap
 * @throws IOException when the iterator cannot be created
 */
@Override
public TupleEntryIterator openForRead( FlowProcess<JobConf> flowProcess, RecordReader input ) throws IOException
  {
  HadoopTupleEntrySchemeIterator iterator = new HadoopTupleEntrySchemeIterator( flowProcess, this, input );

  return iterator;
  }
/**
 * Opens this tap for writing.
 *
 * @param flowProcess of type FlowProcess
 * @param output      of type OutputCollector
 * @return TupleEntryCollector accepting tuples written to this tap
 * @throws IOException when the collector cannot be created
 */
@Override
public TupleEntryCollector openForWrite( FlowProcess<JobConf> flowProcess, OutputCollector output ) throws IOException
  {
  HadoopTupleEntrySchemeCollector collector = new HadoopTupleEntrySchemeCollector( flowProcess, this, output );

  return collector;
  }
/**
 * Builds a closeable iterator of record readers for the given parent tap.
 * A non-null {@code recordReader} is wrapped directly; otherwise readers
 * are sourced from the parent tap.
 *
 * @param flowProcess  of type FlowProcess
 * @param parentTap    of type Tap
 * @param recordReader of type RecordReader, may be null
 * @return CloseableIterator over record readers
 * @throws IOException when the iterator cannot be created
 */
private static CloseableIterator<RecordReader> makeIterator( FlowProcess<? extends Configuration> flowProcess, Tap parentTap, RecordReader recordReader ) throws IOException
  {
  return recordReader == null
    ? new MultiRecordReaderIterator( flowProcess, parentTap )
    : new RecordReaderIterator( recordReader );
  }
/**
 * Returns the status of the single path as a one-element array, or null
 * when no status is available. The filter argument is not consulted.
 *
 * @param path       of type Path
 * @param pathFilter of type PathFilter (unused)
 * @return FileStatus[] one-element array, or null when the path has no status
 * @throws IOException when the status lookup fails
 */
@Override
public FileStatus[] globStatus( Path path, PathFilter pathFilter ) throws IOException
  {
  FileStatus status = getFileStatus( path );

  return status == null ? null : new FileStatus[]{status};
  }
/**
 * Method hasNext returns true if there are more {@link Tuple} instances available.
 * <p>
 * Calls {@code getNextReader()} first, so the {@code complete} flag reflects
 * the state after any reader advance.
 *
 * @return boolean true while iteration has not completed
 */
public boolean hasNext() { getNextReader(); return !complete; }
/**
 * Method listStatus returns the {@link FileStatus} of the given path as a
 * single-element array.
 * <p>
 * Fix: when {@code getFileStatus} yields null this now returns null —
 * matching {@link #globStatus(Path, PathFilter)} — instead of returning a
 * one-element array containing null, which would NPE in callers that
 * iterate the result.
 *
 * @param path of type Path
 * @return FileStatus[] one-element array, or null when the path has no status
 * @throws IOException when the status lookup fails
 */
@Override
public FileStatus[] listStatus( Path path ) throws IOException
  {
  FileStatus fileStatus = getFileStatus( path );

  if( fileStatus == null )
    return null;

  return new FileStatus[]{fileStatus};
  }
/**
 * Constructor FSDigestInputStream creates a new FSDigestInputStream instance,
 * delegating to the full constructor with a freshly obtained MD5 digest.
 *
 * @param inputStream of type InputStream
 * @param digestHex   of type String
 * @throws IOException if unable to get md5 digest
 */
public FSDigestInputStream( InputStream inputStream, String digestHex ) throws IOException { this( inputStream, getMD5Digest(), digestHex ); }
/**
 * Supplies an output collector for the given tap, reusing the provided
 * collector when one already exists.
 *
 * @param flowProcess     of type FlowProcess
 * @param tap             of type Tap
 * @param outputCollector of type OutputCollector, may be null
 * @return OutputCollector the given collector, or a new TapOutputCollector
 * @throws IOException when the collector cannot be created
 */
private static OutputCollector makeCollector( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, OutputCollector outputCollector ) throws IOException
  {
  return outputCollector == null ? new TapOutputCollector( flowProcess, tap ) : outputCollector;
  }
@Override public TupleEntryIterator openForRead( FlowProcess<? extends Configuration> flowProcess, RecordReader input ) throws IOException { // input may be null when this method is called on the client side or cluster side when accumulating // for a HashJoin return new HadoopTupleEntrySchemeIterator( flowProcess, this, input ); }
/**
 * Opens this tap for writing.
 *
 * @param flowProcess of type FlowProcess
 * @param output      of type OutputCollector
 * @return TupleEntryCollector accepting tuples written to this tap
 * @throws IOException when the collector cannot be created
 */
@Override
public TupleEntryCollector openForWrite( FlowProcess<JobConf> flowProcess, OutputCollector output ) throws IOException
  {
  return new HadoopTupleEntrySchemeCollector( flowProcess, this, output );
  }
/**
 * Builds a closeable iterator of record readers for the given parent tap.
 * A non-null {@code recordReader} is wrapped directly; otherwise readers
 * are sourced from the parent tap.
 *
 * @param flowProcess  of type FlowProcess
 * @param parentTap    of type Tap
 * @param recordReader of type RecordReader, may be null
 * @return CloseableIterator over record readers
 * @throws IOException when the iterator cannot be created
 */
private static CloseableIterator<RecordReader> makeIterator( FlowProcess<? extends Configuration> flowProcess, Tap parentTap, RecordReader recordReader ) throws IOException
  {
  if( recordReader == null )
    return new MultiRecordReaderIterator( flowProcess, parentTap );

  return new RecordReaderIterator( recordReader );
  }
/**
 * Returns the status of the single path as a one-element array, or null
 * when no status is available. The filter argument is not consulted.
 *
 * @param path       of type Path
 * @param pathFilter of type PathFilter (unused)
 * @return FileStatus[] one-element array, or null when the path has no status
 * @throws IOException when the status lookup fails
 */
@Override
public FileStatus[] globStatus( Path path, PathFilter pathFilter ) throws IOException
  {
  FileStatus single = getFileStatus( path );

  if( single == null )
    return null;

  return new FileStatus[]{single};
  }
/**
 * Method hasNext returns true if there are more {@link Tuple} instances available.
 * <p>
 * Calls {@code getNextReader()} first, so the {@code complete} flag reflects
 * the state after any reader advance.
 *
 * @return boolean true while iteration has not completed
 */
public boolean hasNext() { getNextReader(); return !complete; }
/**
 * Method listStatus returns the {@link FileStatus} of the given path as a
 * single-element array.
 * <p>
 * Fix: when {@code getFileStatus} yields null this now returns null —
 * matching {@link #globStatus(Path, PathFilter)} — instead of returning a
 * one-element array containing null, which would NPE in callers that
 * iterate the result.
 *
 * @param path of type Path
 * @return FileStatus[] one-element array, or null when the path has no status
 * @throws IOException when the status lookup fails
 */
@Override
public FileStatus[] listStatus( Path path ) throws IOException
  {
  FileStatus fileStatus = getFileStatus( path );

  if( fileStatus == null )
    return null;

  return new FileStatus[]{fileStatus};
  }
/**
 * Constructor FSDigestInputStream creates a new FSDigestInputStream instance,
 * delegating to the full constructor with a freshly obtained MD5 digest.
 *
 * @param inputStream of type InputStream
 * @param digestHex   of type String
 * @throws IOException if unable to get md5 digest
 */
public FSDigestInputStream( InputStream inputStream, String digestHex ) throws IOException { this( inputStream, getMD5Digest(), digestHex ); }
/**
 * Supplies an output collector for the given tap, reusing the provided
 * collector when one already exists.
 *
 * @param flowProcess     of type FlowProcess
 * @param tap             of type Tap
 * @param outputCollector of type OutputCollector, may be null
 * @return OutputCollector the given collector, or a new TapOutputCollector
 * @throws IOException when the collector cannot be created
 */
private static OutputCollector makeCollector( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, OutputCollector outputCollector ) throws IOException
  {
  if( outputCollector == null )
    return new TapOutputCollector( flowProcess, tap );

  return outputCollector;
  }
@Override public TupleEntryIterator openForRead( FlowProcess<? extends Configuration> flowProcess, RecordReader input ) throws IOException { // input may be null when this method is called on the client side or cluster side when accumulating // for a HashJoin return new HadoopTupleEntrySchemeIterator( flowProcess, this, input ); }
@Override public TupleEntryIterator openForRead( FlowProcess<JobConf> flowProcess, RecordReader<TupleWrapper, NullWritable> input ) throws IOException { // input may be null when this method is called on the client side or cluster side when accumulating // for a HashJoin return new HadoopTupleEntrySchemeIterator( flowProcess, this, input ); }
@Override public TupleEntryIterator openForRead( FlowProcess<? extends Configuration> flowProcess, RecordReader input ) throws IOException { // input may be null when this method is called on the client side or // cluster side when accumulating for a HashJoin return new HadoopTupleEntrySchemeIterator( flowProcess, this, input ); }
/**
 * Opens this tap for reading.
 *
 * @param jobConfFlowProcess of type FlowProcess
 * @param recordReader       of type RecordReader
 * @return TupleEntryIterator over the tuples read from this tap
 * @throws IOException when the iterator cannot be created
 */
@Override
public TupleEntryIterator openForRead( FlowProcess<JobConf> jobConfFlowProcess, RecordReader recordReader ) throws IOException
  {
  return new HadoopTupleEntrySchemeIterator( jobConfFlowProcess, this, recordReader );
  }