/**
 * Returns the collector to write to: the supplied one when it is non-null,
 * otherwise a newly constructed {@link TapOutputCollector} for the given tap.
 *
 * @param flowProcess     the current flow process, handed to the new collector if one is created
 * @param tap             the tap handed to the new collector if one is created
 * @param outputCollector an existing collector to reuse, or {@code null} to create one
 * @return {@code outputCollector} if non-null, else a new {@code TapOutputCollector}
 * @throws IOException if constructing the {@code TapOutputCollector} fails
 */
private static OutputCollector makeCollector( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, OutputCollector outputCollector ) throws IOException {
  // Only build a collector when the caller did not supply one.
  if( outputCollector == null ) {
    return new TapOutputCollector( flowProcess, tap );
  }

  return outputCollector;
}
public TapOutputCollector( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, String prefix, long sequence ) throws IOException { this.tap = tap; this.sequence = sequence; this.prefix = prefix == null || prefix.length() == 0 ? null : prefix; this.flowProcess = flowProcess; this.conf = this.flowProcess.getConfigCopy(); this.filenamePattern = this.conf.get( "cascading.tapcollector.partname", sequence == -1 ? PART_TASK_PATTERN : PART_TASK_SEQ_PATTERN ); initialize(); }
protected void initialize() throws IOException { tap.sinkConfInit( flowProcess, conf ); OutputFormat outputFormat = asJobConfInstance( conf ).getOutputFormat(); // todo: use OutputCommitter class isFileOutputFormat = outputFormat instanceof FileOutputFormat; if( isFileOutputFormat ) { Hadoop18TapUtil.setupJob( conf ); Hadoop18TapUtil.setupTask( conf ); int partition = conf.getInt( "mapred.task.partition", conf.getInt( "mapreduce.task.partition", 0 ) ); long localSequence = sequence == -1 ? 0 : sequence; if( prefix != null ) filename = String.format( filenamePattern, prefix, "/", partition, localSequence ); else filename = String.format( filenamePattern, "", "", partition, localSequence ); } LOG.info( "creating path: {}", filename ); writer = outputFormat.getRecordWriter( null, asJobConfInstance( conf ), filename, getReporter() ); }
protected void initialize() throws IOException { tap.sinkConfInit( flowProcess, conf ); OutputFormat outputFormat = asJobConfInstance( conf ).getOutputFormat(); // todo: use OutputCommitter class isFileOutputFormat = outputFormat instanceof FileOutputFormat; if( isFileOutputFormat ) { Hadoop18TapUtil.setupJob( conf ); Hadoop18TapUtil.setupTask( conf ); int partition = conf.getInt( "mapred.task.partition", conf.getInt( "mapreduce.task.partition", 0 ) ); long localSequence = sequence == -1 ? 0 : sequence; if( prefix != null ) filename = String.format( filenamePattern, prefix, "/", partition, localSequence ); else filename = String.format( filenamePattern, "", "", partition, localSequence ); } LOG.info( "creating path: {}", filename ); writer = outputFormat.getRecordWriter( null, asJobConfInstance( conf ), filename, getReporter() ); }
/**
 * Returns the collector to write to: the supplied one when it is non-null,
 * otherwise a newly constructed {@link TapOutputCollector} for the given tap.
 *
 * @param flowProcess     the current flow process, handed to the new collector if one is created
 * @param tap             the tap handed to the new collector if one is created
 * @param outputCollector an existing collector to reuse, or {@code null} to create one
 * @return {@code outputCollector} if non-null, else a new {@code TapOutputCollector}
 * @throws IOException if constructing the {@code TapOutputCollector} fails
 */
private static OutputCollector makeCollector( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, OutputCollector outputCollector ) throws IOException {
  // Only build a collector when the caller did not supply one.
  if( outputCollector == null ) {
    return new TapOutputCollector( flowProcess, tap );
  }

  return outputCollector;
}
// Close the record writer, passing it the reporter returned by getReporter().
writer.close( getReporter() );
public TapOutputCollector( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, String prefix, long sequence ) throws IOException { this.tap = tap; this.sequence = sequence; this.prefix = prefix == null || prefix.length() == 0 ? null : prefix; this.flowProcess = flowProcess; this.conf = this.flowProcess.getConfigCopy(); this.filenamePattern = this.conf.get( "cascading.tapcollector.partname", sequence == -1 ? PART_TASK_PATTERN : PART_TASK_SEQ_PATTERN ); initialize(); }
/**
 * Builds a scheme collector that writes through a new {@link TapOutputCollector}
 * created for the given parent tap, path and sequence.
 *
 * @param flowProcess the current flow process
 * @param parent      the parent tap, passed to both the output collector and the scheme collector
 * @param path        passed to the {@code TapOutputCollector} constructor as its prefix argument
 * @param sequence    passed to the {@code TapOutputCollector} constructor as its sequence argument
 * @return a new {@code TupleEntrySchemeCollector} wrapping the new output collector
 * @throws IOException if the output collector or the scheme collector cannot be created
 */
@Override
protected TupleEntrySchemeCollector createTupleEntrySchemeCollector( FlowProcess<? extends Configuration> flowProcess, Tap parent, String path, long sequence ) throws IOException {
  TapOutputCollector pathCollector = new TapOutputCollector( flowProcess, parent, path, sequence );

  return new TupleEntrySchemeCollector<Configuration, OutputCollector>( flowProcess, parent, pathCollector );
}
// Close the record writer, passing it the reporter returned by getReporter().
writer.close( getReporter() );
/**
 * Builds a scheme collector that writes through a new {@link TapOutputCollector}
 * created for the given parent tap, path and sequence.
 *
 * @param flowProcess the current flow process
 * @param parent      the parent tap, passed to both the output collector and the scheme collector
 * @param path        passed to the {@code TapOutputCollector} constructor as its prefix argument
 * @param sequence    passed to the {@code TapOutputCollector} constructor as its sequence argument
 * @return a new {@code TupleEntrySchemeCollector} wrapping the new output collector
 * @throws IOException if the output collector or the scheme collector cannot be created
 */
@Override
protected TupleEntrySchemeCollector createTupleEntrySchemeCollector( FlowProcess<? extends Configuration> flowProcess, Tap parent, String path, long sequence ) throws IOException {
  TapOutputCollector pathCollector = new TapOutputCollector( flowProcess, parent, path, sequence );

  return new TupleEntrySchemeCollector<Configuration, OutputCollector>( flowProcess, parent, pathCollector );
}