/**
 * Builds a new {@code HadoopFlowProcess} from this instance and the supplied configuration.
 *
 * @param jobConf the configuration passed to the new process
 * @return the newly constructed {@code HadoopFlowProcess}
 */
@Override
public FlowProcess copyWith( JobConf jobConf )
  {
  final HadoopFlowProcess derived = new HadoopFlowProcess( this, jobConf );

  return derived;
  }
/**
 * Returns a fresh {@code HadoopFlowProcess} constructed from this process and the given
 * configuration.
 *
 * @param jobConf the configuration handed to the new process
 * @return a new {@code HadoopFlowProcess} created with {@code this} and {@code jobConf}
 */
@Override
public FlowProcess copyWith( JobConf jobConf )
  {
  final HadoopFlowProcess result = new HadoopFlowProcess( this, jobConf );

  return result;
  }
/**
 * Delegates source-field retrieval to the wrapped {@code scheme}.
 *
 * <p>The {@code flowProcess} and {@code tap} arguments are not used; the delegate is called
 * with a {@code HadoopFlowProcess} built from {@code defaults} and with the {@code lfs} field.
 *
 * @param flowProcess unused by this implementation
 * @param tap unused by this implementation
 * @return whatever {@code scheme.retrieveSourceFields} returns
 */
@Override
public Fields retrieveSourceFields(FlowProcess<Properties> flowProcess, Tap tap) {
  HadoopFlowProcess hadoopProcess = new HadoopFlowProcess(defaults);
  return scheme.retrieveSourceFields(hadoopProcess, lfs);
}
/**
 * Initialises the wrapped {@code scheme} as a source and writes the resulting configuration
 * back into {@code conf}.
 *
 * <p>Steps: merge {@code conf} with {@code defaults} into a {@code JobConf}, call
 * {@code scheme.sourceConfInit} with a {@code HadoopFlowProcess} over that merged configuration
 * and the {@code lfs} field, then pass {@code conf} and the merged configuration to
 * {@code overwriteProperties}.
 *
 * @param flowProcess unused by this implementation
 * @param tap unused by this implementation
 * @param conf the properties that are merged with the defaults and then overwritten
 */
@Override
public void sourceConfInit(FlowProcess<Properties> flowProcess,
    Tap<Properties, RecordReader, OutputCollector> tap, Properties conf) {
  JobConf merged = mergeDefaults("LocalScheme#sourceConfInit", conf, defaults);
  HadoopFlowProcess hadoopProcess = new HadoopFlowProcess(merged);

  scheme.sourceConfInit(hadoopProcess, lfs, merged);
  overwriteProperties(conf, merged);
}
/**
 * Delegates sink-field retrieval to the wrapped {@code scheme}.
 *
 * <p>The {@code flowProcess} and {@code tap} arguments are not used; the delegate is called
 * with a {@code HadoopFlowProcess} built from {@code defaults} and with the {@code lfs} field.
 *
 * @param flowProcess unused by this implementation
 * @param tap unused by this implementation
 * @return whatever {@code scheme.retrieveSinkFields} returns
 */
@Override
public Fields retrieveSinkFields(FlowProcess<Properties> flowProcess, Tap tap) {
  HadoopFlowProcess hadoopProcess = new HadoopFlowProcess(defaults);
  return scheme.retrieveSinkFields(hadoopProcess, lfs);
}
/**
 * Initialises the wrapped {@code scheme} as a sink and writes the resulting configuration
 * back into {@code conf}.
 *
 * <p>Steps: merge {@code conf} with {@code defaults} into a {@code JobConf}, call
 * {@code scheme.sinkConfInit} with a {@code HadoopFlowProcess} over that merged configuration
 * and the {@code lfs} field, then pass {@code conf} and the merged configuration to
 * {@code overwriteProperties}.
 *
 * @param flowProcess unused by this implementation
 * @param tap unused by this implementation
 * @param conf the properties that are merged with the defaults and then overwritten
 */
@Override
public void sinkConfInit(FlowProcess<Properties> flowProcess,
    Tap<Properties, RecordReader, OutputCollector> tap, Properties conf) {
  JobConf merged = mergeDefaults("LocalScheme#sinkConfInit", conf, defaults);
  HadoopFlowProcess hadoopProcess = new HadoopFlowProcess(merged);

  scheme.sinkConfInit(hadoopProcess, lfs, merged);
  overwriteProperties(conf, merged);
}
/**
 * Forwards the presented source fields to the wrapped {@code scheme}.
 *
 * <p>The {@code flowProcess} and {@code tap} arguments are not used; the delegate is called
 * with a {@code HadoopFlowProcess} built from {@code defaults} and with the {@code lfs} field.
 *
 * @param flowProcess unused by this implementation
 * @param tap unused by this implementation
 * @param fields the fields passed through to {@code scheme.presentSourceFields}
 */
@Override
public void presentSourceFields(FlowProcess<Properties> flowProcess, Tap tap, Fields fields) {
  HadoopFlowProcess hadoopProcess = new HadoopFlowProcess(defaults);
  scheme.presentSourceFields(hadoopProcess, lfs, fields);
}
/**
 * Forwards the presented sink fields to the wrapped {@code scheme}.
 *
 * <p>The {@code flowProcess} and {@code tap} arguments are not used; the delegate is called
 * with a {@code HadoopFlowProcess} built from {@code defaults} and with the {@code lfs} field.
 *
 * @param flowProcess unused by this implementation
 * @param tap unused by this implementation
 * @param fields the fields passed through to {@code scheme.presentSinkFields}
 */
@Override
public void presentSinkFields(FlowProcess<Properties> flowProcess, Tap tap, Fields fields) {
  HadoopFlowProcess hadoopProcess = new HadoopFlowProcess(defaults);
  scheme.presentSinkFields(hadoopProcess, lfs, fields);
}
/**
 * Opens {@code source} for reading as a {@code JobConf}-configured tap.
 *
 * <p>A new {@code JobConf} is created, wrapped in a {@code HadoopFlowProcess}, passed to the
 * tap's {@code sourceConfInit}, and the same process is then used for {@code openForRead}.
 *
 * @return the iterator returned by the tap's {@code openForRead}
 * @throws IOException declared by this method; may be thrown while opening the tap
 */
private TupleEntryIterator getHadoopTupleEntryIterator() throws IOException {
  // The cast is unchecked because the tap's config type parameter is erased at runtime.
  @SuppressWarnings("unchecked")
  Tap<JobConf, ?, ?> typedSource = (Tap<JobConf, ?, ?>) source;

  JobConf jobConf = new JobConf();
  FlowProcess<JobConf> hadoopProcess = new HadoopFlowProcess(jobConf);

  typedSource.sourceConfInit(hadoopProcess, jobConf);

  return typedSource.openForRead(hadoopProcess);
}
/**
 * Creates a new {@code HadoopFlowProcess} from the current flow session and configuration.
 *
 * @return a new process built from {@code getFlowSession()} and {@code getConfig()}; a new
 *         instance is constructed on every call
 */
@Override
public FlowProcess<JobConf> getFlowProcess()
  {
  final FlowProcess<JobConf> process = new HadoopFlowProcess( getFlowSession(), getConfig() );

  return process;
  }
/**
 * Returns a {@code HadoopFlowProcess} bound to this object's flow session and configuration.
 *
 * @return a newly constructed process using {@code getFlowSession()} and {@code getConfig()};
 *         each call constructs a new instance
 */
@Override
public FlowProcess<JobConf> getFlowProcess()
  {
  final FlowProcess<JobConf> flowProcess = new HadoopFlowProcess( getFlowSession(), getConfig() );

  return flowProcess;
  }
@Override public TupleEntryCollector openTrapForWrite( Tap trap ) throws IOException { JobConf jobConf = HadoopUtil.copyJobConf( getJobConf() ); int stepNum = jobConf.getInt( "cascading.flow.step.num", 0 ); String partname; if( jobConf.getBoolean( "mapred.task.is.map", true ) ) partname = String.format( "-m-%05d-", stepNum ); else partname = String.format( "-r-%05d-", stepNum ); jobConf.set( "cascading.tapcollector.partname", "%s%spart" + partname + "%05d" ); return trap.openForWrite( new HadoopFlowProcess( this, jobConf ), null ); // do not honor sinkmode as this may be opened across tasks }
@Override public TupleEntryCollector openTrapForWrite( Tap trap ) throws IOException { JobConf jobConf = HadoopUtil.copyJobConf( getJobConf() ); int stepNum = jobConf.getInt( "cascading.flow.step.num", 0 ); String partname; if( jobConf.getBoolean( "mapred.task.is.map", true ) ) partname = String.format( "-m-%05d-", stepNum ); else partname = String.format( "-r-%05d-", stepNum ); jobConf.set( "cascading.tapcollector.partname", "%s%spart" + partname + "%05d" ); return trap.openForWrite( new HadoopFlowProcess( this, jobConf ), null ); // do not honor sinkmode as this may be opened across tasks }
/**
 * Creates a {@code HadoopFlowProcess} that uses {@code FlowSession.NULL} and the current
 * configuration.
 *
 * @return a new process built from {@code FlowSession.NULL}, {@code getConfiguration()} and
 *         the flag {@code true}
 */
@Override
public FlowProcess getFlowProcess()
  {
  // NOTE(review): the meaning of the trailing 'true' is defined by the HadoopFlowProcess
  // constructor, which is not visible here - confirm before changing it.
  final HadoopFlowProcess process = new HadoopFlowProcess( FlowSession.NULL, getConfiguration(), true );

  return process;
  }
/**
 * Opens the {@code lfs} field for reading with a Hadoop configuration derived from the caller's
 * properties.
 *
 * <p>A copy of the flow process configuration is merged with {@code defaults} into a
 * {@code JobConf}, which backs the {@code HadoopFlowProcess} passed to {@code lfs.openForRead}.
 *
 * @param flowProcess supplies the configuration copy that is merged with the defaults
 * @param input unused by this implementation
 * @return the iterator returned by {@code lfs.openForRead}
 * @throws IOException declared by this method; may be thrown while opening {@code lfs}
 */
@Override
public TupleEntryIterator openForRead(FlowProcess<Properties> flowProcess, RecordReader input)
    throws IOException {
  JobConf merged = mergeDefaults("LocalTap#openForRead", flowProcess.getConfigCopy(), defaults);
  HadoopFlowProcess hadoopProcess = new HadoopFlowProcess(merged);

  return lfs.openForRead(hadoopProcess);
}
/**
 * Returns a new {@code HadoopFlowProcess} constructed with {@code FlowSession.NULL}, the
 * value of {@code getConfiguration()} and the flag {@code true}.
 *
 * @return the newly constructed process; a new instance is created on every call
 */
@Override
public FlowProcess getFlowProcess()
  {
  // NOTE(review): what the third argument 'true' selects depends on the HadoopFlowProcess
  // constructor, which is outside this view - verify before altering.
  final HadoopFlowProcess flowProcess = new HadoopFlowProcess( FlowSession.NULL, getConfiguration(), true );

  return flowProcess;
  }
/**
 * Opens the {@code lfs} field for writing with a Hadoop configuration derived from the caller's
 * properties.
 *
 * <p>A copy of the flow process configuration is merged with {@code defaults} into a
 * {@code JobConf}, which backs the {@code HadoopFlowProcess} passed to {@code lfs.openForWrite}.
 *
 * @param flowProcess supplies the configuration copy that is merged with the defaults
 * @param output unused by this implementation
 * @return the collector returned by {@code lfs.openForWrite}
 * @throws IOException declared by this method; may be thrown while opening {@code lfs}
 */
@Override
public TupleEntryCollector openForWrite(FlowProcess<Properties> flowProcess, OutputCollector output)
    throws IOException {
  JobConf merged = mergeDefaults("LocalTap#openForWrite", flowProcess.getConfigCopy(), defaults);
  HadoopFlowProcess hadoopProcess = new HadoopFlowProcess(merged);

  return lfs.openForWrite(hadoopProcess);
}
/**
 * Measures how long it takes to create a partition tap of the given size and run its
 * {@code sourceConfInit}.
 *
 * <p>The timed section covers {@code createPartitionTap( size )}, the construction of the two
 * {@code JobConf} instances and the {@code HadoopFlowProcess}, and the {@code sourceConfInit}
 * call itself.
 *
 * @param size the value passed to {@code createPartitionTap}
 * @return the elapsed time in milliseconds
 */
public long getDuration( int size )
  {
  // nanoTime is monotonic; currentTimeMillis follows the wall clock and can jump when the
  // system time is adjusted, producing negative or inflated durations.
  long startNanos = System.nanoTime();

  createPartitionTap( size ).sourceConfInit( new HadoopFlowProcess( new JobConf() ), new JobConf() );

  long elapsedNanos = System.nanoTime() - startNanos;

  return elapsedNanos / 1000000L; // nanoseconds -> milliseconds
  }
/**
 * Measures how long it takes to create a partition tap of the given size and run its
 * {@code sourceConfInit}.
 *
 * <p>The timed section covers {@code createPartitionTap( size )}, the construction of the two
 * {@code JobConf} instances and the {@code HadoopFlowProcess}, and the {@code sourceConfInit}
 * call itself.
 *
 * @param size the value passed to {@code createPartitionTap}
 * @return the elapsed time in milliseconds
 */
public long getDuration( int size )
  {
  // nanoTime is monotonic; currentTimeMillis follows the wall clock and can jump when the
  // system time is adjusted, producing negative or inflated durations.
  long startNanos = System.nanoTime();

  createPartitionTap( size ).sourceConfInit( new HadoopFlowProcess( new JobConf() ), new JobConf() );

  long elapsedNanos = System.nanoTime() - startNanos;

  return elapsedNanos / 1000000L; // nanoseconds -> milliseconds
  }
/**
 * Writes every tuple entry of {@code data} to the given tap, treating it as a
 * {@code JobConf}-configured tap.
 *
 * <p>A new {@code JobConf} is created, wrapped in a {@code HadoopFlowProcess}, passed to the
 * tap's {@code sinkConfInit}, and the same process is used to open the collector. The
 * collector is closed even when adding a tuple fails.
 *
 * @param tap the tap to write to; cast to {@code Tap<JobConf, ?, ?>}
 * @throws IOException declared by this method; may be thrown while opening the tap
 */
private void writeToHadoopTap(Tap<?, ?, ?> tap) throws IOException {
  // The cast is unchecked because the tap's config type parameter is erased at runtime.
  @SuppressWarnings("unchecked")
  Tap<JobConf, ?, ?> hadoopTap = (Tap<JobConf, ?, ?>) tap;

  JobConf conf = new JobConf();
  HadoopFlowProcess flowProcess = new HadoopFlowProcess(conf);
  hadoopTap.sinkConfInit(flowProcess, conf);

  TupleEntryCollector collector = hadoopTap.openForWrite(flowProcess);
  try {
    for (TupleEntry tuple : data.asTupleEntryList()) {
      collector.add(tuple);
    }
  } finally {
    // Close in finally so a failing add() does not leave the collector open.
    collector.close();
  }
}