@Override
MapReduceFlow createFlow()
  {
  // Start from the bean's base configuration; when a Job was supplied, overlay
  // the Job's own settings on top of it.
  Configuration merged = job == null ? configuration : ConfigurationUtils.merge( configuration, job.getConfiguration() );

  // Fold any bean-level properties into the resolved configuration.
  Configuration resolved = ConfigurationUtils.createFrom( merged, properties );

  // stopJobsOnExit is fixed to false for factory-created flows
  return new MapReduceFlow( beanName, new JobConf( resolved ), deleteSinkOnInit, false );
  }
/**
 * Derives this flow's sources, sinks, traps, and step graph from the given {@link JobConf}.
 * <p>
 * Invoked from the constructor but kept {@code protected} so subclasses can override
 * how the flow is assembled. The call sequence is order sensitive and deliberately
 * mirrors {@code BaseFlow#initialize()} — do not reorder these calls.
 *
 * @param jobConf of type JobConf, the fully configured Hadoop job to derive the flow from
 */
protected void initializeFrom( JobConf jobConf )
  {
  setSources( createSources( jobConf ) );
  setSinks( createSinks( jobConf ) );
  setTraps( createTraps( jobConf ) );
  setFlowStepGraph( makeStepGraph( jobConf ) );

  // this mirrors BaseFlow#initialize()
  initSteps();

  this.flowStats = createPrepareFlowStats(); // must be last

  initializeNewJobsMap();

  initializeChildStats();
  }
  }
/**
 * Constructor MapReduceFlow creates a new MapReduceFlow instance.
 *
 * @param properties       of type Properties, applied to the underlying flow configuration
 * @param name             of type String, the display name of this flow
 * @param jobConf          of type JobConf, the pre-configured Hadoop job this flow wraps
 * @param flowDescriptor   of type Map, optional descriptive metadata for this flow
 * @param deleteSinkOnInit of type boolean, true if sink resources should be deleted before the job runs
 * @param stopJobsOnExit   of type boolean, true if running jobs should be stopped when the JVM exits
 */
@ConstructorProperties({"properties", "name", "jobConf", "flowDescriptor", "deleteSinkOnInit", "stopJobsOnExit"})
public MapReduceFlow( Properties properties, String name, JobConf jobConf, Map<String, String> flowDescriptor, boolean deleteSinkOnInit, boolean stopJobsOnExit )
  {
  super( HadoopUtil.getPlatformInfo( JobConf.class, "org/apache/hadoop", "Hadoop MR" ), properties, jobConf, name, flowDescriptor, deleteSinkOnInit );
  // stopJobsOnExit must be set before initialization so shutdown hooks see the final value
  this.stopJobsOnExit = stopJobsOnExit;

  initializeFrom( jobConf ); // push off initialization allowing for overrides
  }
/**
 * Derives this flow's sources, sinks, traps, and step graph from the given {@link JobConf}.
 * <p>
 * Invoked from the constructor but kept {@code protected} so subclasses can override
 * how the flow is assembled. The call sequence is order sensitive and deliberately
 * mirrors {@code BaseFlow#initialize()} — do not reorder these calls.
 *
 * @param jobConf of type JobConf, the fully configured Hadoop job to derive the flow from
 */
protected void initializeFrom( JobConf jobConf )
  {
  setSources( createSources( jobConf ) );
  setSinks( createSinks( jobConf ) );
  setTraps( createTraps( jobConf ) );
  setFlowStepGraph( makeStepGraph( jobConf ) );

  // this mirrors BaseFlow#initialize()
  initSteps();

  this.flowStats = createPrepareFlowStats(); // must be last

  initializeNewJobsMap();

  initializeChildStats();
  }
  }
// Point each MapReduce job at its sink path, removing any stale output first,
// then wrap each job in a MapReduceFlow (deleteSinkOnInit = true).
FileOutputFormat.setOutputPath( firstConf, new Path( remove( sinkPath1, true ) ) );
Flow firstMR = new MapReduceFlow( firstConf, true );

FileOutputFormat.setOutputPath( secondConf, new Path( remove( sinkPath2, true ) ) );
Flow secondMR = new MapReduceFlow( secondConf, true );

// the third job uses the newer org.apache.hadoop.mapreduce API, so its
// Configuration must be converted to a JobConf before wrapping
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath( job, new Path( remove( sinkPath3, true ) ) );
Flow thirdMR = new MapReduceFlow( new JobConf( job.getConfiguration() ), true );
/**
 * Constructor MapReduceFlow creates a new MapReduceFlow instance.
 *
 * @param properties       of type Properties, applied to the underlying flow configuration
 * @param name             of type String, the display name of this flow
 * @param jobConf          of type JobConf, the pre-configured Hadoop job this flow wraps
 * @param flowDescriptor   of type Map, optional descriptive metadata for this flow
 * @param deleteSinkOnInit of type boolean, true if sink resources should be deleted before the job runs
 * @param stopJobsOnExit   of type boolean, true if running jobs should be stopped when the JVM exits
 */
@ConstructorProperties({"properties", "name", "jobConf", "flowDescriptor", "deleteSinkOnInit", "stopJobsOnExit"})
public MapReduceFlow( Properties properties, String name, JobConf jobConf, Map<String, String> flowDescriptor, boolean deleteSinkOnInit, boolean stopJobsOnExit )
  {
  super( HadoopUtil.getPlatformInfo( JobConf.class, "org/apache/hadoop", "Hadoop MR" ), properties, jobConf, name, flowDescriptor, deleteSinkOnInit );
  // stopJobsOnExit must be set before initialization so shutdown hooks see the final value
  this.stopJobsOnExit = stopJobsOnExit;

  initializeFrom( jobConf ); // push off initialization allowing for overrides
  }
// Point each MapReduce job at its sink path, removing any stale output first,
// then wrap each job in a MapReduceFlow (deleteSinkOnInit = true).
FileOutputFormat.setOutputPath( firstConf, new Path( remove( sinkPath1, true ) ) );
Flow firstMR = new MapReduceFlow( firstConf, true );

FileOutputFormat.setOutputPath( secondConf, new Path( remove( sinkPath2, true ) ) );
Flow secondMR = new MapReduceFlow( secondConf, true );

// the third job uses the newer org.apache.hadoop.mapreduce API, so its
// Configuration must be converted to a JobConf before wrapping
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath( job, new Path( remove( sinkPath3, true ) ) );
Flow thirdMR = new MapReduceFlow( new JobConf( job.getConfiguration() ), true );
/**
 * Verifies that an identity map/reduce job wrapped in a {@link MapReduceFlow}
 * copies its input to the sink unchanged (same line count in and out).
 */
@Test
public void testFlow() throws IOException
  {
  getPlatform().copyFromLocal( inputFileApache );

  JobConf platformConf = (JobConf) ( (BaseHadoopPlatform) getPlatform() ).getConfiguration();

  // build an identity mapper/reducer job over line-oriented text
  JobConf jobConf = new JobConf( platformConf );
  jobConf.setJobName( "mrflow" );

  jobConf.setOutputKeyClass( LongWritable.class );
  jobConf.setOutputValueClass( Text.class );

  jobConf.setMapperClass( IdentityMapper.class );
  jobConf.setReducerClass( IdentityReducer.class );

  jobConf.setInputFormat( TextInputFormat.class );
  jobConf.setOutputFormat( TextOutputFormat.class );

  FileInputFormat.setInputPaths( jobConf, new Path( inputFileApache ) );

  String sinkPath = getOutputPath( "flowTest" );
  FileOutputFormat.setOutputPath( jobConf, new Path( sinkPath ) );

  Flow mrFlow = new MapReduceFlow( "mrflow", jobConf, true );

  // sanity-check the source before running, then confirm the sink matches after
  validateLength( new Hfs( new TextLine(), inputFileApache ).openForRead( new HadoopFlowProcess( platformConf ) ), 10 );

  mrFlow.complete();

  validateLength( new Hfs( new TextLine(), sinkPath ).openForRead( new HadoopFlowProcess( platformConf ) ), 10 );
  }
/**
 * Verifies that an identity map/reduce job wrapped in a {@link MapReduceFlow}
 * copies its input to the sink unchanged (same line count in and out).
 */
@Test
public void testFlow() throws IOException
  {
  getPlatform().copyFromLocal( inputFileApache );

  JobConf platformConf = (JobConf) ( (BaseHadoopPlatform) getPlatform() ).getConfiguration();

  // build an identity mapper/reducer job over line-oriented text
  JobConf jobConf = new JobConf( platformConf );
  jobConf.setJobName( "mrflow" );

  jobConf.setOutputKeyClass( LongWritable.class );
  jobConf.setOutputValueClass( Text.class );

  jobConf.setMapperClass( IdentityMapper.class );
  jobConf.setReducerClass( IdentityReducer.class );

  jobConf.setInputFormat( TextInputFormat.class );
  jobConf.setOutputFormat( TextOutputFormat.class );

  FileInputFormat.setInputPaths( jobConf, new Path( inputFileApache ) );

  String sinkPath = getOutputPath( "flowTest" );
  FileOutputFormat.setOutputPath( jobConf, new Path( sinkPath ) );

  Flow mrFlow = new MapReduceFlow( "mrflow", jobConf, true );

  // sanity-check the source before running, then confirm the sink matches after
  validateLength( new Hfs( new TextLine(), inputFileApache ).openForRead( new HadoopFlowProcess( platformConf ) ), 10 );

  mrFlow.complete();

  validateLength( new Hfs( new TextLine(), sinkPath ).openForRead( new HadoopFlowProcess( platformConf ) ), 10 );
  }