private void initialize() throws IOException { // prevent collisions of configuration properties set client side if now cluster side String property = flowProcess.getStringProperty( "cascading.node.accumulated.source.conf." + Tap.id( tap ) ); if( property == null ) { // default behavior is to accumulate paths, so remove any set prior conf = HadoopUtil.removePropertiesFrom( conf, "mapred.input.dir", "mapreduce.input.fileinputformat.inputdir" ); // hadoop2 tap.sourceConfInit( flowProcess, conf ); } JobConf jobConf = asJobConfInstance( conf ); inputFormat = jobConf.getInputFormat(); if( inputFormat instanceof JobConfigurable ) ( (JobConfigurable) inputFormat ).configure( jobConf ); // do not test for existence, let hadoop decide how to handle the given path // this delegates globbing to the inputformat on split generation. splits = inputFormat.getSplits( jobConf, 1 ); if( splits.length == 0 ) complete = true; }
private void initialize() throws IOException { // prevent collisions of configuration properties set client side if now cluster side String property = flowProcess.getStringProperty( "cascading.node.accumulated.source.conf." + Tap.id( tap ) ); if( property == null ) { // default behavior is to accumulate paths, so remove any set prior conf = HadoopUtil.removePropertiesFrom( conf, "mapred.input.dir", "mapreduce.input.fileinputformat.inputdir" ); // hadoop2 tap.sourceConfInit( flowProcess, conf ); } JobConf jobConf = asJobConfInstance( conf ); inputFormat = jobConf.getInputFormat(); if( inputFormat instanceof JobConfigurable ) ( (JobConfigurable) inputFormat ).configure( jobConf ); // do not test for existence, let hadoop decide how to handle the given path // this delegates globbing to the inputformat on split generation. splits = inputFormat.getSplits( jobConf, 1 ); if( splits.length == 0 ) complete = true; }