Refine search
@Override public void sinkPrepare(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException { super.sinkPrepare(flowProcess, sinkCall); Object[] context = new Object[SINK_CTX_SIZE]; // the tuple is fixed, so we can just use a collection/index Settings settings = loadSettings(flowProcess.getConfigCopy(), false); context[SINK_CTX_ALIASES] = CascadingUtils.fieldToAlias(settings, getSinkFields()); sinkCall.setContext(context); }
private void report(Stats stats, FlowProcess<Properties> flowProcess) { // report current stats for (Counter count : Counter.ALL) { flowProcess.increment(count, count.get(stats)); } }
// NOTE(review): this is an excerpt from a larger method; `i`, `streamedJobs`, `conf`,
// `tap` and `flowProcess` are defined outside the visible lines.

// A tap without an identifier is rejected up front.
if( tap.getIdentifier() == null ) throw new IllegalStateException( "tap may not have null identifier: " + tap.toString() );

// Slot i gets its own copy of conf, is tagged with this tap's id, and is then handed to the tap for source initialization.
streamedJobs[ i ] = flowProcess.copyConfig( conf );
streamedJobs[ i ].set( "cascading.step.source", Tap.id( tap ) );
tap.sourceConfInit( flowProcess, streamedJobs[ i ] );

// A second, separate copy of conf is initialized by the same tap so it can be compared against the untouched conf.
JobConf accumulatedJob = flowProcess.copyConfig( conf );
tap.sourceConfInit( flowProcess, accumulatedJob );

// The result of diffing conf against the tap-initialized copy is passed through pack(...) and stored on conf
// under a key that ends with the tap id (presumably only the properties the tap changed -- confirm against diffConfigIntoMap).
Map<String, String> map = flowProcess.diffConfigIntoMap( conf, accumulatedJob );
conf.set( "cascading.node.accumulated.source.conf." + Tap.id( tap ), pack( map, conf ) );
private void initialize() throws IOException { // prevent collisions of configuration properties set client side if now cluster side String property = flowProcess.getStringProperty( "cascading.node.accumulated.source.conf." + Tap.id( tap ) ); if( property == null ) { // default behavior is to accumulate paths, so remove any set prior conf = HadoopUtil.removePropertiesFrom( conf, "mapred.input.dir", "mapreduce.input.fileinputformat.inputdir" ); // hadoop2 tap.sourceConfInit( flowProcess, conf ); } JobConf jobConf = asJobConfInstance( conf ); inputFormat = jobConf.getInputFormat(); if( inputFormat instanceof JobConfigurable ) ( (JobConfigurable) inputFormat ).configure( jobConf ); // do not test for existence, let hadoop decide how to handle the given path // this delegates globbing to the inputformat on split generation. splits = inputFormat.getSplits( jobConf, 1 ); if( splits.length == 0 ) complete = true; }
protected void initTaps( FlowProcess<Properties> flowProcess, Properties conf, Set<Tap> taps, boolean isSink ) { if( !taps.isEmpty() ) { for( Tap tap : taps ) { Properties confCopy = flowProcess.copyConfig( conf ); tapProperties.put( tap, confCopy ); // todo: store the diff, not the copy if( isSink ) tap.sinkConfInit( flowProcess, confCopy ); else tap.sourceConfInit( flowProcess, confCopy ); } } }
/**
 * Creates the sink stage for the given tap.
 * <p>
 * The logical output is looked up in {@code outputMap} by the tap id first; if
 * that misses, the value of the {@code cascading.node.sink.<id>} property is used
 * as the lookup key instead.
 *
 * @param sink the tap to create a stage for
 * @return a new {@code TezSinkStage} wrapping the resolved logical output
 * @throws IllegalStateException when neither lookup yields an output
 */
@Override
protected SinkStage createSinkStage(Tap sink) {
    String sinkId = Tap.id(sink);
    LogicalOutput output = outputMap.get(sinkId);

    if (output == null) {
        String mappedKey = flowProcess.getStringProperty("cascading.node.sink." + sinkId);
        output = outputMap.get(mappedKey);
    }

    if (output == null) {
        throw new IllegalStateException("could not find output for: " + sink);
    }

    return new TezSinkStage(flowProcess, sink, output);
}
/**
 * Looks up {@code key} on the wrapped delegate.
 *
 * @param key the property name to look up
 * @return whatever the delegate returns for {@code key}
 */
@Override
public Object getProperty(String key) {
    Object value = delegate.getProperty(key);
    return value;
}
/** * Constructor TapCollector creates a new TapCollector instance. * * @param flowProcess * of type FlowProcess * @param tap * of type Tap * @throws IOException * when fails to initialize */ public HBaseTapCollector(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap) throws IOException { super(flowProcess, tap.getScheme()); this.hadoopFlowProcess = flowProcess; this.tap = tap; this.conf = new JobConf(flowProcess.getConfigCopy()); this.setOutput(this); }
/**
 * Rebuilds {@code childConfigs} with one entry per child tap.
 * <p>
 * For each tap returned by {@code getTaps()}, a copy of {@code conf} is made, the
 * tap runs its sink initialization against that copy, and the result of
 * {@code diffConfigIntoMap(conf, copy)} is appended to {@code childConfigs}.
 *
 * @param flowProcess the flow process used to copy and diff configurations
 * @param conf        the configuration every child copy starts from
 */
private void bridge(FlowProcess flowProcess, Object conf) {
    childConfigs = new ArrayList<>();

    int index = 0;

    while (index < getTaps().length) {
        Tap child = getTaps()[index];
        Object childConf = flowProcess.copyConfig(conf);

        child.sinkConfInit(flowProcess, childConf);
        childConfigs.add(flowProcess.diffConfigIntoMap(conf, childConf));

        index++;
    }
}
/**
 * Returns the configuration object held by the wrapped delegate.
 *
 * @return the delegate's configuration
 */
@Override
public Config getConfig() {
    Config current = delegate.getConfig();
    return current;
}
@Override protected TupleEntrySchemeCollector createTupleEntrySchemeCollector( FlowProcess<? extends Properties> flowProcess, Tap parent, String path, long sequence ) throws IOException { if( sequence != -1 && flowProcess.getConfig() != null ) ( (LocalFlowProcess) FlowProcessWrapper.undelegate( flowProcess ) ).getConfig().setProperty( PART_NUM_PROPERTY, Long.toString( sequence ) ); if( parent instanceof TapWith ) return (TupleEntrySchemeCollector) ( (TapWith) parent ) .withChildIdentifier( path ) .withSinkMode( SinkMode.UPDATE ) .asTap().openForWrite( flowProcess ); TapFileOutputStream output = new TapFileOutputStream( parent, path, true ); // always append return new TupleEntrySchemeCollector<Properties, OutputStream>( flowProcess, parent, output ); }
/**
 * Deletes the resource represented by this instance, using the configuration
 * carried by the given flow process.
 *
 * @param flowProcess of type FlowProcess; only its configuration is used
 * @return the result of the configuration-based {@code deleteResource} overload
 * @throws IOException when the resource cannot be deleted
 */
public boolean deleteResource(FlowProcess<? extends Config> flowProcess) throws IOException {
    Config config = flowProcess.getConfig();
    return deleteResource(config);
}
/**
 * Returns the current input file as recorded in the flow process properties.
 * <p>
 * {@code mapreduce.map.input.file} is consulted first; {@code map.input.file} is
 * only read when the first property is not set.
 *
 * @return the property value, or {@code null} when neither property is set
 */
private String getCurrentFile() {
    String current = flowProcess.getStringProperty("mapreduce.map.input.file");

    if (current != null) {
        return current;
    }

    return flowProcess.getStringProperty("map.input.file");
}
/**
 * Returns a fully qualified resource identifier, resolved with the
 * configuration carried by the given flow process.
 *
 * @param flowProcess of type FlowProcess; only its configuration is used
 * @return the result of the configuration-based {@code getFullIdentifier} overload
 */
public String getFullIdentifier(FlowProcess<? extends Config> flowProcess) {
    Config config = flowProcess.getConfig();
    return getFullIdentifier(config);
}
/**
 * Tells whether the path represented by this instance exists, using the
 * configuration carried by the given flow process.
 *
 * @param flowProcess of type FlowProcess; only its configuration is used
 * @return true if the underlying resource already exists
 * @throws IOException when the status cannot be determined
 */
public boolean resourceExists(FlowProcess<? extends Config> flowProcess) throws IOException {
    Config config = flowProcess.getConfig();
    return resourceExists(config);
}
@Override protected String getCurrentIdentifier( FlowProcess<? extends Configuration> flowProcess ) { String identifier = flowProcess.getStringProperty( FileType.CASCADING_SOURCE_PATH ); // set on current split if( identifier == null ) { if( flowProcess.getBooleanProperty( HfsProps.COMBINE_INPUT_FILES, false ) ) throw new TapException( "combined input format support, via '" + HfsProps.COMBINE_INPUT_FILES + "', may not be enabled for use with the PartitionTap" ); throw new TapException( "unable to retrieve the current file being processed, '" + FileType.CASCADING_SOURCE_PATH + "' is not set" ); } return new Path( identifier ).getParent().toString(); // drop part-xxxx }
/**
 * Creates the underlying resource, using the configuration carried by the
 * given flow process.
 *
 * @param flowProcess of type FlowProcess; only its configuration is used
 * @return the result of the configuration-based {@code createResource} overload
 * @throws IOException when there is an error making directories
 */
public boolean createResource(FlowProcess<? extends Config> flowProcess) throws IOException {
    Config config = flowProcess.getConfig();
    return createResource(config);
}
/**
 * Returns the date this resource was last modified, using the configuration
 * carried by the given flow process.
 * <p>
 * If the resource does not exist, returns zero (0).
 * <p>
 * If the resource is continuous, returns {@link Long#MAX_VALUE}.
 *
 * @param flowProcess of type FlowProcess; only its configuration is used
 * @return The date this resource was last modified.
 * @throws IOException propagated from the configuration-based {@code getModifiedTime} overload
 */
public long getModifiedTime(FlowProcess<? extends Config> flowProcess) throws IOException {
    Config config = flowProcess.getConfig();
    return getModifiedTime(config);
}
/**
 * Logs the current value of every counter named in the
 * {@code FlowRuntimeProps.LOG_COUNTERS} property.
 * <p>
 * The property value is split on {@code ,} into entries, and each entry is split
 * on {@code :} into a group (first part) and a counter name (second part). For
 * each entry the value from {@code flowProcess.getCounterValue(group, name)} is
 * logged at info level, prefixed with {@code message}.
 * <p>
 * Surrounding whitespace on entries, groups and names is ignored. Blank entries
 * (for example from a doubled comma) are skipped. An entry without both a group
 * and a name is reported at warn level and skipped, so a malformed property
 * value cannot make this method throw.
 *
 * @param logger      the logger to write to
 * @param message     text placed in front of each logged counter
 * @param flowProcess the flow process supplying the property and the counter values
 */
public static void logCounters(Logger logger, String message, FlowProcess flowProcess) {
    String counters = flowProcess.getStringProperty(FlowRuntimeProps.LOG_COUNTERS);

    if (counters == null) {
        return;
    }

    for (String value : counters.split(",")) {
        String entry = value.trim();

        if (entry.isEmpty()) {
            continue; // stray comma, nothing to log
        }

        String[] counter = entry.split(":");

        // String.split drops trailing empty parts, so "group" and "group:" both yield a single element
        if (counter.length < 2) {
            logger.warn("ignoring malformed counter entry '{}', expected 'group:name'", entry);
            continue;
        }

        String group = counter[0].trim();
        String name = counter[1].trim();

        logger.info("{} {}.{}={}", message, group, name, flowProcess.getCounterValue(group, name));
    }
}
/**
 * Returns the current slice number as reported by the wrapped delegate.
 *
 * @return the delegate's current slice number
 */
@Override
public int getCurrentSliceNum() {
    int sliceNum = delegate.getCurrentSliceNum();
    return sliceNum;
}