@Override
public void sinkPrepare(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException {
    super.sinkPrepare(flowProcess, sinkCall);
    Object[] context = new Object[SINK_CTX_SIZE];
    // the tuple is fixed, so we can just use a collection/index
    Settings settings = loadSettings(flowProcess.getConfigCopy(), false);
    context[SINK_CTX_ALIASES] = CascadingUtils.fieldToAlias(settings, getSinkFields());
    sinkCall.setContext(context);
}
@Override
public void sinkPrepare(FlowProcess<Properties> flowProcess, SinkCall<Object[], Object> sinkCall) throws IOException {
    super.sinkPrepare(flowProcess, sinkCall);
    Object[] context = new Object[SINK_CTX_SIZE];
    Settings settings = HadoopSettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
    context[SINK_CTX_ALIASES] = CascadingUtils.fieldToAlias(settings, getSinkFields());
    sinkCall.setContext(context);
}
@Override
public void sourcePrepare(FlowProcess<Properties> flowProcess, SourceCall<Object[], ScrollQuery> sourceCall) throws IOException {
    super.sourcePrepare(flowProcess, sourceCall);
    Object[] context = new Object[SRC_CTX_SIZE];
    Settings settings = HadoopSettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
    context[SRC_CTX_ALIASES] = CascadingUtils.alias(settings);
    context[SRC_CTX_OUTPUT_JSON] = settings.getOutputAsJson();
    sourceCall.setContext(context);
}
@Override
public void sourcePrepare(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException {
    super.sourcePrepare(flowProcess, sourceCall);
    Object[] context = new Object[SRC_CTX_SIZE];
    context[SRC_CTX_KEY] = sourceCall.getInput().createKey();
    context[SRC_CTX_VALUE] = sourceCall.getInput().createValue();
    // as the tuple _might_ vary (some objects might be missing), we use a map rather than a collection
    Settings settings = loadSettings(flowProcess.getConfigCopy(), true);
    context[SRC_CTX_ALIASES] = CascadingUtils.alias(settings);
    context[SRC_CTX_OUTPUT_JSON] = settings.getOutputAsJson();
    sourceCall.setContext(context);
}
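All four prepare hooks above work off flowProcess.getConfigCopy() rather than the live configuration, so any state derived during preparation cannot leak back into the running flow. A minimal sketch of why that is safe, assuming only vanilla Hadoop on the classpath ("es.output.json" is just an illustrative key):

import org.apache.hadoop.mapred.JobConf;

public class ConfigCopyDemo {
    public static void main(String[] args) {
        JobConf live = new JobConf(false);      // skip loading default resources
        JobConf copy = new JobConf(live);       // roughly what getConfigCopy() hands back
        copy.set("es.output.json", "true");     // mutate only the copy...
        System.out.println(live.get("es.output.json")); // ...prints null: live conf untouched
    }
}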
if (input == null) {
    Settings settings = CascadingUtils.addDefaultsToSettings(
            CascadingUtils.extractOriginalProperties(flowProcess.getConfigCopy()),
            tapProperties, log);
@Override
public Config getConfigCopy() {
    return delegate.getConfigCopy();
}
/**
 * Constructor MultiRecordReaderIterator creates a new MultiRecordReaderIterator instance.
 *
 * @throws IOException when the underlying record readers cannot be initialized
 */
public MultiRecordReaderIterator(FlowProcess<? extends Configuration> flowProcess, Tap tap) throws IOException {
    this.flowProcess = flowProcess;
    this.tap = tap;
    this.conf = flowProcess.getConfigCopy();
    initialize();
}
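Callers rarely construct MultiRecordReaderIterator directly; the usual entry point is Tap.openForRead, which wires up the same record-reader machinery. A sketch of that usage (the dump helper is hypothetical, the Tap calls are standard Cascading API):

import java.io.IOException;
import cascading.flow.FlowProcess;
import cascading.tap.Tap;
import cascading.tuple.TupleEntryIterator;

public class TapDump {
    // Hypothetical helper: iterate a tap's tuples via the public API.
    static void dump(Tap tap, FlowProcess flowProcess) throws IOException {
        TupleEntryIterator iterator = tap.openForRead(flowProcess);
        try {
            while (iterator.hasNext())
                System.out.println(iterator.next()); // prints each TupleEntry
        } finally {
            iterator.close();
        }
    }
}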
public TapOutputCollector(FlowProcess<? extends Configuration> flowProcess,
                          Tap<Configuration, RecordReader, OutputCollector> tap,
                          String prefix, long sequence) throws IOException {
    this.tap = tap;
    this.sequence = sequence;
    this.prefix = prefix == null || prefix.length() == 0 ? null : prefix;
    this.flowProcess = flowProcess;
    this.conf = this.flowProcess.getConfigCopy();
    this.filenamePattern = this.conf.get("cascading.tapcollector.partname",
            sequence == -1 ? PART_TASK_PATTERN : PART_TASK_SEQ_PATTERN);
    initialize();
}
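Because the collector reads cascading.tapcollector.partname from its config copy with a fallback to the built-in patterns, the part-file name can be customized per flow by setting that same key on the properties handed to the FlowConnector. A sketch; the pattern value below is a java.util.Formatter-style example, not necessarily the shipped default:

import java.util.Properties;

public class PartNameConfig {
    public static void main(String[] args) {
        Properties props = new Properties();
        // same key TapOutputCollector reads from its config copy above
        props.setProperty("cascading.tapcollector.partname", "%s%spart-%05d");
        // props would then be passed to the FlowConnector when building the flow
        System.out.println(props.getProperty("cascading.tapcollector.partname"));
    }
}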
@Override
public void prepare(FlowProcess flowProcess, OperationCall operationCall) {
    super.prepare(flowProcess, operationCall);
    tupleSerializationUtil = new TupleSerializationUtil((JobConf) flowProcess.getConfigCopy());
}
@Override
public TupleEntryCollector openForWrite(FlowProcess<Properties> flowProcess, OutputCollector output) throws IOException {
    JobConf jobConf = mergeDefaults("LocalTap#openForWrite", flowProcess.getConfigCopy(), defaults);
    return lfs.openForWrite(new HadoopFlowProcess(jobConf));
}
@Override
public TupleEntryIterator openForRead(FlowProcess<Properties> flowProcess, RecordReader input) throws IOException {
    JobConf jobConf = mergeDefaults("LocalTap#openForRead", flowProcess.getConfigCopy(), defaults);
    return lfs.openForRead(new HadoopFlowProcess(jobConf));
}
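Both LocalTap methods bridge a local Properties-based FlowProcess into the Hadoop world before delegating to the underlying Lfs tap. What merging the defaults amounts to is roughly the following (toJobConf is a hypothetical stand-in, not LocalTap's actual mergeDefaults helper):

import java.util.Properties;
import org.apache.hadoop.mapred.JobConf;

public class PropsToJobConf {
    // Hypothetical sketch: fold defaults and flow properties into a fresh JobConf.
    static JobConf toJobConf(Properties flowProps, Properties defaults) {
        JobConf conf = new JobConf(false);               // start empty, no default resources
        for (String key : defaults.stringPropertyNames())
            conf.set(key, defaults.getProperty(key));
        for (String key : flowProps.stringPropertyNames())
            conf.set(key, flowProps.getProperty(key));   // flow properties win over defaults
        return conf;
    }
}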
private List<Footer> getFooters(FlowProcess<? extends JobConf> flowProcess, Hfs hfs) throws IOException {
    JobConf jobConf = flowProcess.getConfigCopy();
    DeprecatedParquetInputFormat format = new DeprecatedParquetInputFormat();
    format.addInputPath(jobConf, hfs.getPath());
    return format.getFooters(jobConf);
}
private List<Footer> getFooters(FlowProcess<JobConf> flowProcess, Hfs hfs) throws IOException {
    JobConf jobConf = flowProcess.getConfigCopy();
    DeprecatedParquetInputFormat format = new DeprecatedParquetInputFormat();
    format.addInputPath(jobConf, hfs.getPath());
    return format.getFooters(jobConf);
}
public void prepare(FlowProcess flowProcess, OperationCall operationCall) {
    JobConf conf = (JobConf) flowProcess.getConfigCopy();
    int maxHashes = BloomProps.getMaxBloomHashes(conf);
    long numBits = BloomProps.getNumBloomBits(conf);
    int numSplits = BloomProps.getNumSplits(conf);
    splitSize = BloomUtil.getSplitSize(numBits, numSplits);
    function = factory.getFunction(numBits, maxHashes);
    approxCounter = new HyperLogLog(BloomProps.getHllErr(conf));
    hashResult = new long[maxHashes];
}
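The BloomProps accessors above are, at bottom, typed reads against the task's private config copy. A minimal sketch of the idea (key names and defaults here are hypothetical, not cascading_ext's real ones):

import org.apache.hadoop.mapred.JobConf;

public class BloomParams {
    // Hypothetical keys/defaults illustrating typed reads from a config copy.
    static long numBloomBits(JobConf conf) {
        return conf.getLong("bloom.filter.num.bits", 64L * 1024 * 1024);
    }

    static int maxBloomHashes(JobConf conf) {
        return conf.getInt("bloom.filter.max.hashes", 4);
    }
}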