/**
 * Binds the task context to this fn and forwards it to the wrapped
 * post-process fn so both share the same runtime context.
 */
@Override
public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
  super.setContext(context);
  postProcessFn.setContext(context);
}
/**
 * Propagates the task context to this fn and to the delegate reduce fn,
 * keeping their runtime state in sync.
 */
@Override
public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
  super.setContext(context);
  reduceFn.setContext(context);
}
/**
 * Stores the task context on this fn and hands the same context to the
 * wrapped reduce fn.
 */
@Override
public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
  super.setContext(context);
  reduceFn.setContext(context);
}
/**
 * Initializes the interned fn: binds it to this fn's context, then runs
 * its one-time setup.
 */
@Override
public void initialize() {
  intern.setContext(getContext());
  intern.initialize();
}
/**
 * Reads records from the delegate source and lazily applies {@code fn}
 * to each one.
 *
 * The fn is bound to the task context and initialized once, up front;
 * the per-record transformation happens on demand as the returned
 * iterable is traversed.
 */
@Override
public Iterable<T> read(TaskInputOutputContext<?, ?, ?, ?> context) throws IOException {
  fn.setContext(context);
  fn.initialize();
  final Iterable<S> source = delegate.read(context);
  return new Iterable<T>() {
    @Override
    public Iterator<T> iterator() {
      // Each traversal re-applies fn to a fresh iterator over the source.
      return new DoFnIterator<S, T>(source.iterator(), fn);
    }
  };
}
}
/**
 * Runs the wrapped fn's setup after giving it this fn's current context.
 */
@Override
public void initialize() {
  intern.setContext(getContext());
  intern.initialize();
}
public void initialize(CrunchTaskContext ctxt) { if (emitter != null) { // Already initialized return; } fn.setContext(ctxt.getContext()); for (RTNode child : children) { child.initialize(ctxt); } if (outputConverter != null) { if (outputName != null) { this.emitter = new MultipleOutputEmitter( outputConverter, ctxt.getMultipleOutputs(), outputName); } else { this.emitter = new OutputEmitter( outputConverter, ctxt.getContext()); } } else if (!children.isEmpty()) { this.emitter = new IntermediateEmitter(children); } else { throw new CrunchRuntimeException("Invalid RTNode config: no emitter for: " + nodeName); } }
/**
 * Runs the DoFn eagerly over the in-memory collection and materializes
 * the emitted pairs as a new in-memory table.
 *
 * @param name logical name for the resulting table
 * @param doFn the transformation to apply to every element
 * @param type the table type of the emitted key/value pairs
 * @param options ignored by the in-memory implementation
 * @return a MemTable holding everything the fn emitted
 */
@Override
public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type, ParallelDoOptions options) {
  InMemoryEmitter<Pair<K, V>> emitter = new InMemoryEmitter<Pair<K, V>>();
  // Consistency fix: mirror the full DoFn lifecycle used by the MR
  // runtime (configure -> setContext -> initialize); previously the
  // configure step was skipped for in-memory execution.
  doFn.configure(getPipeline().getConfiguration());
  doFn.setContext(getInMemoryContext(getPipeline().getConfiguration()));
  doFn.initialize();
  for (S s : collect) {
    doFn.process(s, emitter);
  }
  doFn.cleanup(emitter);
  return new MemTable<K, V>(emitter.getOutput(), type, name);
}
/**
 * Runs the DoFn eagerly over the in-memory collection and materializes
 * the emitted values as a new in-memory PCollection.
 *
 * @param name logical name for the resulting collection
 * @param doFn the transformation to apply to every element
 * @param type the element type of the emitted values
 * @param options ignored by the in-memory implementation
 * @return a MemCollection holding everything the fn emitted
 */
@Override
public <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type, ParallelDoOptions options) {
  InMemoryEmitter<T> emitter = new InMemoryEmitter<T>();
  // Consistency fix: run the complete DoFn lifecycle
  // (configure -> setContext -> initialize), matching the MR runtime;
  // the configure step was previously missing here.
  doFn.configure(getPipeline().getConfiguration());
  doFn.setContext(getInMemoryContext(getPipeline().getConfiguration()));
  doFn.initialize();
  for (S s : collect) {
    doFn.process(s, emitter);
  }
  doFn.cleanup(emitter);
  return new MemCollection<T>(emitter.getOutput(), type, name);
}
public void initialize(CrunchTaskContext ctxt) { if (emitter != null) { // Already initialized return; } fn.setContext(ctxt.getContext()); fn.initialize(); for (RTNode child : children) { child.initialize(ctxt); } if (outputConverter != null) { if (outputName != null) { this.emitter = new MultipleOutputEmitter(outputConverter, ctxt.getMultipleOutputs(), outputName); } else { this.emitter = new OutputEmitter(outputConverter, ctxt.getContext()); } } else if (!children.isEmpty()) { Configuration conf = ctxt.getContext().getConfiguration(); boolean disableDeepCopy = conf.getBoolean(RuntimeParameters.DISABLE_DEEP_COPY, false); this.emitter = new IntermediateEmitter(outputPType, children, conf, disableDeepCopy || fn.disableDeepCopy()); } else { throw new CrunchRuntimeException("Invalid RTNode config: no emitter for: " + nodeName); } }
/**
 * Binds the given fn to a Hadoop task context, lazily (re)building the
 * context whenever none exists yet or the requested task id differs from
 * the one used on the previous call.
 *
 * @param fn the fn to bind and initialize
 * @param tid the task id to embed in the attempt id, or null for a
 *     default attempt id
 */
public void initialize(DoFn<?, ?> fn, Integer tid) {
  boolean contextIsStale = (context == null) || !Objects.equal(lastTID, tid);
  if (contextIsStale) {
    TaskAttemptID attemptID;
    if (tid == null) {
      attemptID = new TaskAttemptID();
      lastTID = null;
    } else {
      TaskID taskId = new TaskID(new JobID(jobName, 0), false, tid);
      attemptID = new TaskAttemptID(taskId, 0);
      lastTID = tid;
    }
    configureLocalFiles();
    context = new MapContextImpl(getConfiguration(), attemptID, null, null, null,
        new SparkReporter(counters), null);
  }
  fn.setContext(context);
  fn.initialize();
}
/**
 * Attaches a Hadoop task context to the fn and runs its setup. A fresh
 * context is constructed only when there is no cached context or the
 * task id changed since the last invocation; otherwise the cached
 * context is reused.
 */
public void initialize(DoFn<?, ?> fn, Integer tid) {
  if (context == null || !Objects.equal(lastTID, tid)) {
    final TaskAttemptID attemptID;
    if (tid != null) {
      // Derive a stable attempt id from the job name and task id.
      attemptID = new TaskAttemptID(new TaskID(new JobID(jobName, 0), false, tid), 0);
      lastTID = tid;
    } else {
      attemptID = new TaskAttemptID();
      lastTID = null;
    }
    configureLocalFiles();
    context = new MapContextImpl(getConfiguration(), attemptID, null, null, null,
        new SparkReporter(counters), null);
  }
  fn.setContext(context);
  fn.initialize();
}
/**
 * Applies the DoFn eagerly to every element of this in-memory collection
 * and gathers the emitted key/value pairs into a new in-memory table.
 * The fn runs through the full lifecycle: configure, bind context,
 * initialize, process each element, cleanup.
 */
@Override
public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type, ParallelDoOptions options) {
  Configuration conf = getPipeline().getConfiguration();
  doFn.configure(conf);
  doFn.setContext(getInMemoryContext(conf));
  doFn.initialize();
  InMemoryEmitter<Pair<K, V>> collector = new InMemoryEmitter<Pair<K, V>>();
  for (S element : collect) {
    doFn.process(element, collector);
  }
  doFn.cleanup(collector);
  return new MemTable<K, V>(collector.getOutput(), type, name);
}
public void initialize(CrunchTaskContext ctxt) { if (emitter != null) { // Already initialized return; } fn.setContext(ctxt.getContext()); fn.initialize(); for (RTNode child : children) { child.initialize(ctxt); } if (outputConverter != null) { if (outputName != null) { this.emitter = new MultipleOutputEmitter(outputConverter, ctxt.getMultipleOutputs(), outputName); } else { this.emitter = new OutputEmitter(outputConverter, ctxt.getContext()); } } else if (!children.isEmpty()) { this.emitter = new IntermediateEmitter(outputPType, children, ctxt.getContext().getConfiguration()); } else { throw new CrunchRuntimeException("Invalid RTNode config: no emitter for: " + nodeName); } }
/**
 * Applies the DoFn eagerly to each element of this in-memory collection
 * and returns the emitted values as a new in-memory PCollection. The fn
 * is first round-tripped through serialization (to surface serialization
 * bugs early), then run through the full lifecycle: configure, bind
 * context, initialize, process, cleanup.
 */
@Override
public <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type, ParallelDoOptions options) {
  doFn = verifySerializable(name, doFn);
  Configuration conf = getPipeline().getConfiguration();
  doFn.configure(conf);
  doFn.setContext(getInMemoryContext(conf));
  doFn.initialize();
  InMemoryEmitter<T> collector = new InMemoryEmitter<T>();
  for (S element : collect) {
    doFn.process(element, collector);
  }
  doFn.cleanup(collector);
  return new MemCollection<T>(collector.getOutput(), type, name);
}