/**
 * Called during setup to pass the {@link TaskInputOutputContext} to
 * this {@code DoFn} instance.
 *
 * <p>The context is stored on this instance and {@code initialize()} is then
 * invoked immediately, so callers do not need to call it separately after
 * this method returns.
 *
 * @param context the task context to store on this instance
 */
public void setContext(TaskInputOutputContext<?, ?, ?, ?> context) {
  this.context = context;
  // Runs after the assignment so that initialization can see the stored context.
  initialize();
}
/**
 * Runs the superclass initialization, then initializes {@code postProcessFn}
 * and {@code leftValueType}.
 */
@Override
public void initialize() {
  super.initialize();
  postProcessFn.initialize();
  // The value type is initialized with this function's configuration.
  leftValueType.initialize(getConfiguration());
}
/**
 * Initializes the wrapped {@code intern} function: passes it this function's
 * context and then calls its {@code initialize()} method.
 */
@Override
public void initialize() {
  // The context is handed over first, before intern's own initialization runs.
  intern.setContext(getContext());
  intern.initialize();
}
/**
 * Runs the superclass initialization and then initializes {@code ptype}
 * with this function's configuration.
 */
@Override
public void initialize() {
  super.initialize();
  ptype.initialize(getConfiguration());
}
/**
 * Initializes the wrapped {@code intern} function: passes it this function's
 * context and then calls its {@code initialize()} method.
 */
@Override
public void initialize() {
  // The context is handed over first, before intern's own initialization runs.
  intern.setContext(getContext());
  intern.initialize();
}
/**
 * Reads the delegate's data and exposes it through {@code fn}.
 *
 * <p>{@code fn} is given the supplied context and initialized before the
 * delegate is read. Each call to {@code iterator()} on the returned iterable
 * creates a new {@code DoFnIterator} over a fresh iterator of the delegate's
 * data.
 *
 * @param context the task context passed to both {@code fn} and the delegate
 * @return an iterable whose iterators wrap the delegate's iterators with {@code fn}
 * @throws IOException if the delegate's {@code read} throws it
 */
@Override
public Iterable<T> read(TaskInputOutputContext<?, ?, ?, ?> context) throws IOException {
  fn.setContext(context);
  fn.initialize();

  // The delegate is read once, up front; only iterator creation is deferred.
  final Iterable<S> upstream = delegate.read(context);

  return new Iterable<T>() {
    @Override
    public Iterator<T> iterator() {
      return new DoFnIterator<S, T>(upstream.iterator(), fn);
    }
  };
}
} // closes the enclosing class, whose header is outside this excerpt
/**
 * Initializes the wrapped {@code reduceFn} and then {@code valueType}.
 *
 * <p>If {@code getConfiguration()} returns {@code null}, a newly created
 * {@link Configuration} is used to initialize the value type instead.
 */
@Override
public void initialize() {
  reduceFn.initialize();

  // Look the configuration up once, so the value that is null-checked is
  // the same one that is passed on.
  Configuration conf = getConfiguration();
  if (conf == null) {
    conf = new Configuration();
  }
  valueType.initialize(conf);
}
/**
 * Initializes the wrapped {@code reduceFn} and then {@code valueType}.
 *
 * <p>If {@code getConfiguration()} returns {@code null}, a newly created
 * {@link Configuration} is used to initialize the value type instead.
 */
@Override
public void initialize() {
  reduceFn.initialize();

  // Look the configuration up once, so the value that is null-checked is
  // the same one that is passed on.
  Configuration conf = getConfiguration();
  if (conf == null) {
    conf = new Configuration();
  }
  valueType.initialize(conf);
}
/**
 * Runs the superclass initialization and then assigns {@code bloomFilter}
 * from {@code initializeFilter}, passing it the value that
 * {@code getBloomFilterSize} derives from this function's configuration.
 */
@Override
public void initialize() {
  super.initialize();
  bloomFilter = initializeFilter(getBloomFilterSize(getConfiguration()));
}
/**
 * Runs the superclass initialization, creates a new {@code BloomFilter},
 * initializes {@code ptype}, and obtains the key-to-bytes mapping function.
 */
@Override
public void initialize() {
  super.initialize();
  // A new filter is built from vectorSize and nbHash with the Murmur hash constant.
  bloomFilter = new BloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH);
  // ptype is initialized before it is passed to getKeyToBytesMapFn below.
  ptype.initialize(getConfiguration());
  keyToBytesFn = getKeyToBytesMapFn(ptype, getConfiguration());
}
/**
 * Loads the pairs stored at the cache file path into {@code joinMap}.
 *
 * <p>After the superclass initialization, the default file source of
 * {@code ptype} for {@code getCacheFilePath()} is read using this function's
 * configuration. Every pair read is added to a newly created multimap,
 * keyed by the pair's first element.
 *
 * @throws CrunchRuntimeException if reading the source fails with an {@link IOException}
 */
@Override
public void initialize() {
  super.initialize();

  ReadableSourceTarget<Pair<K, V>> cachedSource =
      (ReadableSourceTarget<Pair<K, V>>) ptype.getDefaultFileSource(getCacheFilePath());

  final Iterable<Pair<K, V>> rightSide;
  try {
    rightSide = cachedSource.read(getConfiguration());
  } catch (IOException e) {
    throw new CrunchRuntimeException("Error reading right-side of map side join: ", e);
  }

  // The map is only (re)assigned once the read call has succeeded.
  joinMap = ArrayListMultimap.create();
  for (Pair<K, V> entry : rightSide) {
    joinMap.put(entry.first(), entry.second());
  }
}
/**
 * Loads the pairs stored at the cache file path into {@code joinMap}.
 *
 * <p>After the superclass initialization, the default file source of
 * {@code ptype} for {@code getCacheFilePath()} is read using this function's
 * configuration. Every pair read is added to a newly created multimap,
 * keyed by the pair's first element.
 *
 * @throws CrunchRuntimeException if reading the source fails with an {@link IOException}
 */
@Override
public void initialize() {
  super.initialize();

  ReadableSourceTarget<Pair<K, V>> cachedSource =
      (ReadableSourceTarget<Pair<K, V>>) ptype.getDefaultFileSource(getCacheFilePath());

  final Iterable<Pair<K, V>> rightSide;
  try {
    rightSide = cachedSource.read(getConfiguration());
  } catch (IOException e) {
    throw new CrunchRuntimeException("Error reading right-side of map side join: ", e);
  }

  // The map is only (re)assigned once the read call has succeeded.
  joinMap = ArrayListMultimap.create();
  for (Pair<K, V> entry : rightSide) {
    joinMap.put(entry.first(), entry.second());
  }
}
/**
 * Runs {@code doFn} over every element of {@code collect} and returns the
 * emitted pairs as a new {@code MemTable}.
 *
 * <p>The function is initialized first, then fed each element in iteration
 * order, and finally given a chance to emit more output through
 * {@code cleanup}.
 *
 * @param name the name given to the resulting table
 * @param doFn the function applied to each element
 * @param type the table type given to the resulting table
 * @return a {@code MemTable} built from everything {@code doFn} emitted
 */
@Override
public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type) {
  doFn.initialize();

  InMemoryEmitter<Pair<K, V>> sink = new InMemoryEmitter<Pair<K, V>>();
  for (S element : collect) {
    doFn.process(element, sink);
  }
  doFn.cleanup(sink);

  return new MemTable<K, V>(sink.getOutput(), type, name);
}
/**
 * Runs {@code doFn} over every element of {@code collect} and returns the
 * emitted values as a new {@code MemCollection}.
 *
 * <p>The function is initialized first, then fed each element in iteration
 * order, and finally given a chance to emit more output through
 * {@code cleanup}.
 *
 * @param name the name given to the resulting collection
 * @param doFn the function applied to each element
 * @param type the type given to the resulting collection
 * @return a {@code MemCollection} built from everything {@code doFn} emitted
 */
@Override
public <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type) {
  doFn.initialize();

  InMemoryEmitter<T> sink = new InMemoryEmitter<T>();
  for (S element : collect) {
    doFn.process(element, sink);
  }
  doFn.cleanup(sink);

  return new MemCollection<T>(sink.getOutput(), type, name);
}
/**
 * Builds {@code joinMap} from the data exposed by {@code readable}.
 *
 * <p>After the superclass initialization, {@code tableType} is initialized
 * with this function's configuration and a new multimap is created. Each
 * pair read with this function's context is passed through
 * {@code tableType.getDetachedValue} before its key and value are stored.
 *
 * @throws CrunchRuntimeException if reading fails with an {@link IOException}
 */
@Override
public void initialize() {
  super.initialize();
  tableType.initialize(getConfiguration());
  joinMap = ArrayListMultimap.create();

  try {
    for (Pair<K, V> rawPair : readable.read(getContext())) {
      // Store the detached copy returned by the table type, not the pair as read.
      Pair<K, V> detached = tableType.getDetachedValue(rawPair);
      joinMap.put(detached.first(), detached.second());
    }
  } catch (IOException e) {
    throw new CrunchRuntimeException("Error reading map-side join data", e);
  }
}
/**
 * Runs the superclass initialization and then creates {@code ctxt}.
 *
 * <p>When this function has a context, {@code ctxt} is built from it;
 * otherwise it is built from this function's configuration.
 */
@Override
public void initialize() {
  super.initialize();
  if (getContext() != null) {
    this.ctxt = new Context<>(getContext());
  } else {
    // No context is available: fall back to the configuration.
    this.ctxt = new Context<>(getConfiguration());
  }
}
/**
 * Runs {@code doFn} over every element of {@code collect} and returns the
 * emitted pairs as a new {@code MemTable}.
 *
 * <p>Before processing, the function receives a context obtained from
 * {@code getInMemoryContext} for the pipeline's configuration and is then
 * initialized. After all elements are processed, {@code cleanup} is called
 * with the same emitter. The {@code options} argument is not used by this
 * method.
 *
 * @param name the name given to the resulting table
 * @param doFn the function applied to each element
 * @param type the table type given to the resulting table
 * @param options not read by this implementation
 * @return a {@code MemTable} built from everything {@code doFn} emitted
 */
@Override
public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type,
    ParallelDoOptions options) {
  doFn.setContext(
      getInMemoryContext(
          getPipeline().getConfiguration()));
  doFn.initialize();

  InMemoryEmitter<Pair<K, V>> sink = new InMemoryEmitter<Pair<K, V>>();
  for (S element : collect) {
    doFn.process(element, sink);
  }
  doFn.cleanup(sink);

  return new MemTable<K, V>(sink.getOutput(), type, name);
}
/**
 * Runs {@code doFn} over every element of {@code collect} and returns the
 * emitted values as a new {@code MemCollection}.
 *
 * <p>Before processing, the function receives a context obtained from
 * {@code getInMemoryContext} for the pipeline's configuration and is then
 * initialized. After all elements are processed, {@code cleanup} is called
 * with the same emitter. The {@code options} argument is not used by this
 * method.
 *
 * @param name the name given to the resulting collection
 * @param doFn the function applied to each element
 * @param type the type given to the resulting collection
 * @param options not read by this implementation
 * @return a {@code MemCollection} built from everything {@code doFn} emitted
 */
@Override
public <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type,
    ParallelDoOptions options) {
  doFn.setContext(
      getInMemoryContext(
          getPipeline().getConfiguration()));
  doFn.initialize();

  InMemoryEmitter<T> sink = new InMemoryEmitter<T>();
  for (S element : collect) {
    doFn.process(element, sink);
  }
  doFn.cleanup(sink);

  return new MemCollection<T>(sink.getOutput(), type, name);
}
/**
 * Runs {@code doFn} over every element of {@code collect} and returns the
 * emitted pairs as a new {@code MemTable}.
 *
 * <p>The function is prepared in three steps using the pipeline's
 * configuration: {@code configure}, then {@code setContext} with the
 * context from {@code getInMemoryContext}, then {@code initialize}. After
 * all elements are processed, {@code cleanup} is called with the same
 * emitter. The {@code options} argument is not used by this method.
 *
 * @param name the name given to the resulting table
 * @param doFn the function applied to each element
 * @param type the table type given to the resulting table
 * @param options not read by this implementation
 * @return a {@code MemTable} built from everything {@code doFn} emitted
 */
@Override
public <K, V> PTable<K, V> parallelDo(String name, DoFn<S, Pair<K, V>> doFn, PTableType<K, V> type,
    ParallelDoOptions options) {
  Configuration pipelineConf = getPipeline().getConfiguration();
  doFn.configure(pipelineConf);
  doFn.setContext(getInMemoryContext(pipelineConf));
  doFn.initialize();

  InMemoryEmitter<Pair<K, V>> sink = new InMemoryEmitter<Pair<K, V>>();
  for (S element : collect) {
    doFn.process(element, sink);
  }
  doFn.cleanup(sink);

  return new MemTable<K, V>(sink.getOutput(), type, name);
}
/**
 * Runs {@code doFn} over every element of {@code collect} and returns the
 * emitted values as a new {@code MemCollection}.
 *
 * <p>The function is first passed through {@code verifySerializable}, and
 * the instance that call returns is the one used from then on. It is
 * prepared with the pipeline's configuration: {@code configure}, then
 * {@code setContext} with the context from {@code getInMemoryContext},
 * then {@code initialize}. After all elements are processed,
 * {@code cleanup} is called with the same emitter. The {@code options}
 * argument is not used by this method.
 *
 * @param name the name given to the resulting collection and to {@code verifySerializable}
 * @param doFn the function applied to each element
 * @param type the type given to the resulting collection
 * @param options not read by this implementation
 * @return a {@code MemCollection} built from everything the function emitted
 */
@Override
public <T> PCollection<T> parallelDo(String name, DoFn<S, T> doFn, PType<T> type,
    ParallelDoOptions options) {
  // Work with whatever verifySerializable hands back, not the caller's instance.
  DoFn<S, T> checkedFn = verifySerializable(name, doFn);

  Configuration pipelineConf = getPipeline().getConfiguration();
  checkedFn.configure(pipelineConf);
  checkedFn.setContext(getInMemoryContext(pipelineConf));
  checkedFn.initialize();

  InMemoryEmitter<T> sink = new InMemoryEmitter<T>();
  for (S element : collect) {
    checkedFn.process(element, sink);
  }
  checkedFn.cleanup(sink);

  return new MemCollection<T>(sink.getOutput(), type, name);
}