public SparkStateContext( Settings settings, Serializer serializer, int listStorageMaxElements) { this.provider = new SparkStorageProvider(serializer, listStorageMaxElements); this.spillTools = new SparkSpillTools(serializer, settings); }
/**
 * Creates an executor named "euphoria-example" with the given classes
 * registered for Kryo. When the {@code test} flag is set the executor is
 * built in local mode.
 *
 * @param classes classes to register with Kryo
 * @return the configured executor
 */
@Override
public Executor create(Class<?>... classes) {
  final SparkExecutor.Builder configured =
      SparkExecutor.newBuilder("euphoria-example").registerKryoClasses(classes);
  return test
      ? configured.local().build()
      : configured.build();
}
}
/**
 * Creates a new {@link KeyedWindow} from this object's window, timestamp and key
 * (in that argument order).
 *
 * @return a freshly allocated keyed window
 */
public KeyedWindow<W, K> toKeyedWindow() { return new KeyedWindow<>(window(), timestamp(), key()); } }
/**
 * Creates an environment wrapping a local Spark executor named "test" that
 * allows multiple contexts and uses {@code TestRegistrator} as Kryo registrator.
 * Shutting the environment down shuts the executor down.
 *
 * @return environment exposing the executor
 * @throws Exception declared by the overridden method
 */
@Override
default ExecutorEnvironment newExecutorEnvironment() throws Exception {
  final SparkExecutor.Builder testBuilder = SparkExecutor.newBuilder("test")
      .local()
      .allowMultipleContexts()
      .kryoRegistrator(TestRegistrator.class);
  final Executor sparkExecutor = testBuilder.build();

  return new ExecutorEnvironment() {

    @Override
    public Executor getExecutor() {
      return sparkExecutor;
    }

    @Override
    public void shutdown() throws Exception {
      sparkExecutor.shutdown();
    }
  };
}
}
SparkFlowTranslator(JavaSparkContext sparkEnv, Settings flowSettings, SparkAccumulatorFactory accumulatorFactory, Map<Class<?>, Comparator<?>> comparators) { this.sparkEnv = Objects.requireNonNull(sparkEnv); this.settings = Objects.requireNonNull(flowSettings); this.accumulatorFactory = Objects.requireNonNull(accumulatorFactory); this.comparators = Objects.requireNonNull(comparators); // ~ basic operators Translation.add(translations, FlowUnfolder.InputOperator.class, new InputTranslator()); Translation.add(translations, FlatMap.class, new FlatMapTranslator()); Translation.add(translations, ReduceStateByKey.class, new ReduceStateByKeyTranslator(settings)); Translation.add(translations, Union.class, new UnionTranslator()); // ~ derived operators Translation.add(translations, ReduceByKey.class, new ReduceByKeyTranslator(), ReduceByKeyTranslator::wantTranslate); // ~ batch broadcast join for a very small left side Translation.add(translations, Join.class, new BroadcastHashJoinTranslator(), BroadcastHashJoinTranslator::wantTranslate); // ~ batch broadcast join for a very small left side Translation.add(translations, Join.class, new BatchJoinTranslator(), BatchJoinTranslator::wantTranslate); }
@Override public Iterator<SparkElement<WID, OUT>> call(SparkElement<WID, IN> elem) { final WID window = elem.getWindow(); final long timestamp = getTimestamp(elem); FunctionCollectorMem<OUT> collector = getContext(); // setup user collector collector.clear(); collector.setWindow(window); functor.apply(elem.getElement(), collector); // wrap output in WindowedElement return Iterators.transform(collector.getOutputIterator(), e -> new SparkElement<>(window, timestamp, e)); }
/**
 * Builds a Spark executor whose configuration uses the Kryo serializer and
 * has the given data classes registered with Kryo. The application name is
 * the simple name of {@code EuphoriaSparkTrends}.
 *
 * @param config not read by this implementation
 * @param dataClasses classes to register with Kryo
 * @return the built executor
 * @throws IOException declared by the overridden method
 */
@Override
public Executor newExecutor(Config config, Collection<? extends Class<?>> dataClasses)
    throws IOException {
  final SparkConf sparkConf = new SparkConf();
  final String serializerClassName = KryoSerializer.class.getName();
  sparkConf.set("spark.serializer", serializerClassName);
  sparkConf.registerKryoClasses(dataClasses.toArray(new Class[dataClasses.size()]));

  final String appName = EuphoriaSparkTrends.class.getSimpleName();
  return SparkExecutor.newBuilder(appName, sparkConf).build();
}
}
/**
 * Execute spark in local mode with the default parallelism
 * ({@code DEFAULT_PARALLELISM}); delegates to {@code local(int)}.
 *
 * @return builder
 */
public Builder local() { return local(DEFAULT_PARALLELISM); }
/**
 * Delegates to the wrapped function, passing the input together with the
 * collector obtained from {@code getCollector()}.
 *
 * @param in the input value
 * @return whatever iterator the wrapped function returns
 * @throws Exception propagated from the wrapped function
 */
@Override public Iterator<OUT> call(IN in) throws Exception { return function.call(in, getCollector()); }
/**
 * Returns the cached collector, creating it from {@code accumulators} on
 * first use.
 *
 * @return the (possibly just created) cached collector
 */
private FunctionCollectorMem<OUT> getContext() {
  if (cachedCollector != null) {
    return cachedCollector;
  }
  cachedCollector = new FunctionCollectorMem<>(accumulators);
  return cachedCollector;
}
}
/**
 * Creates a storage provider.
 *
 * @param serializer serializer wrapped into a {@code SparkSerializerFactory}
 * @param listStorageMaxElemsInMemory stored as-is in the field of the same name
 */
SparkStorageProvider(Serializer serializer, int listStorageMaxElemsInMemory) {
  this.listStorageMaxElemsInMemory = listStorageMaxElemsInMemory;
  this.sf = new SparkSerializerFactory(serializer);
}
/**
 * Create a new builder from which the {@link SparkExecutor} can be constructed,
 * initialized with the given application name and Spark configuration.
 *
 * @param appName the application name
 * @param conf to initialize builder from
 * @return builder
 */
public static Builder newBuilder(String appName, SparkConf conf) { return new Builder(appName, conf); }
/**
 * Translates the given {@code ReduceByKey} operator by delegating to
 * {@code doTranslate}. The operator is accepted as a raw type, hence the
 * suppressed unchecked warning.
 *
 * @param operator the operator to translate
 * @param context the executor context passed through to {@code doTranslate}
 * @return the RDD produced by {@code doTranslate}
 */
@Override @SuppressWarnings("unchecked") public JavaRDD<?> translate(ReduceByKey operator, SparkExecutorContext context) { return doTranslate(operator, context); }
/**
 * Tells whether the end of the stream has been reached.
 *
 * A buffered {@code next} element means there is more to consume. Otherwise,
 * unless the stream is already marked finished, one read is attempted via
 * {@code tryReadNext()} before deciding.
 *
 * @return {@code true} when no element is buffered and the stream is finished
 */
@Override
public boolean eof() {
  if (next != null) {
    return false;
  }
  if (!streamFinished) {
    // nothing buffered and not known to be finished: try to fetch one element
    tryReadNext();
  }
  return next == null && streamFinished;
}
/**
 * Returns the timestamp reported by the given element.
 *
 * @param elem the element to read the timestamp from
 * @return {@code elem.getTimestamp()}
 */
protected long getTimestamp(SparkElement<WID, IN> elem) { return elem.getTimestamp(); }
/**
 * Offers finished key statistics to {@code topKeys}.
 *
 * While fewer than {@code NUMBER_OF_TOP_KEYS_TO_KEEP} items are held the item
 * is always added. Once that limit is reached, the item replaces the head of
 * {@code topKeys} only if its element count is strictly greater than the
 * head's; otherwise it is dropped.
 *
 * @param finishedStats statistics of a key whose processing has finished
 */
private void keepOrScrapKeyStats(StatsItem<K> finishedStats) {
  final boolean atCapacity = topKeys.size() >= NUMBER_OF_TOP_KEYS_TO_KEEP;
  if (atCapacity) {
    final StatsItem<K> head = topKeys.peek();
    final boolean beatsHead = finishedStats.elementsCount() > head.elementsCount();
    if (!beatsHead) {
      return;
    }
    // make room by evicting the current head
    topKeys.poll();
  }
  topKeys.add(finishedStats);
}
/**
 * Lazily creates the collector from {@code accumulators} and returns the
 * same cached instance on subsequent calls.
 *
 * @return the cached collector
 */
FunctionCollectorMem<OUT> getCollector() {
  if (cachedCollector != null) {
    return cachedCollector;
  }
  cachedCollector = new FunctionCollectorMem<>(accumulators);
  return cachedCollector;
}
/**
 * Creates spill tools by passing a new {@code SparkSerializerFactory} wrapping
 * the given serializer, together with the settings, to the superclass constructor.
 *
 * @param serializer serializer wrapped into the serializer factory
 * @param settings settings forwarded to the superclass
 */
SparkSpillTools(Serializer serializer, Settings settings) { super(new SparkSerializerFactory(serializer), settings); }