@Override
public DataSetAnalyticBase<T, R> run(DataSet<T> input) throws Exception {
    env = input.getExecutionEnvironment();
    return this;
}
public NoOpOperator(DataSet<IN> input, TypeInformation<IN> resultType) {
    super(input.getExecutionEnvironment(), resultType);
    this.input = input;
}
protected SingleInputOperator(DataSet<IN> input, TypeInformation<OUT> resultType) {
    super(input.getExecutionEnvironment(), resultType);
    this.input = input;
}
protected static void checkSameExecutionContext(DataSet<?> set1, DataSet<?> set2) {
    if (set1.getExecutionEnvironment() != set2.getExecutionEnvironment()) {
        throw new IllegalArgumentException("The two inputs have different execution contexts.");
    }
}
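This check runs at plan-construction time for binary operators such as union, join, and cross, since they pass through the TwoInputOperator constructor shown below. A minimal sketch, assuming two deliberately separate local environments created purely for illustration:

ExecutionEnvironment env1 = ExecutionEnvironment.createLocalEnvironment();
ExecutionEnvironment env2 = ExecutionEnvironment.createLocalEnvironment();

DataSet<Integer> a = env1.fromElements(1, 2, 3);
DataSet<Integer> b = env2.fromElements(4, 5, 6);

// Fails immediately with IllegalArgumentException("The two inputs have different execution contexts.")
// because a and b were created on different ExecutionEnvironments.
a.union(b);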
@Override
public void write(String executionName, PrintStream out, DataSet<T> data) throws Exception {
    ChecksumHashCode<T> checksumHashCode = new ChecksumHashCode<T>().run(data);

    if (printExecutionPlan.getValue()) {
        out.println();
        out.println(data.getExecutionEnvironment().getExecutionPlan());
    }

    ChecksumHashCode.Checksum checksum = checksumHashCode.execute(executionName);

    out.println();
    out.println(checksum);
}
protected TwoInputOperator(DataSet<IN1> input1, DataSet<IN2> input2, TypeInformation<OUT> resultType) {
    super(Preconditions.checkNotNull(input1, "input1 is null").getExecutionEnvironment(), resultType);
    Preconditions.checkNotNull(input2, "input2 is null");

    DataSet.checkSameExecutionContext(input1, input2);
    this.input1 = input1;
    this.input2 = input2;
}
@Override
public void write(String executionName, PrintStream out, DataSet<T> data) throws Exception {
    Collect<T> collector = new Collect<T>().run(data);

    if (printExecutionPlan.getValue()) {
        out.println();
        out.println(data.getExecutionEnvironment().getExecutionPlan());
    }

    List<T> results = collector.execute(executionName);

    if (results.size() == 0) {
        return;
    }

    out.println();

    if (results.get(0) instanceof PrintableResult) {
        for (Object result : results) {
            out.println(((PrintableResult) result).toPrintableString());
        }
    } else {
        for (Object result : results) {
            out.println(result);
        }
    }
}
/**
 * Closes the delta iteration. This method defines the end of the delta iteration's function.
 *
 * @param solutionSetDelta The delta for the solution set. The delta will be merged into the
 *                         solution set at the end of each iteration.
 * @param newWorkset The new workset (feedback data set) that will be fed back to the next iteration.
 * @return The DataSet that represents the result of the iteration, after the computation has terminated.
 *
 * @see DataSet#iterateDelta(DataSet, int, int...)
 */
public DataSet<ST> closeWith(DataSet<ST> solutionSetDelta, DataSet<WT> newWorkset) {
    return new DeltaIterationResultSet<ST, WT>(initialSolutionSet.getExecutionEnvironment(),
            initialSolutionSet.getType(), initialWorkset.getType(), this,
            solutionSetDelta, newWorkset, keys, maxIterations);
}
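A minimal usage sketch of the iterateDelta()/closeWith() pair, assuming an existing ExecutionEnvironment env, Tuple2<Long, Long> elements keyed on field 0, and a hypothetical bound of 10 supersteps (the iteration also stops early once the workset becomes empty):

DataSet<Tuple2<Long, Long>> initial = env.fromElements(Tuple2.of(1L, 5L), Tuple2.of(2L, 0L));

// The same data set serves as initial solution set and initial workset; field 0 is the solution set key.
DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
        initial.iterateDelta(initial, 10, 0);

// Step function: keep only workset elements with a positive value.
DataSet<Tuple2<Long, Long>> delta = iteration.getWorkset().filter(t -> t.f1 > 0);

// closeWith() merges the delta into the solution set and feeds the new workset into the next superstep.
DataSet<Tuple2<Long, Long>> result = iteration.closeWith(delta, delta);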
return new IterativeDataSet<>(getExecutionEnvironment(), getType(), this, maxIterations);
public <F> F clean(F f) {
    if (getExecutionEnvironment().getConfig().isClosureCleanerEnabled()) {
        ClosureCleaner.clean(f, true);
    } else {
        ClosureCleaner.ensureSerializable(f);
    }
    return f;
}
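Whether clean() actually rewrites the closure is decided by the ExecutionConfig flag queried above; a small sketch, assuming a default environment:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

// With the cleaner disabled, clean(f) skips ClosureCleaner.clean() and only verifies
// that the function is serializable as-is.
env.getConfig().disableClosureCleaner();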
protected ProjectCross(DataSet<I1> input1, DataSet<I2> input2,
        int[] fields, boolean[] isFromFirst,
        TupleTypeInfo<OUT> returnType, CrossProjection<I1, I2> crossProjection, CrossHint hint) {
    super(input1, input2,
            new ProjectCrossFunction<I1, I2, OUT>(fields, isFromFirst,
                    returnType.createSerializer(input1.getExecutionEnvironment().getConfig()).createInstance()),
            returnType, hint, "unknown");

    this.crossProjection = crossProjection;
}
protected ProjectCross(DataSet<I1> input1, DataSet<I2> input2,
        int[] fields, boolean[] isFromFirst,
        TupleTypeInfo<OUT> returnType, CrossHint hint) {
    super(input1, input2,
            new ProjectCrossFunction<I1, I2, OUT>(fields, isFromFirst,
                    returnType.createSerializer(input1.getExecutionEnvironment().getConfig()).createInstance()),
            returnType, hint, "unknown");

    crossProjection = null;
}
protected ProjectJoin(DataSet<I1> input1, DataSet<I2> input2, Keys<I1> keys1, Keys<I2> keys2,
        JoinHint hint, int[] fields, boolean[] isFromFirst,
        TupleTypeInfo<OUT> returnType, JoinProjection<I1, I2> joinProj) {
    super(input1, input2, keys1, keys2,
            new ProjectFlatJoinFunction<I1, I2, OUT>(fields, isFromFirst,
                    returnType.createSerializer(input1.getExecutionEnvironment().getConfig()).createInstance()),
            returnType, hint, Utils.getCallLocationName(4));

    this.joinProj = joinProj;
}
protected ProjectJoin(DataSet<I1> input1, DataSet<I2> input2, Keys<I1> keys1, Keys<I2> keys2,
        JoinHint hint, int[] fields, boolean[] isFromFirst, TupleTypeInfo<OUT> returnType) {
    super(input1, input2, keys1, keys2,
            new ProjectFlatJoinFunction<I1, I2, OUT>(fields, isFromFirst,
                    returnType.createSerializer(input1.getExecutionEnvironment().getConfig()).createInstance()),
            returnType, hint,
            // We need to use the 4th element in the stack because the call comes through .types().
            Utils.getCallLocationName(4));

    joinProj = null;
}
/**
 * Convenience method to get the elements of a DataSet as a List.
 * As a DataSet can contain a lot of data, this method should be used with caution.
 *
 * @return A List containing the elements of the DataSet
 */
public List<T> collect() throws Exception {
    final String id = new AbstractID().toString();
    final TypeSerializer<T> serializer = getType().createSerializer(getExecutionEnvironment().getConfig());

    this.output(new Utils.CollectHelper<>(id, serializer)).name("collect()");
    JobExecutionResult res = getExecutionEnvironment().execute();

    ArrayList<byte[]> accResult = res.getAccumulatorResult(id);
    if (accResult != null) {
        try {
            return SerializedListAccumulator.deserializeList(accResult, serializer);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Cannot find type class of collected data type.", e);
        } catch (IOException e) {
            throw new RuntimeException("Serialization error while deserializing collected data", e);
        }
    } else {
        throw new RuntimeException("The call to collect() could not retrieve the DataSet.");
    }
}
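A brief usage sketch, assuming the usual flink-java imports; the collect() call itself submits the job through getExecutionEnvironment().execute() and reads the serialized result back from an accumulator:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

List<Long> evens = env.generateSequence(1, 100)
        .filter(i -> i % 2 == 0)
        .collect(); // runs the job and materializes the result on the client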
/**
 * Convenience method to get the count (number of elements) of a DataSet
 * as well as the checksum (sum over element hashes).
 *
 * @return A ChecksumHashCode that represents the count and checksum of elements in the data set.
 * @deprecated replaced with {@code org.apache.flink.graph.asm.dataset.ChecksumHashCode} in Gelly
 */
@Deprecated
public static <T> Utils.ChecksumHashCode checksumHashCode(DataSet<T> input) throws Exception {
    final String id = new AbstractID().toString();
    input.output(new Utils.ChecksumHashCodeHelper<T>(id)).name("ChecksumHashCode");

    JobExecutionResult res = input.getExecutionEnvironment().execute();
    return res.<Utils.ChecksumHashCode>getAccumulatorResult(id);
}
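A short sketch of the deprecated helper, assuming it is the static method exposed by DataSetUtils and an existing environment env:

DataSet<String> words = env.fromElements("flink", "dataset", "checksum");

// Executes a job and reads the combined count / hash-sum accumulator.
Utils.ChecksumHashCode checksum = DataSetUtils.checksumHashCode(words);
long elements = checksum.getCount();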
/**
 * Convenience method to get the count (number of elements) of a DataSet.
 *
 * @return A long integer that represents the number of elements in the data set.
 */
public long count() throws Exception {
    final String id = new AbstractID().toString();

    output(new Utils.CountHelper<T>(id)).name("count()");
    JobExecutionResult res = getExecutionEnvironment().execute();

    return res.<Long>getAccumulatorResult(id);
}
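count() follows the same accumulator pattern as collect(); a one-line sketch, assuming an existing environment env:

long n = env.fromElements("a", "b", "c").count(); // executes a job and reads back a Long accumulator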
@Override
public void write(String executionName, PrintStream out, DataSet<T> data) throws Exception {
    if (Tuple.class.isAssignableFrom(data.getType().getTypeClass())) {
        data
            .writeAsCsv(filename.getValue(), lineDelimiter.getValue(), fieldDelimiter.getValue())
            .name("CSV: " + filename.getValue());
    } else {
        // line and field delimiters have no effect when writing non-tuple (POJO) result types
        data
            .writeAsText(filename.getValue())
            .name("CSV: " + filename.getValue());
    }

    data.getExecutionEnvironment().execute();
}
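The Tuple check above matters because writeAsCsv() only accepts tuple data sets; a small sketch with hypothetical output paths, assuming an existing environment env:

DataSet<Tuple2<String, Integer>> pairs = env.fromElements(Tuple2.of("a", 1), Tuple2.of("b", 2));

// Tuples can be written as CSV with explicit line and field delimiters ...
pairs.writeAsCsv("/tmp/pairs.csv", "\n", ",");

// ... while non-tuple results only support writeAsText(), which falls back to toString().
env.fromElements("a=1", "b=2").writeAsText("/tmp/pairs.txt");

env.execute("write example");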
return new DeltaIteration<>(getExecutionEnvironment(), getType(), this, workset, keys, maxIterations);
Graph.fromDataSet(initialVertices, edgesWithValue, initialVertices.getExecutionEnvironment());