/**
 * Sets up a delta iteration from its initial solution set and initial workset.
 * <p>
 * Both initial data sets are stored, a placeholder data set is created for the solution set
 * and for the workset, and the keys and iteration limit are remembered.
 *
 * @param context The execution environment handed to both placeholder data sets.
 * @param type Type information for the solution set. NOTE(review): this parameter is not read
 *             by the constructor; the placeholder uses {@code solutionSet.getType()} instead.
 * @param solutionSet The initial solution set.
 * @param workset The initial workset.
 * @param keys The keys stored for this iteration (presumably the solution set keys - confirm
 *             against the caller).
 * @param maxIterations The value stored as this iteration's {@code maxIterations}.
 */
DeltaIteration(ExecutionEnvironment context,
        TypeInformation<ST> type,
        DataSet<ST> solutionSet,
        DataSet<WT> workset,
        Keys<ST> keys,
        int maxIterations) {

    // Inputs of the iteration.
    this.initialSolutionSet = solutionSet;
    this.initialWorkset = workset;

    // Placeholders; the solution set placeholder additionally receives this iteration.
    this.solutionSetPlaceholder =
            new SolutionSetPlaceHolder<ST>(context, solutionSet.getType(), this);
    this.worksetPlaceholder =
            new WorksetPlaceHolder<WT>(context, workset.getType());

    // Iteration configuration.
    this.keys = keys;
    this.maxIterations = maxIterations;
}
/**
 * Marks the end of the iterative part of the program.
 * <p>
 * The returned data set wraps this iteration together with the given feedback data set.
 *
 * @param iterationResult The data set that is fed back into the next iteration.
 * @return The data set representing the iteration's result once the computation has terminated.
 *
 * @see DataSet#iterate(int)
 */
public DataSet<T> closeWith(DataSet<T> iterationResult) {
    final DataSet<T> closedIteration = new BulkIterationResultSet<T>(
            getExecutionEnvironment(),
            getType(),
            this,
            iterationResult);
    return closedIteration;
}
/**
 * Marks the end of the delta iteration's step function.
 * <p>
 * The execution environment and the solution set type are taken from the initial solution set,
 * the workset type from the initial workset.
 *
 * @param solutionSetDelta The delta for the solution set; it is merged into the solution set at
 *                         the end of each iteration.
 * @param newWorkset The new workset (feedback data set) that is fed back into the next iteration.
 * @return The data set representing the iteration's result once the computation has terminated.
 *
 * @see DataSet#iterateDelta(DataSet, int, int...)
 */
public DataSet<ST> closeWith(DataSet<ST> solutionSetDelta, DataSet<WT> newWorkset) {
    final DataSet<ST> closedIteration = new DeltaIterationResultSet<ST, WT>(
            initialSolutionSet.getExecutionEnvironment(),
            initialSolutionSet.getType(),
            initialWorkset.getType(),
            this,
            solutionSetDelta,
            newWorkset,
            keys,
            maxIterations);
    return closedIteration;
}
/**
 * Builds the default vertex data set, consisting of the sixteen values 1 through 16.
 *
 * @param env The execution environment used to create the data set.
 * @return A data set holding the values 1L to 16L.
 */
public static DataSet<Long> getDefaultVertexDataSet(ExecutionEnvironment env) {
    final DataSet<Long> vertices = env.fromElements(
            1L, 2L, 3L, 4L,
            5L, 6L, 7L, 8L,
            9L, 10L, 11L, 12L,
            13L, 14L, 15L, 16L);
    return vertices;
}
/**
 * Returns the type information describing the elements of the input data set.
 * The result is equivalent to {@code getInput().getType()}.
 *
 * @return The data type of the input.
 */
public TypeInformation<IN> getInputType() {
    final TypeInformation<IN> inputType = this.input.getType();
    return inputType;
}
/**
 * Describes this environment as
 * {@code "Local Environment (DOP = <dop>) : <id string>"}, where the degree of parallelism
 * is printed as {@code default} when it is {@code -1}.
 */
@Override
public String toString() {
    final String dop;
    if (getDegreeOfParallelism() == -1) {
        // -1 means no explicit degree of parallelism is shown.
        dop = "default";
    } else {
        dop = String.valueOf(getDegreeOfParallelism());
    }
    return "Local Environment (DOP = " + dop + ") : " + getIdString();
}
}
/**
 * Creates an operator over two input data sets.
 * <p>
 * The execution environment of the operator is taken from {@code input1}. Both inputs are
 * passed to {@code DataSet.checkSameExecutionContext} before they are stored (presumably this
 * rejects inputs that belong to different environments - confirm against that method).
 *
 * @param input1 The first input data set.
 * @param input2 The second input data set.
 * @param resultType The type information of the operator's result.
 */
protected TwoInputOperator(DataSet<IN1> input1, DataSet<IN2> input2, TypeInformation<OUT> resultType) {
    super(input1.getExecutionEnvironment(), resultType);
    DataSet.checkSameExecutionContext(input1, input2);
    this.input1 = input1;
    this.input2 = input2;
}
/**
 * Describes this environment as
 * {@code "Remote Environment (<host>:<port> - DOP = <dop>) : <id string>"}, where the degree
 * of parallelism is printed as {@code default} when it is {@code -1}.
 */
@Override
public String toString() {
    final String prefix = "Remote Environment (" + this.host + ":" + this.port + " - DOP = ";

    final String dop;
    if (getDegreeOfParallelism() == -1) {
        // -1 means no explicit degree of parallelism is shown.
        dop = "default";
    } else {
        dop = String.valueOf(getDegreeOfParallelism());
    }

    return prefix + dop + ") : " + getIdString();
}
}
/**
 * Creates the program's {@link Plan}. The plan is a description of all data sources, data sinks,
 * and operations and how they interact, as an isolated unit that can be executed with a
 * {@link eu.stratosphere.api.common.PlanExecutor}. Obtaining a plan and starting it with an
 * executor is an alternative way to run a program and is only possible if the program consists
 * only of distributed operations.
 * <p>
 * This overload delegates to {@code createProgramPlan(null)}.
 *
 * @return The program's plan.
 */
public JavaPlan createProgramPlan() {
    return createProgramPlan(null);
}
/**
 * Creates a {@link LocalEnvironment} with an explicit degree of parallelism. A local
 * environment runs the program multi-threaded, inside the JVM in which the environment
 * was created.
 *
 * @param degreeOfParallelism The degree of parallelism to set on the new local environment.
 * @return A local execution environment configured with the given degree of parallelism.
 */
public static LocalEnvironment createLocalEnvironment(int degreeOfParallelism) {
    final LocalEnvironment environment = new LocalEnvironment();
    environment.setDegreeOfParallelism(degreeOfParallelism);
    return environment;
}
/**
 * Returns the execution environment for the context in which the program currently runs.
 * <p>
 * When no context environment has been set (standalone invocation), a local environment is
 * created via {@link #createLocalEnvironment()}. Otherwise the context environment is returned,
 * for example the cluster environment when the program was submitted through the command line
 * client.
 *
 * @return The execution environment of the context in which the program is executed.
 */
public static ExecutionEnvironment getExecutionEnvironment() {
    if (contextEnvironment != null) {
        return contextEnvironment;
    }
    // No surrounding context: fall back to a fresh local environment.
    return createLocalEnvironment();
}
/**
 * Forwards to {@code ExecutionEnvironment.disableLocalExecution()}.
 * NOTE(review): the precise effect is defined by that method, which is not visible here.
 */
public static void disableLocalExecution() {
    ExecutionEnvironment.disableLocalExecution();
}
}
/**
 * Creates an operator over a single input data set.
 * <p>
 * The execution environment of the operator is taken from {@code input}.
 *
 * @param input The input data set of the operator.
 * @param resultType The type information of the operator's result.
 */
protected SingleInputOperator(DataSet<IN> input, TypeInformation<OUT> resultType) {
    super(input.getExecutionEnvironment(), resultType);
    this.input = input;
}
/**
 * Marks the end of the iterative part of the program and attaches a termination criterion.
 * <p>
 * The termination criterion dynamically signals the iteration to halt: the loop stops as soon
 * as the criterion data set is empty. A typical pattern is a filter that keeps only the
 * elements considered non-converged; once no such elements remain, the iteration finishes.
 *
 * @param iterationResult The data set that is fed back into the next iteration.
 * @param terminationCriterion The data set whose emptiness causes the iteration to halt.
 * @return The data set representing the iteration's result once the computation has terminated.
 *
 * @see DataSet#iterate(int)
 */
public DataSet<T> closeWith(DataSet<T> iterationResult, DataSet<?> terminationCriterion) {
    final DataSet<T> closedIteration = new BulkIterationResultSet<T>(
            getExecutionEnvironment(),
            getType(),
            this,
            iterationResult,
            terminationCriterion);
    return closedIteration;
}
/**
 * Builds the default vertex data set, consisting of the sixteen values 1 through 16.
 *
 * @param env The execution environment used to create the data set.
 * @return A data set holding the values 1L to 16L.
 */
public static DataSet<Long> getDefaultVertexDataSet(ExecutionEnvironment env) {
    final DataSet<Long> vertices = env.fromElements(
            1L, 2L, 3L, 4L,
            5L, 6L, 7L, 8L,
            9L, 10L, 11L, 12L,
            13L, 14L, 15L, 16L);
    return vertices;
}
/**
 * Returns the type information describing the elements of the first input data set.
 * The result is equivalent to {@code getInput1().getType()}.
 *
 * @return The data type of the first input.
 */
public TypeInformation<IN1> getInput1Type() {
    final TypeInformation<IN1> firstInputType = this.input1.getType();
    return firstInputType;
}
/**
 * Creates a {@link LocalEnvironment} using the default local degree of parallelism. A local
 * environment runs the program multi-threaded, inside the JVM in which the environment was
 * created. The default degree of parallelism is the number of hardware contexts
 * (CPU cores / threads), unless changed through {@link #setDefaultLocalParallelism(int)}.
 *
 * @return A local execution environment.
 */
public static LocalEnvironment createLocalEnvironment() {
    final LocalEnvironment environment = createLocalEnvironment(defaultLocalDop);
    return environment;
}
/**
 * Forwards to {@code ExecutionEnvironment.disableLocalExecution()}.
 * NOTE(review): the precise effect is defined by that method, which is not visible here.
 */
public static void disableLocalExecution() {
    ExecutionEnvironment.disableLocalExecution();
}
}
/**
 * Returns the type information describing the elements of the second input data set.
 * The result is equivalent to {@code getInput2().getType()}.
 *
 * @return The data type of the second input.
 */
public TypeInformation<IN2> getInput2Type() {
    final TypeInformation<IN2> secondInputType = this.input2.getType();
    return secondInputType;
}
/**
 * Creates an operator that produces the union of the two given data sets.
 * <p>
 * The result type of the operator is the type of {@code input1}.
 *
 * @param input1 The first data set to be unioned.
 * @param input2 The second data set to be unioned.
 */
public UnionOperator(DataSet<T> input1, DataSet<T> input2) {
    super(input1, input2, input1.getType());
}