/**
 * Creates an execution environment that represents the context in which the program is currently
 * executed. If the program is invoked standalone, this method returns a local execution
 * environment, as returned by {@link #createLocalEnvironment()}. If the program is invoked from
 * within the command line client to be submitted to a cluster, this method returns the execution
 * environment of this cluster.
 *
 * @return The execution environment of the context in which the program is executed.
 */
public static ExecutionEnvironment getExecutionEnvironment() {
    // A context factory is only installed when the program runs inside a cluster
    // client; without one, fall back to a local in-JVM environment.
    if (contextEnvironmentFactory != null) {
        return contextEnvironmentFactory.createExecutionEnvironment();
    }
    return createLocalEnvironment();
}
/**
 * Creates a {@link LocalEnvironment}. The local execution environment will run the program in a
 * multi-threaded fashion in the same JVM as the environment was created in, using the given
 * configuration.
 *
 * @param customConfiguration Pass a custom configuration to the LocalEnvironment.
 * @return A local execution environment with the specified parallelism.
 */
public static LocalEnvironment createLocalEnvironment(Configuration customConfiguration) {
    // NOTE(review): -1 presumably selects the default parallelism (the toString
    // implementations render -1 as "default") — confirm against the delegate.
    final int parallelism = -1;
    return createLocalEnvironment(customConfiguration, parallelism);
}
/**
 * Syntactic sugar for {@code aggregate(Aggregations.SUM, field)}.
 *
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the summed DataSet.
 *
 * @see org.apache.flink.api.java.operators.AggregateOperator
 */
public AggregateOperator<T> sum(int field) {
    // Delegate to the generic aggregation entry point with the SUM function.
    return this.aggregate(Aggregations.SUM, field);
}
@Test(expected = IndexOutOfBoundsException.class) public void testGroupByKeyFields4() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should not work, key out of tuple bounds tupleDs.groupBy(5); }
public <F> F clean(F f) {
    // When the closure cleaner is enabled, scrub the function object; otherwise
    // only verify that it is serializable as-is.
    final boolean cleanerEnabled = getExecutionEnvironment().getConfig().isClosureCleanerEnabled();
    if (!cleanerEnabled) {
        ClosureCleaner.ensureSerializable(f);
    } else {
        ClosureCleaner.clean(f, true);
    }
    return f;
}
/**
 * Gets the type information of the data type of the input data set.
 * This method returns equivalent information as {@code getInput().getType()}.
 *
 * @return The input data type.
 */
public TypeInformation<IN> getInputType() {
    return input.getType();
}
/**
 * Creates a new data set that contains a sequence of numbers. The data set will be created in
 * parallel, so there is no guarantee about the order of the elements.
 *
 * @param from The number to start at (inclusive).
 * @param to The number to stop at (inclusive).
 * @return A DataSet, containing all numbers in the {@code [from, to]} interval.
 */
public DataSource<Long> generateSequence(long from, long to) {
    // Build the sequence from a splittable iterator so it can be consumed in parallel.
    final NumberSequenceIterator iterator = new NumberSequenceIterator(from, to);
    return fromParallelCollection(iterator, BasicTypeInfo.LONG_TYPE_INFO, Utils.getCallLocationName());
}
/**
 * Counts the elements of the vertex set.
 *
 * @return a long integer representing the number of vertices
 */
public long numberOfVertices() throws Exception {
    return this.vertices.count();
}
@Override
public DataSetAnalyticBase<T, R> run(DataSet<T> input) throws Exception {
    // Remember the input's environment for later use; return this to allow chaining.
    this.env = input.getExecutionEnvironment();
    return this;
}
ShutdownThread(PlanExecutor executor) {
    super("Local cluster reaper");
    this.executor = executor;

    // Background housekeeping: must neither keep the JVM alive nor compete for CPU.
    setDaemon(true);
    setPriority(Thread.MIN_PRIORITY);
}
@Override
public String toString() {
    // Render the sentinel value -1 as "default" rather than the raw number.
    final Object parallelismDesc = getParallelism() == -1 ? "default" : getParallelism();
    return "Remote Environment (" + this.host + ":" + this.port
            + " - parallelism = " + parallelismDesc + ") : " + getIdString();
}
@Override
public JobExecutionResult execute(String jobName) throws Exception {
    // Remember the most recent result so it can be queried afterwards.
    this.lastJobExecutionResult = super.execute(jobName);
    return this.lastJobExecutionResult;
}
@Override
public String toString() {
    // Render the default-parallelism sentinel as "default" rather than the raw number.
    final Object parallelismDesc =
            getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT ? "default" : getParallelism();
    return "Local Environment (parallelism = " + parallelismDesc + ") : " + getIdString();
}
@Override
public void close() throws IOException {
    // Publish the locally accumulated checksum so it can be merged globally.
    getRuntimeContext().addAccumulator(id, new ChecksumHashCode(counter, checksum));
}
}
/**
 * Debugging utility to understand the hierarchy of serializers created by the Java API.
 * Tested in GroupReduceITCase.testGroupByGenericType()
 */
public static <T> String getSerializerTree(TypeInformation<T> ti) {
    // Start the recursive rendering at nesting depth 0.
    return getSerializerTree(ti, 0);
}
public void setAsContext() {
    // Install a factory that refuses to create a context environment: the shell
    // already owns the execution environment, so any such request is an error.
    final StreamExecutionEnvironmentFactory factory = new StreamExecutionEnvironmentFactory() {
        @Override
        public StreamExecutionEnvironment createExecutionEnvironment() {
            throw new UnsupportedOperationException("Execution Environment is already defined" +
                    " for this shell.");
        }
    };
    initializeContextEnvironment(factory);
}
/**
 * Gets the type information of the data type of the first input data set.
 * This method returns equivalent information as {@code getInput1().getType()}.
 *
 * @return The first input data type.
 */
public TypeInformation<IN1> getInput1Type() {
    return input1.getType();
}
/**
 * Creates a {@link LocalEnvironment}. The local execution environment will run the program in a
 * multi-threaded fashion in the same JVM as the environment was created in. The default
 * parallelism of the local environment is the number of hardware contexts (CPU cores / threads),
 * unless it was specified differently by {@link #setDefaultLocalParallelism(int)}.
 *
 * @return A local execution environment.
 */
public static LocalEnvironment createLocalEnvironment() {
    // Delegate to the parallelism-taking variant with the configured default.
    return createLocalEnvironment(defaultLocalDop);
}
/**
 * Gets the type information of the data type of the second input data set.
 * This method returns equivalent information as {@code getInput2().getType()}.
 *
 * @return The second input data type.
 */
public TypeInformation<IN2> getInput2Type() {
    return input2.getType();
}
// Mockito stub (the enclosing doAnswer(...) call starts before this fragment):
// when invokeInteractiveModeForExecution() is called on the mocked packaged
// program, create a local execution environment instead of running a real job.
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
    ExecutionEnvironment.createLocalEnvironment();
    return null;
}
}).when(packagedProgramMock).invokeInteractiveModeForExecution();