Refine search
/**
 * Joining an atomic type selected via "*" together with an additional key
 * expression is invalid and must raise {@link InvalidProgramException}.
 */
@Test(expected = InvalidProgramException.class)
public void testJoinKeyInvalidAtomic1() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Integer> atomicInput = env.fromElements(0, 0, 0);
	final DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleInput =
		env.fromCollection(emptyTupleData, tupleTypeInfo);

	// "*" already selects the whole atomic value; a second key expression must be rejected.
	atomicInput.join(tupleInput).where("*", "invalidKey");
}
@Test public void testFaultyMergeAccumulator() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); // Test Exception forwarding with faulty Accumulator implementation env.generateSequence(0, 10000) .map(new FaultyMergeAccumulatorUsingMapper()) .output(new DiscardingOutputFormat<>()); assertAccumulatorsShouldFail(env.execute()); }
/**
 * Runs a 10-step iteration whose step function reduces over the input with the
 * iteration data set as a broadcast variable, and checks the final value is 8.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	final DataSet<Integer> input = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);

	final IterativeDataSet<Integer> loop = input.iterate(10);
	final DataSet<Integer> step =
		input.reduceGroup(new PickOneAllReduce()).withBroadcastSet(loop, "bc");

	final List<Integer> collected = new ArrayList<Integer>();
	loop.closeWith(step).output(new LocalCollectionOutputFormat<Integer>(collected));
	env.execute();

	Assert.assertEquals(8, collected.get(0).intValue());
}
/**
 * Builds the program input: a semicolon-delimited CSV mapped to
 * {@link StringTriple} POJOs when {@code --input} is given, otherwise the
 * built-in example tuples.
 */
@SuppressWarnings("unchecked")
private static DataSet<StringTriple> getDataSet(ExecutionEnvironment env, ParameterTool params) {
	if (!params.has("input")) {
		System.out.println("Executing EmptyFieldsCountAccumulator example with default input data set.");
		System.out.println("Use --input to specify file input.");
		return env.fromCollection(getExampleInputTuples());
	}
	return env.readCsvFile(params.get("input"))
			.fieldDelimiter(";")
			.pojoType(StringTriple.class);
}
/**
 * Receives one record per known parameter from the streamer and applies it to
 * the environment: DOP (parallelism), RETRY (fixed-delay restart strategy with
 * a 10s delay), and ID (current environment id).
 */
private void receiveParameters(ExecutionEnvironment env) throws IOException {
	final int parameterCount = Parameters.values().length;
	for (int i = 0; i < parameterCount; i++) {
		final Tuple record = (Tuple) streamer.getRecord(true);
		final String name = ((String) record.getField(0)).toUpperCase();
		switch (Parameters.valueOf(name)) {
			case DOP:
				final Integer dop = record.<Integer>getField(1);
				env.setParallelism(dop);
				break;
			case RETRY:
				final int retries = record.<Integer>getField(1);
				env.setRestartStrategy(RestartStrategies.fixedDelayRestart(retries, 10000L));
				break;
			case ID:
				currentEnvironmentID = record.<Integer>getField(1);
				break;
		}
	}
	// A non-positive parallelism means it was never set; fall back to 1.
	if (env.getParallelism() < 0) {
		env.setParallelism(1);
	}
}
/**
 * Ensure that the user can pass a custom configuration object to the LocalEnvironment.
 */
@Test
public void testLocalEnvironmentWithConfig() throws Exception {
	final Configuration conf = new Configuration();
	conf.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, PARALLELISM);

	final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
	env.setParallelism(ExecutionConfig.PARALLELISM_AUTO_MAX);
	env.getConfig().disableSysoutLogging();

	// Each parallel subtask emits its own index, so the number of collected
	// records equals the parallelism taken from the configuration.
	final DataSet<Integer> subtaskIndices = env
			.createInput(new ParallelismDependentInputFormat())
			.rebalance()
			.mapPartition(new RichMapPartitionFunction<Integer, Integer>() {
				@Override
				public void mapPartition(Iterable<Integer> values, Collector<Integer> out) throws Exception {
					out.collect(getRuntimeContext().getIndexOfThisSubtask());
				}
			});

	final List<Integer> collected = subtaskIndices.collect();
	assertEquals(PARALLELISM, collected.size());
}
// Configure ZooKeeper-based high availability for the remote environment.
Configuration config = new Configuration();
config.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zkQuorum);
config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, zookeeperStoragePath.getAbsolutePath());

ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment(
	"leader",
	1,
	config);
env.setParallelism(PARALLELISM);
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));
env.getConfig().setExecutionMode(executionMode);
env.getConfig().disableSysoutLogging();

// FIX: this statement was missing its terminating semicolon (a compile error)
// and the generated sequence had no sink, so execute() had nothing to run.
// Attach a discarding sink to make the program executable.
final DataSet<Long> result = env.generateSequence(1, numElements);
result.output(new DiscardingOutputFormat<Long>());

env.execute();
/**
 * Ensure that the program parallelism can be set even if the configuration is supplied.
 */
@Test
public void testUserSpecificParallelism() throws Exception {
	final Configuration config = new Configuration();
	config.setString(AkkaOptions.STARTUP_TIMEOUT, VALID_STARTUP_TIMEOUT);

	final URI restAddress = MINI_CLUSTER_RESOURCE.getMiniCluster().getRestAddress();
	final ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment(
			restAddress.getHost(),
			restAddress.getPort(),
			config
	);
	env.setParallelism(USER_DOP);
	env.getConfig().disableSysoutLogging();

	// One record per subtask, so the result size reflects the effective parallelism.
	final DataSet<Integer> subtaskIndices = env
			.createInput(new ParallelismDependentInputFormat())
			.rebalance()
			.mapPartition(new RichMapPartitionFunction<Integer, Integer>() {
				@Override
				public void mapPartition(Iterable<Integer> values, Collector<Integer> out) throws Exception {
					out.collect(getRuntimeContext().getIndexOfThisSubtask());
				}
			});

	final List<Integer> collected = subtaskIndices.collect();
	assertEquals(USER_DOP, collected.size());
}
@Override
protected void testProgram() throws Exception {
	// Test passing a configuration object to an input format.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final Configuration formatConfig = new Configuration();
	formatConfig.setString("prepend", "test");

	final DataSet<String> lines =
		env.createInput(new TestInputFormat(new Path(inputPath))).withParameters(formatConfig);

	final List<String> result = lines.collect();
	final String expectedResult = "ab\n" + "cd\n" + "ef\n";
	compareResultAsText(result, expectedResult);
}
/**
 * An identity mapper whose generic output type cannot be inferred must still
 * work when the type is supplied explicitly via a {@code TypeHint}.
 */
@Test
public void testIdentityMapWithMissingTypesAndStringTypeHint() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();

	final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.getSmall3TupleDataSet(env);
	final DataSet<Tuple3<Integer, Long, String>> identity = input
			.map(new Mapper<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>())
			.returns(new TypeHint<Tuple3<Integer, Long, String>>(){});

	final List<Tuple3<Integer, Long, String>> result = identity.collect();
	final String expected = "(2,2,Hello)\n" + "(3,2,Hello world)\n" + "(1,1,Hi)\n";
	compareResultAsText(result, expected);
}
/**
 * Word count: tokenize each line, group by the word (field 0), count per
 * group, and write the result as space-separated CSV.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	env.readTextFile(dataPath)
			.flatMap(new TokenizeLine())
			.groupBy(0)
			.reduceGroup(new CountWords())
			.writeAsCsv(resultPath, "\n", " ");

	this.result = env.execute();
}
public static void main(String[] args) throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.fromElements(1, 2).output(new DiscardingOutputFormat<Integer>()); env.execute().getNetRuntime(); } }
@Override protected void testProgram() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); TestNonRichOutputFormat output = new TestNonRichOutputFormat(); env.createInput(new TestNonRichInputFormat()).output(output); try { env.execute(); } catch (Exception e){ // we didn't break anything by making everything rich. e.printStackTrace(); fail(e.getMessage()); } } }
/**
 * Executes the reduce, grouped-reduce, join, and cross checks once for every
 * parallelism from MAX_PARALLELISM down to 1.
 */
public void run() throws Exception {
	LOG.info("Random seed = {}", RANDOM_SEED);

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();

	for (int dop = MAX_PARALLELISM; dop > 0; dop--) {
		LOG.info("Parallelism = {}", dop);
		env.setParallelism(dop);

		testReduce(env);
		testGroupedReduce(env);
		testJoin(env);
		testCross(env);
	}
}
/** * Tests compiler fail for join program with replicated data source and changing parallelism. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputChangingparallelism() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .join(source2).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM+2) .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
/**
 * Builds a job over an infinite integer source with the given mapper and a
 * discarding sink, then runs it and cancels it.
 */
public void executeTask(MapFunction<Integer, Integer> mapper) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	env.createInput(new InfiniteIntegerInputFormat(false))
			.map(mapper)
			.output(new DiscardingOutputFormat<Integer>());
	env.setParallelism(PARALLELISM);

	// Cancel after 5 seconds; allow up to 10 seconds for the job to stop.
	runAndCancelJob(env.createProgramPlan(), 5 * 1000, 10 * 1000);
}
/**
 * Joins two unpartitioned CSV sources with a REPARTITION_HASH_FIRST hint and
 * checks that the optimizer produced valid input properties for the join.
 */
@Test
public void noPreviousPartitioningJoin2() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple3<Integer, Integer, Integer>> left =
		env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
	final DataSet<Tuple3<Integer, Integer, Integer>> right =
		env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

	final DataSet<Tuple3<Integer, Integer, Integer>> joined = left
		.join(right, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST)
		.where(0, 1).equalTo(2, 1).with(new MockJoin());
	joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

	final Plan plan = env.createProgramPlan();
	final OptimizedPlan optimized = compileWithStats(plan);

	final SinkPlanNode sink = optimized.getDataSinks().iterator().next();
	final DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
	checkValidJoinInputProperties(join);
}
public FlinkPlanner(List<String> classPath) { super(); this.classPath = classPath; env.getConfig().disableSysoutLogging(); if (env.getParallelism() <= 0) { // load the default parallelism from config GlobalConfiguration.loadConfiguration(new File(CliFrontend.getConfigurationDirectoryFromEnv()).getAbsolutePath()); org.apache.flink.configuration.Configuration configuration = GlobalConfiguration.getConfiguration(); int parallelism = configuration.getInteger(ConfigConstants.DEFAULT_PARALLELISM_KEY, -1); if (parallelism <= 0) { throw new RuntimeException("Please set the default parallelism via the -p command-line flag"); } else { env.setParallelism(parallelism); } } }
@Test(expected = InvalidProgramException.class) public void testArrayOrderFull() { List<Object[]> arrayData = new ArrayList<>(); arrayData.add(new Object[0]); final ExecutionEnvironment env = ExecutionEnvironment .getExecutionEnvironment(); DataSet<Object[]> pojoDs = env .fromCollection(arrayData); // must not work pojoDs.writeAsText("/tmp/willNotHappen") .sortLocalOutput("*", Order.ASCENDING); }
jobName = getDefaultName(); Plan plan = translator.translateToPlan(this.sinks, jobName); if (getParallelism() > 0) { plan.setDefaultParallelism(getParallelism()); plan.setExecutionConfig(getConfig()); if (!config.isAutoTypeRegistrationDisabled()) { plan.accept(new Visitor<org.apache.flink.api.common.operators.Operator<?>>() { registerCachedFilesWithPlan(plan); } catch (Exception e) { throw new RuntimeException("Error while registering cached files: " + e.getMessage(), e); int registeredTypes = config.getRegisteredKryoTypes().size() + config.getRegisteredPojoTypes().size() + config.getRegisteredTypesWithKryoSerializerClasses().size() + config.getRegisteredTypesWithKryoSerializers().size();