/**
 * Supplies the mismatches input: an external text file when {@code fileOutput}
 * is set, otherwise the bundled sample data.
 *
 * @param env the execution environment used to create the data source
 * @return the mismatch records as raw text lines
 */
private static DataSet<String> getMismatchesData(ExecutionEnvironment env) {
	// Ternary keeps the two alternative sources visually symmetric.
	return fileOutput
			? env.readTextFile(mismatchesInputPath)
			: MusicProfilesData.getMismatches(env);
}
}
/**
 * Registers a text-file source in the set registry: lines are read with the
 * requested parallelism and immediately pushed through a serializing post-step
 * map (same parallelism) so downstream operators receive serialized records.
 */
private void createTextSource(ExecutionEnvironment env, PythonOperationInfo info) {
	SerializerMap<String> postStep = new SerializerMap<String>();
	sets.add(info.setID, env.readTextFile(info.path)
			.setParallelism(info.parallelism).name("TextSource")
			.map(postStep)
			.setParallelism(info.parallelism).name("TextSourcePostStep"));
}
/**
 * Builds the classic word-count plan: tokenize lines, group by word (field 0),
 * sum the counts (field 1), and write the result as CSV.
 *
 * @param inFile      text file to read
 * @param outFile     destination for the CSV result
 * @param parallelism default parallelism applied to the whole job
 * @return the compiled program plan
 */
private Plan getWordCountPlan(File inFile, File outFile, int parallelism) {
	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(parallelism);

	environment.readTextFile(inFile.getAbsolutePath())
			.flatMap(new Tokenizer())
			.groupBy(0)
			.sum(1)
			.writeAsCsv(outFile.getAbsolutePath());

	return environment.createProgramPlan();
}
}
if (params.has("input")) { text = env.readTextFile(params.get("input")); } else {
if (params.has("input")) { text = env.readTextFile(params.get("input")); } else {
@Test
public void testBatchDistributedCache() throws Exception {
	// Write the fixture to a temp file and register it under a cache name so
	// the WordChecker flat-map can verify the file was shipped to the workers.
	String inputPath = createTempFile("count.txt", DATA);

	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.registerCachedFile(inputPath, "cache_test");

	// count() forces eager execution of the pipeline.
	environment.readTextFile(inputPath).flatMap(new WordChecker()).count();
}
@Override
protected void testProgram() throws Exception {
	ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

	// Classic word count: tokenize, group on the word field, aggregate per
	// group, and emit newline-separated, space-delimited CSV.
	DataSet<String> lines = environment.readTextFile(dataPath);
	lines.flatMap(new TokenizeLine())
			.groupBy(0)
			.reduceGroup(new CountWords())
			.writeAsCsv(resultPath, "\n", " ");

	this.result = environment.execute();
}
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<String> lines = environment.readTextFile(textPath);

	// Groups on the nested key expression "complex.someTest" and merges the
	// counts of each pair of records within a group.
	DataSet<WC> wordCounts = lines
			.flatMap(new Tokenizer())
			.groupBy("complex.someTest")
			.reduce(new ReduceFunction<WC>() {
				private static final long serialVersionUID = 1L;

				public WC reduce(WC left, WC right) {
					// Keys are equal within a group, so either side's key works.
					return new WC(left.complex.someTest, left.count + right.count);
				}
			});

	wordCounts.writeAsText(resultPath);
	environment.execute("WordCount with custom data types example");
}
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<String> lines = environment.readTextFile(textPath);

	// Word count over a custom POJO: group on the "word" field expression and
	// merge the counts of each pair of records within a group.
	DataSet<WC> wordCounts = lines
			.flatMap(new Tokenizer())
			.groupBy("word")
			.reduce(new ReduceFunction<WC>() {
				private static final long serialVersionUID = 1L;

				public WC reduce(WC left, WC right) {
					// Keys are equal within a group, so either side's key works.
					return new WC(left.word, left.count + right.count);
				}
			});

	wordCounts.writeAsText(resultPath);
	environment.execute("WordCount with custom data types example");
}
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<String> lines = environment.readTextFile(textPath);

	// The pipeline is typed against the base class but the actual instances are
	// WC: the reduce adds the subclass-only secretCount fields, and the trailing
	// map copies the secret total into the visible count field before printing.
	DataSet<WCBase> wordCounts = lines
			.flatMap(new Tokenizer())
			.groupBy("word")
			.reduce(new ReduceFunction<WCBase>() {
				private static final long serialVersionUID = 1L;

				public WCBase reduce(WCBase left, WCBase right) {
					WC first = (WC) left;
					WC second = (WC) right;
					return new WC(left.word, first.secretCount + second.secretCount);
				}
			})
			.map(new MapFunction<WCBase, WCBase>() {
				@Override
				public WCBase map(WCBase value) throws Exception {
					WC wc = (WC) value;
					wc.count = wc.secretCount;
					return wc;
				}
			});

	wordCounts.writeAsText(resultPath);
	environment.execute("WordCount with custom data types example");
}
env.setParallelism(100); DataSet<String> input1 = env.readTextFile(IN_FILE).name("source1"); DataSet<String> input2 = env.readTextFile(IN_FILE).name("source2"); DataSet<String> input3 = env.readTextFile(IN_FILE).name("source3");
@Test
public void testBCVariableClosure() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<String> source = env.readTextFile(IN_FILE).name("source1");

	DataSet<String> topElement = source
			.map(new IdentityMapper<String>())
			.reduceGroup(new Top1GroupReducer<String>());

	// The same reduced set is broadcast both to the iteration input ("bc") and
	// to the step function inside the loop ("red") — the closure case under test.
	DataSet<String> start = source
			.map(new IdentityMapper<String>())
			.withBroadcastSet(topElement, "bc");

	IterativeDataSet<String> loop = start.iterate(100);
	loop.closeWith(loop.map(new IdentityMapper<String>()).withBroadcastSet(topElement, "red"))
			.output(new DiscardingOutputFormat<String>());

	Plan plan = env.createProgramPlan();

	try {
		compileNoStats(plan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	DataSet<String> lines = environment.readTextFile(textPath);

	// Typed against the base class, operating on WC instances: the reduce sums
	// the nested secretCount counters and reuses the left-hand object in place;
	// the trailing map copies the secret total into the visible count field.
	DataSet<WCBase> wordCounts = lines
			.flatMap(new Tokenizer())
			.groupBy("word")
			.reduce(new ReduceFunction<WCBase>() {
				private static final long serialVersionUID = 1L;

				public WCBase reduce(WCBase left, WCBase right) {
					WC first = (WC) left;
					WC second = (WC) right;
					int sum = first.secretCount.getCount() + second.secretCount.getCount();
					first.secretCount.setCount(sum);
					return first;
				}
			})
			.map(new MapFunction<WCBase, WCBase>() {
				@Override
				public WCBase map(WCBase value) throws Exception {
					WC wc = (WC) value;
					wc.count = wc.secretCount.getCount();
					return wc;
				}
			});

	wordCounts.writeAsText(resultPath);
	environment.execute("WordCount with custom data types example");
}
// NOTE(review): "Closue" is a typo for "Closure", kept because renaming would
// change the public test name.
@Test
public void testMultipleIterationsWithClosueBCVars() {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(100);

	DataSet<String> source = env.readTextFile(IN_FILE).name("source1");

	// Three independent iterations over the same source, each closed with a
	// different step function, must all compile into a single plan.
	IterativeDataSet<String> firstLoop = source.iterate(100);
	IterativeDataSet<String> secondLoop = source.iterate(20);
	IterativeDataSet<String> thirdLoop = source.iterate(17);

	firstLoop.closeWith(firstLoop.map(new IdentityMapper<String>()))
			.output(new DiscardingOutputFormat<String>());
	secondLoop.closeWith(secondLoop.reduceGroup(new Top1GroupReducer<String>()))
			.output(new DiscardingOutputFormat<String>());
	thirdLoop.closeWith(thirdLoop.reduceGroup(new IdentityGroupReducer<String>()))
			.output(new DiscardingOutputFormat<String>());

	Plan plan = env.createProgramPlan();

	try {
		compileNoStats(plan);
	} catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<String> dataSet = env.readTextFile("/never/accessed"); dataSet .map(new MapFunction<String, Integer>() {
env.getConfig().setExecutionMode(execMode); DataSet<String> dataSet = env.readTextFile("/never/accessed"); dataSet .map(new MapFunction<String, Integer>() {
env.setParallelism(100); DataSet<String> input = env.readTextFile(IN_FILE).name("source1");
DataSet<String> source = env.readTextFile(IN_FILE); DataSet<Tuple2<String, Integer>> lastUnion = source.flatMap(new DummyFlatMap());
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Integer> data = env.readTextFile("/never/accessed") .map(new MapFunction<String, Integer>() { @Override
DataSet<String> lines = env.readTextFile(IN_FILE).name("Input Lines");