// Excerpt fragments: each of the four lines below is the same unterminated
// expression, calling flatMap on `text` with a newly constructed Tokenizer.
// No enclosing method or statement terminator is visible for them here.
text.flatMap(new Tokenizer())
text.flatMap(new Tokenizer())
text.flatMap(new Tokenizer())
text.flatMap(new Tokenizer())
public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<String> text = getTextDataSet(env); DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .sum(1); // emit result if(fileOutput) { counts.writeAsCsv(outputPath, "\n", " "); // execute program env.execute("WordCount Example"); } else { counts.print(); } }
/**
 * Runs a word count over {@code WordCountData.TEXT} and collects the output.
 *
 * <p>The text is tokenized with {@code WordCount.Tokenizer}, grouped by tuple
 * field 0 and aggregated with {@code Aggregations.SUM} on tuple field 1. The
 * resulting tuples are emitted through a {@code LocalCollectionOutputFormat}
 * backed by {@code resultsCollected}, and the job is executed under the name
 * "Word Count Collection".
 *
 * @throws Exception any exception raised while assembling or running the job
 */
@Override
protected void testProgram() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Source -> tokenize -> group on the word -> sum the counts.
    final DataSet<Tuple2<String, Integer>> result = env
            .fromElements(WordCountData.TEXT)
            .flatMap(new WordCount.Tokenizer())
            .groupBy(0)
            .aggregate(Aggregations.SUM, 1);

    // Sink the tuples into the collection supplied to the output format.
    result.output(new LocalCollectionOutputFormat<Tuple2<String, Integer>>(resultsCollected));

    env.execute("Word Count Collection");
}
} // closes the enclosing class, whose header lies outside this excerpt