/**
 * Writes a DataSet to the standard output stream (stdout).<br/>
 * For each element of the DataSet the result of {@link Object#toString()} is written.
 *
 * @return The DataSink that writes the DataSet.
 */
public DataSink<T> print() {
	// 'false' selects plain output: records are written without any sink-id prefix
	PrintingOutputFormat<T> stdoutFormat = new PrintingOutputFormat<T>(false);
	return output(stdoutFormat);
}
/**
 * Writes a {@link Tuple} DataSet as a CSV file to the specified location.<br/>
 * <b>Note: Only a Tuple DataSet can be written as a CSV file.</b><br/>
 * For each Tuple field the result of {@link Object#toString()} is written.
 * Tuple fields are separated by the default field delimiter {@link CsvOutputFormat#DEFAULT_FIELD_DELIMITER}.<br/>
 * Tuples are separated by the default line delimiter {@link CsvOutputFormat#DEFAULT_LINE_DELIMITER}.
 *
 * @param filePath The path pointing to the location the CSV file is written to.
 * @return The DataSink that writes the DataSet.
 *
 * @see Tuple
 * @see CsvOutputFormat
 */
public DataSink<T> writeAsCsv(String filePath) {
	// delegate to the fully-parameterized variant with the default line and field delimiters
	return writeAsCsv(filePath, CsvOutputFormat.DEFAULT_LINE_DELIMITER, CsvOutputFormat.DEFAULT_FIELD_DELIMITER);
}
/**
 * Gets the type information of the data type of the input data set.
 * This method returns equivalent information as {@code getInput().getType()}.
 *
 * @return The input data type.
 */
public TypeInformation<IN> getInputType() {
	// forward the type info straight from the wrapped input operator
	TypeInformation<IN> inputType = this.input.getType();
	return inputType;
}
/**
 * Closes the delta iteration. This method defines the end of the delta iteration's function.
 *
 * @param solutionSetDelta The delta for the solution set. The delta will be merged into the
 *                         solution set at the end of each iteration.
 * @param newWorkset The new workset (feedback data set) that will be fed back to the next iteration.
 * @return The DataSet that represents the result of the iteration, after the computation has terminated.
 *
 * @see DataSet#iterateDelta(DataSet, int, int...)
 */
public DataSet<ST> closeWith(DataSet<ST> solutionSetDelta, DataSet<WT> newWorkset) {
	// bundle everything the iteration needs (context, element types, feedback channels,
	// join keys, and the iteration bound) into the result-set node
	return new DeltaIterationResultSet<ST, WT>(
			initialSolutionSet.getExecutionEnvironment(),
			initialSolutionSet.getType(),
			initialWorkset.getType(),
			this,
			solutionSetDelta,
			newWorkset,
			keys,
			maxIterations);
}
.map(new EdgeByIdProjector()); .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder()) triangles.writeAsCsv(outputPath, "\n", ","); } else { triangles.print();
map(new RankAssigner((1.0d / numPages))); linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList()); IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(maxIterations); newRanks.join(iteration).where(0).equalTo(0) finalPageRanks.writeAsCsv(outputPath, "\n", " "); } else { finalPageRanks.print();
DataSet<Tuple2<Long, Long>> edges = getEdgeDataSet(env).flatMap(new UndirectEdge()); DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>()); verticesWithInitialId.iterateDelta(verticesWithInitialId, maxIterations, 0); result.writeAsCsv(outputPath, "\n", " "); } else { result.print();
orders.filter( new FilterFunction<Tuple3<Integer,Integer, String>>() { @Override lineitems.filter(new FilterFunction<Tuple4<Integer, Double, Double, String>>() { @Override public boolean filter(Tuple4<Integer, Double, Double, String> t) ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag) .where(0).equalTo(0) .projectFirst(1).projectSecond(1,2) .map(new MapFunction<Tuple3<Integer, Double, Double>, Tuple2<Integer, Double>>() { @Override public Tuple2<Integer, Double> map(Tuple3<Integer, Double, Double> t) { .joinWithTiny(nations) .where(3).equalTo(0) .projectFirst(0,1,2).projectSecond(1).projectFirst(4) customerWithNation.join(revenueOfCustomerKey) .where(0).equalTo(0) .projectFirst(0,1,2,3,4).projectSecond(1) customerWithRevenue.writeAsCsv(outputPath);
IterativeDataSet<Centroid> loop = centroids.iterate(numIterations); .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids") .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids"); clusteredPoints.writeAsCsv(outputPath, "\n", " "); } else { clusteredPoints.print();
.filter(new FilterDocByKeyWords()) .project(0).types(String.class); .filter(new FilterByRank()); .filter(new FilterVisitsByDate()) .project(0).types(String.class); filterDocs.join(filterRanks) .where(0).equalTo(1) .projectSecond(0,1,2) joinDocsRanks.coGroup(filterVisits) .where(1).equalTo(0) .with(new AntiJoinVisits()); result.writeAsCsv(outputPath, "\n", "|"); } else { result.print();
orders.filter( new FilterFunction<Tuple5<Integer, String, String, String, Integer>>() { @Override ordersFilteredByYear.joinWithHuge(lineitems) .where(0).equalTo(0) .projectFirst(0,1).projectSecond(1) lineitemsOfOrders.groupBy(0,1).aggregate(Aggregations.SUM, 2); priceSums.writeAsCsv(outputPath);
cust = cust.filter( new FilterFunction<Customer>() { @Override or = or.filter( new FilterFunction<Order>() { private DateFormat format = new SimpleDateFormat("yyyy-MM-dd"); li = li.filter( new FilterFunction<Lineitem>() { private DateFormat format = new SimpleDateFormat("yyyy-MM-dd"); cust.join(or) .where(0) .equalTo(0) customerWithOrders.join(li) .where(4) .equalTo(0) .groupBy(0, 2, 3) .aggregate(Aggregations.SUM, 1); joined.writeAsCsv(outputPath, "\n", "|");
public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<String> text = getTextDataSet(env); DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .aggregate(Aggregations.SUM, 1); // emit result if(fileOutput) { counts.writeAsCsv(outputPath, "\n", " "); } else { counts.print(); } // execute program env.execute("WordCount Example"); }
groupBy(1).reduceGroup(new Tuple3GroupReduce()); reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(4,0).reduceGroup(new Tuple5GroupReduce()); reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(1).sortGroup(2,Order.ASCENDING).reduceGroup(new Tuple3SortedGroupReduce()); reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(new KeySelector<Tuple3<Integer,Long,String>, Long>() { private static final long serialVersionUID = 1L; @Override reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(new KeySelector<CustomType, Integer>() { private static final long serialVersionUID = 1L; @Override reduceDs.writeAsText(resultPath); env.execute();
/**
 * Runs a Spargel (vertex-centric) connected-components computation over a
 * small fixed graph and prints the resulting (vertex, component) pairs.
 *
 * @param args ignored
 * @throws Exception if program execution fails
 */
public static void main(String[] args) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// vertex ids 0..10 (inclusive)
	final DataSet<Long> vertexIds = env.generateSequence(0, 10);

	// a fixed edge set; together with the vertex ids this defines the input graph
	final DataSet<Tuple2<Long, Long>> edges = env.fromElements(
			new Tuple2<Long, Long>(0L, 2L),
			new Tuple2<Long, Long>(2L, 4L),
			new Tuple2<Long, Long>(4L, 8L),
			new Tuple2<Long, Long>(1L, 5L),
			new Tuple2<Long, Long>(3L, 7L),
			new Tuple2<Long, Long>(3L, 9L));

	// initially every vertex is its own component: (id, id)
	final DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());

	// run the vertex-centric iteration for at most 100 supersteps
	final DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(
			VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));

	result.print();
	env.execute("Spargel Connected Components");
}
DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env); DataSet<Tuple2<String, String>> joinDs = ds1.join(ds2) .where(1) .equalTo(1) .with(new T3T5Join()); joinDs.writeAsCsv(resultPath); env.execute(); DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env); DataSet<Tuple2<String, String>> joinDs = ds1.join(ds2) .where(0,1) .equalTo(0,4) .with(new T3T5Join()); joinDs.writeAsCsv(resultPath); env.execute(); DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env); DataSet<Tuple2<Tuple3<Integer, Long, String>,Tuple5<Integer, Long, Integer, String, Long>>> joinDs = ds1.join(ds2) .where(0) .equalTo(2); joinDs.writeAsCsv(resultPath); env.execute();
TypeInformation<Tuple2<VertexKey, VertexValue>> vertexTypes = initialVertices.getType(); TypeInformation<VertexKey> keyType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0); TypeInformation<Tuple2<VertexKey, Message>> messageTypeInfo = new TupleTypeInfo<Tuple2<VertexKey,Message>>(keyType, messageType); this.initialVertices.iterateDelta(this.initialVertices, this.maximumNumberOfIterations, zeroKeyPos); iteration.name(name); iteration.parallelism(parallelism); if (edgesWithoutValue != null) { MessagingUdfNoEdgeValues<VertexKey, VertexValue, Message> messenger = new MessagingUdfNoEdgeValues<VertexKey, VertexValue, Message>(messagingFunction, messageTypeInfo); messages = this.edgesWithoutValue.coGroup(iteration.getWorkset()).where(0).equalTo(0).with(messenger); messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(0).equalTo(0).with(messenger);
/**
 * Runs the WordCount pipeline on the embedded test text and gathers the
 * aggregated (word, count) pairs into the in-memory collection
 * {@code resultsCollected} for later verification.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<String> text = env.fromElements(WordCountData.TEXT);
	final DataSet<Tuple2<String, Integer>> tokenized = text.flatMap(new WordCount.Tokenizer());
	final DataSet<Tuple2<String, Integer>> counts = tokenized.groupBy(0).aggregate(Aggregations.SUM, 1);

	// sink into a local collection instead of a file so the test can inspect the output
	counts.output(new LocalCollectionOutputFormat<Tuple2<String, Integer>>(resultsCollected));

	env.execute("Word Count Collection");
}
}
groupBy(1).reduce(new Tuple3Reduce("B-)")); reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(4,0).reduce(new Tuple5Reduce()); reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(new KeySelector<Tuple3<Integer,Long,String>, Long>() { private static final long serialVersionUID = 1L; @Override reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(new KeySelector<CustomType, Integer>() { private static final long serialVersionUID = 1L; @Override reduceDs.writeAsText(resultPath); env.execute(); reduce(new AllAddingTuple3Reduce()); reduceDs.writeAsCsv(resultPath); env.execute();
map(new MapFunction<String, String>() { private static final long serialVersionUID = 1L; identityMapDs.writeAsText(resultPath); env.execute(); map(new MapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() { private static final long serialVersionUID = 1L; identityMapDs.writeAsCsv(resultPath); env.execute(); map(new MapFunction<CustomType, Tuple3<Integer, Long, String>>() { private static final long serialVersionUID = 1L; private final Tuple3<Integer, Long, String> out = new Tuple3<Integer, Long, String>(); typeConversionMapDs.writeAsCsv(resultPath); env.execute(); map(new MapFunction<Tuple3<Integer, Long, String>, String>() { private static final long serialVersionUID = 1L; typeConversionMapDs.writeAsText(resultPath); env.execute(); map(new MapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, String, Long>>() { private static final long serialVersionUID = 1L; private final Tuple3<Integer, String, Long> out = new Tuple3<Integer, String, Long>();