/**
 * Writes a DataSet to the standard output stream (stdout).<br/>
 * For each element of the DataSet the result of {@link Object#toString()} is written.
 *
 * @return The DataSink that writes the DataSet.
 */
public DataSink<T> print() {
	// 'false' selects plain output: records are written without any sink-id prefix
	PrintingOutputFormat<T> stdoutFormat = new PrintingOutputFormat<T>(false);
	return output(stdoutFormat);
}
/**
 * Writes a {@link Tuple} DataSet as a CSV file to the specified location.<br/>
 * <b>Note: Only a Tuple DataSet can be written as a CSV file.</b><br/>
 * For each Tuple field the result of {@link Object#toString()} is written.
 * Tuple fields are separated by the default field delimiter {@link CsvOutputFormat#DEFAULT_FIELD_DELIMITER}.<br/>
 * Tuples are separated by the default line delimiter {@link CsvOutputFormat#DEFAULT_LINE_DELIMITER}.
 *
 * @param filePath The path pointing to the location the CSV file is written to.
 * @return The DataSink that writes the DataSet.
 *
 * @see Tuple
 * @see CsvOutputFormat
 */
public DataSink<T> writeAsCsv(String filePath) {
	// delegate to the fully-parameterized variant with the default line and field delimiters
	return writeAsCsv(filePath, CsvOutputFormat.DEFAULT_LINE_DELIMITER, CsvOutputFormat.DEFAULT_FIELD_DELIMITER);
}
/**
 * Gets the type information of the data type of the input data set.
 * This method returns equivalent information as {@code getInput().getType()}.
 *
 * @return The input data type.
 */
public TypeInformation<IN> getInputType() {
	// forward the type info straight from the wrapped input operator
	TypeInformation<IN> inputType = this.input.getType();
	return inputType;
}
/**
 * Closes the delta iteration. This method defines the end of the delta iteration's function.
 *
 * @param solutionSetDelta The delta for the solution set. The delta will be merged into the
 *                         solution set at the end of each iteration.
 * @param newWorkset The new workset (feedback data set) that will be fed back to the next iteration.
 * @return The DataSet that represents the result of the iteration, after the computation has terminated.
 *
 * @see DataSet#iterateDelta(DataSet, int, int...)
 */
public DataSet<ST> closeWith(DataSet<ST> solutionSetDelta, DataSet<WT> newWorkset) {
	// bundle everything the iteration needs (context, element types, feedback channels,
	// join keys, and the iteration bound) into the result-set node
	return new DeltaIterationResultSet<ST, WT>(
			initialSolutionSet.getExecutionEnvironment(),
			initialSolutionSet.getType(),
			initialWorkset.getType(),
			this,
			solutionSetDelta,
			newWorkset,
			keys,
			maxIterations);
}
.map(new EdgeByIdProjector()); .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder()) triangles.writeAsCsv(outputPath, "\n", ","); } else { triangles.print();
map(new RankAssigner((1.0d / numPages))); linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList()); IterativeDataSet<Tuple2<Long, Double>> iteration = pagesWithRanks.iterate(maxIterations); newRanks.join(iteration).where(0).equalTo(0) finalPageRanks.writeAsCsv(outputPath, "\n", " "); } else { finalPageRanks.print();
DataSet<Tuple2<Long, Long>> edges = getEdgeDataSet(env).flatMap(new UndirectEdge()); DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>()); verticesWithInitialId.iterateDelta(verticesWithInitialId, maxIterations, 0); result.writeAsCsv(outputPath, "\n", " "); } else { result.print();
orders.filter( new FilterFunction<Tuple3<Integer,Integer, String>>() { @Override lineitems.filter(new FilterFunction<Tuple4<Integer, Double, Double, String>>() { @Override public boolean filter(Tuple4<Integer, Double, Double, String> t) ordersFilteredByYear.joinWithHuge(lineitemsFilteredByFlag) .where(0).equalTo(0) .projectFirst(1).projectSecond(1,2) .map(new MapFunction<Tuple3<Integer, Double, Double>, Tuple2<Integer, Double>>() { @Override public Tuple2<Integer, Double> map(Tuple3<Integer, Double, Double> t) { .joinWithTiny(nations) .where(3).equalTo(0) .projectFirst(0,1,2).projectSecond(1).projectFirst(4) customerWithNation.join(revenueOfCustomerKey) .where(0).equalTo(0) .projectFirst(0,1,2,3,4).projectSecond(1) customerWithRevenue.writeAsCsv(outputPath);
IterativeDataSet<Centroid> loop = centroids.iterate(numIterations); .map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids") .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids"); clusteredPoints.writeAsCsv(outputPath, "\n", " "); } else { clusteredPoints.print();
.filter(new FilterDocByKeyWords()) .project(0).types(String.class); .filter(new FilterByRank()); .filter(new FilterVisitsByDate()) .project(0).types(String.class); filterDocs.join(filterRanks) .where(0).equalTo(1) .projectSecond(0,1,2) joinDocsRanks.coGroup(filterVisits) .where(1).equalTo(0) .with(new AntiJoinVisits()); result.writeAsCsv(outputPath, "\n", "|"); } else { result.print();
orders.filter( new FilterFunction<Tuple5<Integer, String, String, String, Integer>>() { @Override ordersFilteredByYear.joinWithHuge(lineitems) .where(0).equalTo(0) .projectFirst(0,1).projectSecond(1) lineitemsOfOrders.groupBy(0,1).aggregate(Aggregations.SUM, 2); priceSums.writeAsCsv(outputPath);
cust = cust.filter( new FilterFunction<Customer>() { @Override or = or.filter( new FilterFunction<Order>() { private DateFormat format = new SimpleDateFormat("yyyy-MM-dd"); li = li.filter( new FilterFunction<Lineitem>() { private DateFormat format = new SimpleDateFormat("yyyy-MM-dd"); cust.join(or) .where(0) .equalTo(0) customerWithOrders.join(li) .where(4) .equalTo(0) .groupBy(0, 2, 3) .aggregate(Aggregations.SUM, 1); joined.writeAsCsv(outputPath, "\n", "|");
public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<String> text = getTextDataSet(env); DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .aggregate(Aggregations.SUM, 1); // emit result if(fileOutput) { counts.writeAsCsv(outputPath, "\n", " "); } else { counts.print(); } // execute program env.execute("WordCount Example"); }
groupBy(1).reduceGroup(new Tuple3GroupReduce()); reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(4,0).reduceGroup(new Tuple5GroupReduce()); reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(1).sortGroup(2,Order.ASCENDING).reduceGroup(new Tuple3SortedGroupReduce()); reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(new KeySelector<Tuple3<Integer,Long,String>, Long>() { private static final long serialVersionUID = 1L; @Override reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(new KeySelector<CustomType, Integer>() { private static final long serialVersionUID = 1L; @Override reduceDs.writeAsText(resultPath); env.execute();
/**
 * Runs a Spargel (vertex-centric) connected-components computation over a
 * small fixed graph and prints the resulting (vertex, component) pairs.
 *
 * @param args ignored
 * @throws Exception if program execution fails
 */
public static void main(String[] args) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// vertex ids 0..10 (inclusive)
	final DataSet<Long> vertexIds = env.generateSequence(0, 10);

	// a fixed edge set; together with the vertex ids this defines the input graph
	final DataSet<Tuple2<Long, Long>> edges = env.fromElements(
			new Tuple2<Long, Long>(0L, 2L),
			new Tuple2<Long, Long>(2L, 4L),
			new Tuple2<Long, Long>(4L, 8L),
			new Tuple2<Long, Long>(1L, 5L),
			new Tuple2<Long, Long>(3L, 7L),
			new Tuple2<Long, Long>(3L, 9L));

	// initially every vertex is its own component: (id, id)
	final DataSet<Tuple2<Long, Long>> initialVertices = vertexIds.map(new IdAssigner());

	// run the vertex-centric iteration for at most 100 supersteps
	final DataSet<Tuple2<Long, Long>> result = initialVertices.runOperation(
			VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));

	result.print();
	env.execute("Spargel Connected Components");
}
DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env); DataSet<Tuple2<String, String>> joinDs = ds1.join(ds2) .where(1) .equalTo(1) .with(new T3T5Join()); joinDs.writeAsCsv(resultPath); env.execute(); DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env); DataSet<Tuple2<String, String>> joinDs = ds1.join(ds2) .where(0,1) .equalTo(0,4) .with(new T3T5Join()); joinDs.writeAsCsv(resultPath); env.execute(); DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env); DataSet<Tuple2<Tuple3<Integer, Long, String>,Tuple5<Integer, Long, Integer, String, Long>>> joinDs = ds1.join(ds2) .where(0) .equalTo(2); joinDs.writeAsCsv(resultPath); env.execute();
TypeInformation<Tuple2<VertexKey, VertexValue>> vertexTypes = initialVertices.getType(); TypeInformation<VertexKey> keyType = ((TupleTypeInfo<?>) initialVertices.getType()).getTypeAt(0); TypeInformation<Tuple2<VertexKey, Message>> messageTypeInfo = new TupleTypeInfo<Tuple2<VertexKey,Message>>(keyType, messageType); this.initialVertices.iterateDelta(this.initialVertices, this.maximumNumberOfIterations, zeroKeyPos); iteration.name(name); iteration.parallelism(parallelism); if (edgesWithoutValue != null) { MessagingUdfNoEdgeValues<VertexKey, VertexValue, Message> messenger = new MessagingUdfNoEdgeValues<VertexKey, VertexValue, Message>(messagingFunction, messageTypeInfo); messages = this.edgesWithoutValue.coGroup(iteration.getWorkset()).where(0).equalTo(0).with(messenger); messages = this.edgesWithValue.coGroup(iteration.getWorkset()).where(0).equalTo(0).with(messenger);
/**
 * Runs the WordCount pipeline on the embedded test text and gathers the
 * aggregated (word, count) pairs into the in-memory collection
 * {@code resultsCollected} for later verification.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<String> text = env.fromElements(WordCountData.TEXT);
	final DataSet<Tuple2<String, Integer>> tokenized = text.flatMap(new WordCount.Tokenizer());
	final DataSet<Tuple2<String, Integer>> counts = tokenized.groupBy(0).aggregate(Aggregations.SUM, 1);

	// sink into a local collection instead of a file so the test can inspect the output
	counts.output(new LocalCollectionOutputFormat<Tuple2<String, Integer>>(resultsCollected));

	env.execute("Word Count Collection");
}
}
groupBy(1).reduce(new Tuple3Reduce("B-)")); reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(4,0).reduce(new Tuple5Reduce()); reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(new KeySelector<Tuple3<Integer,Long,String>, Long>() { private static final long serialVersionUID = 1L; @Override reduceDs.writeAsCsv(resultPath); env.execute(); groupBy(new KeySelector<CustomType, Integer>() { private static final long serialVersionUID = 1L; @Override reduceDs.writeAsText(resultPath); env.execute(); reduce(new AllAddingTuple3Reduce()); reduceDs.writeAsCsv(resultPath); env.execute();
map(new MapFunction<String, String>() { private static final long serialVersionUID = 1L; identityMapDs.writeAsText(resultPath); env.execute(); map(new MapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>() { private static final long serialVersionUID = 1L; identityMapDs.writeAsCsv(resultPath); env.execute(); map(new MapFunction<CustomType, Tuple3<Integer, Long, String>>() { private static final long serialVersionUID = 1L; private final Tuple3<Integer, Long, String> out = new Tuple3<Integer, Long, String>(); typeConversionMapDs.writeAsCsv(resultPath); env.execute(); map(new MapFunction<Tuple3<Integer, Long, String>, String>() { private static final long serialVersionUID = 1L; typeConversionMapDs.writeAsText(resultPath); env.execute(); map(new MapFunction<Tuple3<Integer, Long, String>, Tuple3<Integer, String, Long>>() { private static final long serialVersionUID = 1L; private final Tuple3<Integer, String, Long> out = new Tuple3<Integer, String, Long>();