@Override public Plan getPlan(String... args) { // parse job parameters int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String dataInput = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); HadoopDataSource<LongWritable, Text> source = new HadoopDataSource<LongWritable, Text>( new TextInputFormat(), new JobConf(), "Input Lines"); TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput)); MapOperator mapper = MapOperator.builder(new TokenizeLine()) .input(source) .name("Tokenize Lines") .build(); ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0) .input(mapper) .name("Count Words") .build(); HadoopDataSink<Text, IntWritable> out = new HadoopDataSink<Text, IntWritable>(new TextOutputFormat<Text, IntWritable>(),new JobConf(), "Hadoop TextOutputFormat", reducer, Text.class, IntWritable.class); TextOutputFormat.setOutputPath(out.getJobConf(), new Path(output)); Plan plan = new Plan(out, "Hadoop OutputFormat Example"); plan.setDefaultParallelism(numSubTasks); return plan; }
/**
 * The private constructor that only gets invoked from the Builder.
 * Transfers the builder's configuration (UDF, key columns, inputs,
 * secondary order, broadcast variables, semantic annotations) into
 * the new operator instance.
 *
 * @param builder The builder carrying the operator's configuration.
 */
protected ReduceOperator(Builder builder) {
	super(builder.udf, OperatorInfoHelper.unary(), builder.getKeyColumnsArray(), builder.name);
	// Keep the key classes alongside the key column positions set via super(...).
	this.keyTypes = builder.getKeyClassesArray();
	// Multiple inputs are merged into a single input via a union cascade.
	if (builder.inputs != null && !builder.inputs.isEmpty()) {
		setInput(Operator.createUnionCascade(builder.inputs));
	}
	setGroupOrder(builder.secondaryOrder);
	setBroadcastVariables(builder.broadcastInputs);
	// Constant-field semantics are read from annotations on the UDF.
	setSemanticProperties(FunctionAnnotation.readSingleConstantAnnotations(builder.udf));
}
/**
 * Creates and returns a ReduceOperator configured with the values
 * previously given to this builder.
 *
 * @return The created operator.
 */
public ReduceOperator build() {
	if (this.name == null) {
		// No explicit name was set: default to the user function's class name.
		this.name = this.udf.getUserCodeClass().getName();
	}
	return new ReduceOperator(this);
}
}
/**
 * Builds, optimizes and compiles the reduce test plan into a job graph.
 * Parallelism and local/ship strategy hints are taken from the test config.
 *
 * @return the compiled job graph for this test case.
 * @throws Exception if plan compilation fails.
 */
@Override
protected JobGraph getJobGraph() throws Exception {
	FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inPath);
	DelimitedInputFormat.configureDelimitedFormat(input)
			.recordDelimiter('\n');
	input.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1));

	ReduceOperator testReducer = ReduceOperator.builder(new TestReducer(), StringValue.class, 0)
			.build();
	testReducer.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1));
	// Inject the strategies under test as compiler hints.
	testReducer.getParameters().setString(PactCompiler.HINT_LOCAL_STRATEGY,
			config.getString("ReduceTest#LocalStrategy", ""));
	testReducer.getParameters().setString(PactCompiler.HINT_SHIP_STRATEGY,
			config.getString("ReduceTest#ShipStrategy", ""));

	// Single writer so the result file is deterministic to compare.
	FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
	output.setDegreeOfParallelism(1);
	output.setInput(testReducer);
	testReducer.setInput(input);

	Plan plan = new Plan(output);
	PactCompiler compiler = new PactCompiler(new DataStatistics());
	OptimizedPlan optimizedPlan = compiler.compile(plan);
	NepheleJobGraphGenerator generator = new NepheleJobGraphGenerator();
	return generator.compileJobGraph(optimizedPlan);
}
// NOTE(review): excerpt is mid-method — the builder result below is not assigned
// here; presumably `aggregation` is declared from this chain in lines preceding
// this excerpt. Verify against the full source.
ReduceOperator.builder(CountAgg.class, StringValue.class, 0)
	.name("AggregateGroupBy")
	.build();
aggregation.setDegreeOfParallelism(this.degreeOfParallelism);
// Wire the dataflow: the two filters feed the join, the join feeds the
// aggregation, and the aggregation feeds the result sink.
join.setFirstInput(ordersFilter);
join.setSecondInput(lineFilter);
aggregation.setInput(join);
result.setInput(aggregation);
@Override public Plan getPlan(String... args) { // parse job parameters int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String dataInput = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); @SuppressWarnings("unchecked") CsvInputFormat format = new CsvInputFormat(' ', IntValue.class, IntValue.class); FileDataSource input = new FileDataSource(format, dataInput, "Input"); // create the reduce contract and sets the key to the first field ReduceOperator sorter = ReduceOperator.builder(new IdentityReducer(), IntValue.class, 0) .input(input) .name("Reducer") .build(); // sets the group sorting to the second field sorter.setGroupOrder(new Ordering(1, IntValue.class, Order.ASCENDING)); // create and configure the output format FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, sorter, "Sorted Output"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n') .fieldDelimiter(' ') .field(IntValue.class, 0) .field(IntValue.class, 1); Plan plan = new Plan(out, "SecondarySort Example"); plan.setDefaultParallelism(numSubTasks); return plan; }
FileDataSource input1 = new FileDataSource(format1, input1Path, "Input 1");
// NOTE(review): the aggInput1 chain below ends at .name("AggOrders") with no
// .build() or terminating semicolon — lines appear to have been lost from this
// excerpt (compare the complete aggInput2 chain). Confirm against the full source.
ReduceOperator aggInput1 = ReduceOperator.builder(DummyReduce.class, IntValue.class, 0)
	.input(input1)
	.name("AggOrders")
input2.setDegreeOfParallelism(numSubtasksInput2);
// Second aggregation, grouped on IntValue field 0 of the second input.
ReduceOperator aggInput2 = ReduceOperator.builder(DummyReduce.class, IntValue.class, 0)
	.input(input2)
	.name("AggLines")
	.build();
aggInput2.setDegreeOfParallelism(numSubtasksInput2);
/**
 * The operator is combinable when the superclass already reports it as such,
 * or when the user function class carries the {@code @Combinable} annotation.
 *
 * @return {@code true} if a combiner may be applied, {@code false} otherwise.
 */
@Override
public boolean isCombinable() {
	// Preserve short-circuit order: check the superclass flag first.
	if (super.isCombinable()) {
		return true;
	}
	return getUserCodeWrapper().getUserCodeAnnotation(Combinable.class) != null;
}
// Aggregation grouped on the StringValue key at position 1.
ReduceOperator aggCO = ReduceOperator.builder(new AggCO(), StringValue.class, 1)
	.name("AggCo")
	.build();
aggCO.setDegreeOfParallelism(numSubtasks);
// Wire the dataflow: orders and customers feed the join, which feeds the aggregation.
aggCO.setInput(joinCO);
joinCO.setFirstInput(orders);
joinCO.setSecondInput(customers);
/**
 * Builds the group-order test plan: a reducer whose groups must arrive
 * sorted ascending on field 1, verified by {@code CheckingReducer}.
 *
 * @return the test plan, rooted at the CSV sink.
 */
@Override
protected Plan getTestJob() {
	int parallelism = this.config.getInteger("GroupOrderTest#NumSubtasks", 1);

	@SuppressWarnings("unchecked")
	CsvInputFormat sourceFormat = new CsvInputFormat(',', IntValue.class, IntValue.class);
	FileDataSource source = new FileDataSource(sourceFormat, this.textPath, "Source");

	ReduceOperator reducer = ReduceOperator.builder(CheckingReducer.class)
			.keyField(IntValue.class, 0)
			.input(source)
			.name("Ordered Reducer")
			.build();
	// Request ascending order on field 1 within each group.
	reducer.setGroupOrder(new Ordering(1, IntValue.class, Order.ASCENDING));

	FileDataSink sink = new FileDataSink(CsvOutputFormat.class, this.resultPath, reducer, "Sink");
	CsvOutputFormat.configureRecordFormat(sink)
			.recordDelimiter('\n')
			.fieldDelimiter(',')
			.field(IntValue.class, 0)
			.field(IntValue.class, 1);

	Plan plan = new Plan(sink);
	plan.setDefaultParallelism(parallelism);
	return plan;
}
	.name("Tokenize Lines")
	.build();
// Reduce grouped on the StringValue key at position 0.
ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
	.input(mapper)
	.name("Count Words")
/**
 * Builds and locally executes a small example plan: a generic source feeding
 * a map, a reduce grouped on IntValue field 1, and a printing sink.
 *
 * @param args unused.
 * @throws Exception if local execution fails.
 */
public static void main(String[] args) throws Exception {
	// Source backed by UserGeneratingInputFormat.
	GenericDataSource<UserGeneratingInputFormat> source =
			new GenericDataSource<UserGeneratingInputFormat>(UserGeneratingInputFormat.class);

	MapOperator extractor = MapOperator.builder(new NumberExtractingMapper())
			.input(source)
			.name("le mapper")
			.build();

	// Reduce grouped on the IntValue key at position 1.
	ReduceOperator concatenator = ReduceOperator.builder(new ConcatenatingReducer(), IntValue.class, 1)
			.input(extractor)
			.name("le reducer")
			.build();

	GenericDataSink sink = new GenericDataSink(PrintingOutputFormat.class, concatenator);

	Plan plan = new Plan(sink);
	plan.setDefaultParallelism(4);
	LocalExecutor.execute(plan);
}
	.input(edges).name("Project Edge").build();
// Reduce grouped on the IntValue key at position 0 (named "Count Edges for Vertex").
ReduceOperator edgeCounter = ReduceOperator.builder(new CountEdges(), IntValue.class, 0)
	.input(projectEdge).name("Count Edges for Vertex").build();
// Composite grouping key over fields 0 and 1; chain continues past this excerpt.
ReduceOperator countJoiner = ReduceOperator.builder(new JoinCountsAndUniquify())
	.keyField(IntValue.class, 0)
	.keyField(IntValue.class, 1)
@Override public Plan getPlan(String... args) { // parse job parameters int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String dataInput = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines"); MapOperator mapper = MapOperator.builder(new TokenizeLine()) .input(source) .name("Tokenize Lines") .build(); ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0) .input(mapper) .name("Count Words") .build(); @SuppressWarnings("unchecked") FileDataSink out = new FileDataSink(new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class), output, reducer, "Word Counts"); Plan plan = new Plan(out, "WordCount Example"); plan.setDefaultParallelism(numSubTasks); return plan; }
// NOTE(review): none of the three builder chains below ends in .build(); lines
// appear to have been lost from this excerpt — confirm against the full source.
// Each reducer is grouped on the IntValue key at position 0.
ReduceOperator findNearestClusterCenters =
	ReduceOperator.builder(new FindNearestCenter(), IntValue.class, 0)
		.input(computeDistance)
		.name("Find Nearest Centers")
ReduceOperator recomputeClusterCenter =
	ReduceOperator.builder(new RecomputeClusterCenter(), IntValue.class, 0)
		.input(findNearestClusterCenters)
		.name("Recompute Center Positions")
ReduceOperator findNearestFinalCluster =
	ReduceOperator.builder(new FindNearestCenter(), IntValue.class, 0)
		.input(computeFinalDistance)
		.name("Find Nearest Final Centers")
/**
 * Assembles the WordCount example plan with a CSV sink configured through
 * {@code configureRecordFormat}.
 * Expected arguments: [parallelism] [input path] [output path].
 *
 * @param args optional job parameters; missing entries fall back to defaults.
 * @return the assembled plan, rooted at the CSV sink.
 */
@Override
public Plan getPlan(String... args) {
	// Parse job parameters, defaulting to parallelism 1 and empty paths.
	final int parallelism = args.length > 0 ? Integer.parseInt(args[0]) : 1;
	final String inputPath = args.length > 1 ? args[1] : "";
	final String outputPath = args.length > 2 ? args[2] : "";

	FileDataSource lines = new FileDataSource(new TextInputFormat(), inputPath, "Input Lines");

	MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
			.input(lines)
			.name("Tokenize Lines")
			.build();

	// Group on the StringValue key at position 0.
	ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
			.input(tokenizer)
			.name("Count Words")
			.build();

	// Newline-delimited, space-separated (word, count) records.
	FileDataSink sink = new FileDataSink(new CsvOutputFormat(), outputPath, counter, "Word Counts");
	CsvOutputFormat.configureRecordFormat(sink)
			.recordDelimiter('\n')
			.fieldDelimiter(' ')
			.field(StringValue.class, 0)
			.field(IntValue.class, 1);

	Plan plan = new Plan(sink, "WordCount Example");
	plan.setDefaultParallelism(parallelism);
	return plan;
}