eu.stratosphere.api.java.record.operators.ReduceOperator$Builder.build java code examples

/**
 * Assembles the "Hadoop OutputFormat Example" plan: a Hadoop-backed text source feeds the
 * "Tokenize Lines" mapper, whose output goes to the "Count Words" reducer (key field 0,
 * {@code StringValue}) and is finally handed to a Hadoop {@code TextOutputFormat} sink.
 *
 * @param args optional positional parameters: {@code args[0]} default parallelism (1 when
 *             absent), {@code args[1]} input path ("" when absent), {@code args[2]} output
 *             path ("" when absent)
 * @return the plan rooted at the Hadoop data sink
 * @throws NumberFormatException if {@code args[0]} is present but is not a parsable integer
 */
@Override
public Plan getPlan(String... args) {
  // Positional job parameters; each one falls back to a default when it is not supplied.
  int parallelism = 1;
  if (args.length > 0) {
    parallelism = Integer.parseInt(args[0]);
  }
  String inputPath = "";
  if (args.length > 1) {
    inputPath = args[1];
  }
  String outputPath = "";
  if (args.length > 2) {
    outputPath = args[2];
  }

  // Source: the input path is registered on the JobConf owned by the source.
  HadoopDataSource<LongWritable, Text> lines =
      new HadoopDataSource<LongWritable, Text>(new TextInputFormat(), new JobConf(), "Input Lines");
  TextInputFormat.addInputPath(lines.getJobConf(), new Path(inputPath));

  MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
      .input(lines).name("Tokenize Lines").build();

  ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
      .input(tokenizer).name("Count Words").build();

  // Sink: the output path is registered on the JobConf owned by the sink.
  HadoopDataSink<Text, IntWritable> sink = new HadoopDataSink<Text, IntWritable>(
      new TextOutputFormat<Text, IntWritable>(),
      new JobConf(),
      "Hadoop TextOutputFormat",
      counter,
      Text.class,
      IntWritable.class);
  TextOutputFormat.setOutputPath(sink.getJobConf(), new Path(outputPath));

  Plan result = new Plan(sink, "Hadoop OutputFormat Example");
  result.setDefaultParallelism(parallelism);
  return result;
}

  .input(mapper)
  .name("Count Words")
  .build();
FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, reducer, "Word Counts");
CsvOutputFormat.configureRecordFormat(out)

.input(projectEdge).name("Count Edges for Vertex").build();
.input(edgeCounter)
.name("Join Counts")
.build();

.input(findNearestClusterCenters)
.name("Recompute Center Positions")
.build();

.build();

.input(findNearestClusterCenters)
.name("Recompute Center Positions")
.build();

ReduceOperator.builder(new GroupByReturnFlag(), StringValue.class, 0)
.name("groupyBy")
.build();

.input(projectEdge).name("Count Edges for Vertex").build();
.input(edgeCounter).name("Join Counts").build();
.input(toLowerDegreeEdge).name("Build Triads").build();

/**
 * Assembles the "SecondarySort Example" plan: a space-delimited CSV source of two
 * {@code IntValue} fields is grouped on field 0 by an {@code IdentityReducer}, with each
 * group ordered ascending on field 1, and written back out as space-delimited records.
 *
 * @param args optional positional parameters: {@code args[0]} default parallelism (1 when
 *             absent), {@code args[1]} input path ("" when absent), {@code args[2]} output
 *             path ("" when absent)
 * @return the plan rooted at the file sink
 * @throws NumberFormatException if {@code args[0]} is present but is not a parsable integer
 */
@Override
public Plan getPlan(String... args) {
  // Positional job parameters with their fallbacks.
  int parallelism = 1;
  if (args.length > 0) {
    parallelism = Integer.parseInt(args[0]);
  }
  String inputPath = "";
  if (args.length > 1) {
    inputPath = args[1];
  }
  String outputPath = "";
  if (args.length > 2) {
    outputPath = args[2];
  }

  @SuppressWarnings("unchecked")
  CsvInputFormat pairFormat = new CsvInputFormat(' ', IntValue.class, IntValue.class);
  FileDataSource pairs = new FileDataSource(pairFormat, inputPath, "Input");

  // The reducer is keyed on the first field ...
  ReduceOperator grouper = ReduceOperator.builder(new IdentityReducer(), IntValue.class, 0)
    .input(pairs).name("Reducer").build();
  // ... and every group is ordered by the second field.
  Ordering bySecondField = new Ordering(1, IntValue.class, Order.ASCENDING);
  grouper.setGroupOrder(bySecondField);

  // Sink: one record per line, fields separated by a single space.
  FileDataSink sink = new FileDataSink(new CsvOutputFormat(), outputPath, grouper, "Sorted Output");
  CsvOutputFormat.configureRecordFormat(sink)
    .recordDelimiter('\n').fieldDelimiter(' ')
    .field(IntValue.class, 0).field(IntValue.class, 1);

  Plan result = new Plan(sink, "SecondarySort Example");
  result.setDefaultParallelism(parallelism);
  return result;
}

.input(iteration.getPartialSolution())
.name("Compute sum (Reduce)")
.build();

/**
 * Builds the source -> reducer -> sink test plan, compiles it with the {@code PactCompiler}
 * and turns the optimized plan into a job graph.
 *
 * <p>The parallelism of source and reducer, as well as the reducer's local and ship strategy
 * hints, are read from the test configuration ({@code ReduceTest#...} keys); the sink always
 * runs with a parallelism of 1.
 *
 * @return the job graph generated from the optimized plan
 * @throws Exception declared by the overridden method; propagated from plan compilation
 */
@Override
protected JobGraph getJobGraph() throws Exception {
  // Source: newline-delimited records read from inPath.
  FileDataSource source = new FileDataSource(new ContractITCaseInputFormat(), inPath);
  DelimitedInputFormat.configureDelimitedFormat(source).recordDelimiter('\n');
  source.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1));

  // Reducer keyed on field 0; its input is wired further below.
  ReduceOperator reduceUnderTest =
      ReduceOperator.builder(new TestReducer(), StringValue.class, 0).build();
  reduceUnderTest.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1));
  reduceUnderTest.getParameters().setString(
      PactCompiler.HINT_LOCAL_STRATEGY, config.getString("ReduceTest#LocalStrategy", ""));
  reduceUnderTest.getParameters().setString(
      PactCompiler.HINT_SHIP_STRATEGY, config.getString("ReduceTest#ShipStrategy", ""));

  // Sink, then connect the three operators.
  FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
  sink.setDegreeOfParallelism(1);
  sink.setInput(reduceUnderTest);
  reduceUnderTest.setInput(source);

  // Optimize the plan and generate the job graph from it.
  Plan testPlan = new Plan(sink);
  PactCompiler compiler = new PactCompiler(new DataStatistics());
  OptimizedPlan optimized = compiler.compile(testPlan);
  return new NepheleJobGraphGenerator().compileJobGraph(optimized);
}

.input(iteration.getPartialSolution())
.name("Compute sum (Reduce)")
.build();

/**
 * Builds the group-order test plan: a comma-delimited CSV source of two {@code IntValue}
 * fields feeds a {@code CheckingReducer} keyed on field 0 whose groups are ordered ascending
 * on field 1; the result is written as comma-delimited records.
 *
 * <p>The default parallelism is taken from the {@code GroupOrderTest#NumSubtasks}
 * configuration entry (1 when absent).
 *
 * @return the plan rooted at the CSV sink
 */
@Override
protected Plan getTestJob() {
  final int parallelism = this.config.getInteger("GroupOrderTest#NumSubtasks", 1);

  @SuppressWarnings("unchecked")
  CsvInputFormat pairFormat = new CsvInputFormat(',', IntValue.class, IntValue.class);
  FileDataSource pairs = new FileDataSource(pairFormat, this.textPath, "Source");

  // Key on the first field, order each group by the second one.
  ReduceOperator orderedReducer = ReduceOperator.builder(CheckingReducer.class)
    .keyField(IntValue.class, 0).input(pairs).name("Ordered Reducer").build();
  Ordering bySecondField = new Ordering(1, IntValue.class, Order.ASCENDING);
  orderedReducer.setGroupOrder(bySecondField);

  FileDataSink csvSink = new FileDataSink(CsvOutputFormat.class, this.resultPath, orderedReducer, "Sink");
  CsvOutputFormat.configureRecordFormat(csvSink)
    .recordDelimiter('\n').fieldDelimiter(',')
    .field(IntValue.class, 0).field(IntValue.class, 1);

  Plan testPlan = new Plan(csvSink);
  testPlan.setDefaultParallelism(parallelism);
  return testPlan;
}

/**
 * Assembles the "WordCount Example" plan: text source -> "Tokenize Lines" mapper ->
 * "Count Words" reducer (key field 0, {@code StringValue}) -> CSV sink that is configured
 * through the {@code CsvOutputFormat} constructor.
 *
 * @param args optional positional parameters: {@code args[0]} default parallelism (1 when
 *             absent), {@code args[1]} input path ("" when absent), {@code args[2]} output
 *             path ("" when absent)
 * @return the plan rooted at the CSV sink
 * @throws NumberFormatException if {@code args[0]} is present but is not a parsable integer
 */
@Override
public Plan getPlan(String... args) {
  // Positional job parameters with their fallbacks.
  int parallelism = 1;
  if (args.length > 0) {
    parallelism = Integer.parseInt(args[0]);
  }
  String inputPath = "";
  if (args.length > 1) {
    inputPath = args[1];
  }
  String outputPath = "";
  if (args.length > 2) {
    outputPath = args[2];
  }

  FileDataSource lines = new FileDataSource(new TextInputFormat(), inputPath, "Input Lines");

  MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
    .input(lines).name("Tokenize Lines").build();

  ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
    .input(tokenizer).name("Count Words").build();

  @SuppressWarnings("unchecked")
  FileDataSink sink = new FileDataSink(
      new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class),
      outputPath,
      counter,
      "Word Counts");

  Plan result = new Plan(sink, "WordCount Example");
  result.setDefaultParallelism(parallelism);
  return result;
}

/**
 * Builds a bulk-iteration test plan in which a keyless reducer ({@code PickOneReducer})
 * consumes the partial solution and provides the next partial solution; the iteration
 * result is written as a single-field CSV file.
 *
 * <p>Two assertions guard the setup: the iteration limit must be greater than 1, and so must
 * the plan's default parallelism after it has been set from {@code numSubTasks}.
 *
 * @param numSubTasks default parallelism assigned to the plan
 * @param input path handed to the text source
 * @param output path handed to the CSV sink
 * @return the plan rooted at the CSV sink
 */
private static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
  FileDataSource textSource = new FileDataSource(TextInputFormat.class, input, "input");

  BulkIteration loop = new BulkIteration("Loop");
  loop.setInput(textSource);
  loop.setMaximumNumberOfIterations(5);

  Assert.assertTrue(loop.getMaximumNumberOfIterations() > 1);

  // No key field is given to the builder: the reducer is keyless.
  ReduceOperator pickOne = ReduceOperator.builder(new PickOneReducer())
      .input(loop.getPartialSolution()).name("Compute sum (Reduce)").build();
  loop.setNextPartialSolution(pickOne);

  FileDataSink csvSink = new FileDataSink(CsvOutputFormat.class, output, loop, "Output");
  CsvOutputFormat.configureRecordFormat(csvSink)
    .recordDelimiter('\n').fieldDelimiter(' ').field(StringValue.class, 0);

  Plan testPlan = new Plan(csvSink, "Iteration with AllReducer (keyless Reducer)");
  testPlan.setDefaultParallelism(numSubTasks);
  Assert.assertTrue(testPlan.getDefaultParallelism() > 1);

  return testPlan;
}

/**
 * Builds the "WordCount Example" test plan: text source (charset parameter set to "ASCII")
 * -> "Tokenize Lines" mapper -> "Count Words" reducer (key field 0, {@code StringValue})
 * -> CSV sink.
 *
 * @param numSubTasks default parallelism assigned to the plan
 * @param input path handed to the text source
 * @param output path handed to the CSV sink
 * @return the plan rooted at the CSV sink
 */
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
  FileDataSource lines = new FileDataSource(new TextInputFormat(), input, "Input Lines");
  lines.setParameter(TextInputFormat.CHARSET_NAME, "ASCII");

  MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
    .input(lines).name("Tokenize Lines").build();

  ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
    .input(tokenizer).name("Count Words").build();

  @SuppressWarnings("unchecked")
  FileDataSink csvSink = new FileDataSink(
      new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class),
      output,
      counter,
      "Word Counts");

  Plan testPlan = new Plan(csvSink, "WordCount Example");
  testPlan.setDefaultParallelism(numSubTasks);
  return testPlan;
}

  /**
   * Builds the "Iteration with chained map test" plan: inside a bulk iteration limited to two
   * rounds, a reducer keyed on field 0 consumes the partial solution and an identity mapper on
   * top of it provides the next partial solution.
   *
   * @param numSubTasks default parallelism assigned to the plan
   * @param input path handed to the source
   * @param output path handed to the sink
   * @return the plan rooted at the file sink
   */
  static Plan getTestPlan(int numSubTasks, String input, String output) {
    // The source itself is pinned to a parallelism of 1.
    FileDataSource points = new FileDataSource(new PointInFormat(), input, "Input");
    points.setDegreeOfParallelism(1);

    BulkIteration loop = new BulkIteration("Loop");
    loop.setInput(points);
    loop.setMaximumNumberOfIterations(2);

    // Loop body: reduce the partial solution, then map the reducer's output.
    ReduceOperator reduceStep = ReduceOperator.builder(new DummyReducer(), IntValue.class, 0)
        .input(loop.getPartialSolution()).name("Reduce something").build();
    MapOperator mapStep = MapOperator.builder(new IdentityMapper())
        .input(reduceStep)
        .build();
    loop.setNextPartialSolution(mapStep);

    FileDataSink sink = new FileDataSink(new PointOutFormat(), output, loop, "Output");

    Plan testPlan = new Plan(sink, "Iteration with chained map test");
    testPlan.setDefaultParallelism(numSubTasks);
    return testPlan;
  }
}

/**
 * Assembles the "WordCount Example" plan: text source -> "Tokenize Lines" mapper ->
 * "Count Words" reducer (key field 0, {@code StringValue}) -> CSV sink whose record layout
 * is set up through {@code CsvOutputFormat.configureRecordFormat}.
 *
 * @param args optional positional parameters: {@code args[0]} default parallelism (1 when
 *             absent), {@code args[1]} input path ("" when absent), {@code args[2]} output
 *             path ("" when absent)
 * @return the plan rooted at the CSV sink
 * @throws NumberFormatException if {@code args[0]} is present but is not a parsable integer
 */
@Override
public Plan getPlan(String... args) {
  // Positional job parameters with their fallbacks.
  int parallelism = 1;
  if (args.length > 0) {
    parallelism = Integer.parseInt(args[0]);
  }
  String inputPath = "";
  if (args.length > 1) {
    inputPath = args[1];
  }
  String outputPath = "";
  if (args.length > 2) {
    outputPath = args[2];
  }

  FileDataSource lines = new FileDataSource(new TextInputFormat(), inputPath, "Input Lines");

  MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
      .input(lines)
      .name("Tokenize Lines")
      .build();

  ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
      .input(tokenizer)
      .name("Count Words")
      .build();

  // Sink: one record per line, two space-separated fields (StringValue, IntValue).
  FileDataSink sink = new FileDataSink(new CsvOutputFormat(), outputPath, counter, "Word Counts");
  CsvOutputFormat.configureRecordFormat(sink)
      .recordDelimiter('\n')
      .fieldDelimiter(' ')
      .field(StringValue.class, 0)
      .field(IntValue.class, 1);

  Plan result = new Plan(sink, "WordCount Example");
  result.setDefaultParallelism(parallelism);
  return result;
}

/**
 * Builds a bulk-iteration test plan in which a keyless reducer ({@code SumReducer}) consumes
 * the partial solution and provides the next partial solution; the iteration runs for at
 * most {@code NUM_ITERATIONS} rounds and its result is written as a single-field CSV file.
 *
 * @param numSubTasks default parallelism assigned to the plan
 * @param input path handed to the text source
 * @param output path handed to the CSV sink
 * @return the plan rooted at the CSV sink
 */
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
  FileDataSource textSource = new FileDataSource(TextInputFormat.class, input, "input");

  BulkIteration loop = new BulkIteration("Loop");
  loop.setInput(textSource);
  loop.setMaximumNumberOfIterations(NUM_ITERATIONS);

  // No key field is given to the builder: the reducer is keyless.
  ReduceOperator summing = ReduceOperator.builder(new SumReducer())
      .input(loop.getPartialSolution()).name("Compute sum (Reduce)").build();
  loop.setNextPartialSolution(summing);

  @SuppressWarnings("unchecked")
  FileDataSink csvSink = new FileDataSink(
      new CsvOutputFormat("\n",  " ", StringValue.class), output, loop, "Output");

  Plan testPlan = new Plan(csvSink, "Iteration with AllReducer (keyless Reducer)");
  testPlan.setDefaultParallelism(numSubTasks);
  return testPlan;
}

/**
 * Wires a generic source, a mapper, a reducer keyed on field 1 ({@code IntValue}) and a
 * printing sink into a plan with a default parallelism of 4, then runs that plan through
 * the {@code LocalExecutor}.
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception propagated from building or executing the plan
 */
public static void main(String[] args) throws Exception {
  GenericDataSource<UserGeneratingInputFormat> users =
      new GenericDataSource<UserGeneratingInputFormat>(UserGeneratingInputFormat.class);

  MapOperator extractor = MapOperator.builder(new NumberExtractingMapper())
      .input(users)
      .name("le mapper")
      .build();

  ReduceOperator concatenator = ReduceOperator.builder(new ConcatenatingReducer(), IntValue.class, 1)
      .input(extractor)
      .name("le reducer")
      .build();

  GenericDataSink printer = new GenericDataSink(PrintingOutputFormat.class, concatenator);

  Plan job = new Plan(printer);
  job.setDefaultParallelism(4);

  LocalExecutor.execute(job);
}

Javadoc

Creates and returns a ReduceOperator using the values given to the builder.

Popular methods of ReduceOperator$Builder

Popular in Java

Creating JSON documents from java classes using gson
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
setScale (BigDecimal)
getSharedPreferences (Context)
File (java.io)
An "abstract" representation of a file system entity identified by a pathname. The pathname may be a
Arrays (java.util)
This class contains various methods for manipulating arrays (such as sorting and searching). This cl
Semaphore (java.util.concurrent)
A counting semaphore. Conceptually, a semaphore maintains a set of permits. Each #acquire blocks if
StringUtils (org.apache.commons.lang)
Operations on java.lang.String that are null safe. * IsEmpty/IsBlank - checks if a String contains
JButton (javax.swing)
JCheckBox (javax.swing)
Top 12 Jupyter Notebook extensions

How to use build method in eu.stratosphere.api.java.record.operators.ReduceOperator$Builder

Best Java code snippets using eu.stratosphere.api.java.record.operators.ReduceOperator$Builder.build (Showing top 20 results out of 315)

How to use
build
method
in
eu.stratosphere.api.java.record.operators.ReduceOperator$Builder