HadoopReduceCombineFunction

How to use HadoopReduceCombineFunction in org.apache.flink.hadoopcompatibility.mapred

Best Java code snippets using org.apache.flink.hadoopcompatibility.mapred.HadoopReduceCombineFunction

origin: apache/flink

@SuppressWarnings("unchecked")
@Override
public void open(Configuration parameters) throws Exception {
  super.open(parameters);
  // Configure the wrapped Hadoop reducer and combiner with the JobConf.
  this.reducer.configure(jobConf);
  this.combiner.configure(jobConf);
  this.reporter = new HadoopDummyReporter();
  // Determine the reducer's input key type and build a Flink serializer for it,
  // so grouped Tuple2 records can be unwrapped into Hadoop key/value iterators.
  Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0);
  TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig());
  this.valueIterator = new HadoopTupleUnwrappingIterator<>(keySerializer);
  this.combineCollector = new HadoopOutputCollector<>();
  this.reduceCollector = new HadoopOutputCollector<>();
}
origin: apache/flink

@Test
public void testUngroupedHadoopReducer() throws Exception {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
      map(new Mapper2());
  DataSet<Tuple2<IntWritable, IntWritable>> sum = ds.
      reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
          new SumReducer(), new SumReducer()));
  String resultPath = tempFolder.newFile().toURI().toString();
  sum.writeAsText(resultPath);
  env.execute();
  String expected = "(0,231)\n";
  compareResultsByLinesInMemory(expected, resultPath);
}
origin: apache/flink

public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: WordCount <input path> <result path>");
    return;
  }
  final String inputPath = args[0];
  final String outputPath = args[1];
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  // Set up the Hadoop Input Format
  HadoopInputFormat<LongWritable, Text> hadoopInputFormat = new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, new JobConf());
  TextInputFormat.addInputPath(hadoopInputFormat.getJobConf(), new Path(inputPath));
  // Create a Flink job with it
  DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
  DataSet<Tuple2<Text, LongWritable>> words =
      text.flatMap(new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(new Tokenizer()))
        .groupBy(0).reduceGroup(new HadoopReduceCombineFunction<Text, LongWritable, Text, LongWritable>(new Counter(), new Counter()));
  // Set up Hadoop Output Format
  HadoopOutputFormat<Text, LongWritable> hadoopOutputFormat =
      new HadoopOutputFormat<Text, LongWritable>(new TextOutputFormat<Text, LongWritable>(), new JobConf());
  hadoopOutputFormat.getJobConf().set("mapred.textoutputformat.separator", " ");
  TextOutputFormat.setOutputPath(hadoopOutputFormat.getJobConf(), new Path(outputPath));
  // Output & Execute
  words.output(hadoopOutputFormat).setParallelism(1);
  env.execute("Hadoop Compat WordCount");
}
origin: apache/flink

@Test
public void testStandardCountingWithCombiner() throws Exception{
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
      map(new Mapper1());
  DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
      groupBy(0).
      reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
          new SumReducer(), new SumReducer()));
  String resultPath = tempFolder.newFile().toURI().toString();
  counts.writeAsText(resultPath);
  env.execute();
  String expected = "(0,5)\n" +
      "(1,6)\n" +
      "(2,6)\n" +
      "(3,4)\n";
  compareResultsByLinesInMemory(expected, resultPath);
}
origin: apache/flink

@Test
public void testCombiner() throws Exception {
  org.junit.Assume.assumeThat(mode, new IsEqual<TestExecutionMode>(TestExecutionMode.CLUSTER));
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
      map(new Mapper3());
  DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
      groupBy(0).
      reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
          new SumReducer(), new KeyChangingReducer()));
  String resultPath = tempFolder.newFile().toURI().toString();
  counts.writeAsText(resultPath);
  env.execute();
  String expected = "(0,5)\n" +
      "(1,6)\n" +
      "(2,5)\n" +
      "(3,5)\n";
  compareResultsByLinesInMemory(expected, resultPath);
}
org.apache.flink.hadoopcompatibility.mapred.HadoopReduceCombineFunction

Javadoc

This wrapper maps a Hadoop Reducer and Combiner (mapred API) to a combinable Flink GroupReduceFunction.
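For orientation, a minimal construction sketch is shown below. It assumes a Hadoop SumReducer implementing org.apache.hadoop.mapred.Reducer<IntWritable, IntWritable, IntWritable, IntWritable>, as in the tests on this page; the first constructor argument is used as the reduce function and the second as the combine function.

// Minimal usage sketch (assumption: SumReducer is a Hadoop
// org.apache.hadoop.mapred.Reducer<IntWritable, IntWritable, IntWritable, IntWritable>,
// as used by the tests shown above).
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.hadoopcompatibility.mapred.HadoopReduceCombineFunction;
import org.apache.hadoop.io.IntWritable;

final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<IntWritable, IntWritable>> input = env.fromElements(
    new Tuple2<>(new IntWritable(0), new IntWritable(2)),
    new Tuple2<>(new IntWritable(0), new IntWritable(3)));
DataSet<Tuple2<IntWritable, IntWritable>> sums = input
    .groupBy(0)
    .reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
        new SumReducer(),    // used as the reduce function
        new SumReducer()));  // used as the combine function
sums.print();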

Most used methods

  • getRuntimeContext
  • <init>
    Maps two Hadoop Reducers (mapred API) to a combinable Flink GroupReduceFunction; the first is used as the reducer, the second as the combiner.
