@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@Test
public void testUngroupedHadoopReducer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
    DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer()));

    String resultPath = tempFolder.newFile().toURI().toString();

    commentCnts.writeAsText(resultPath);
    env.execute();

    String expected = "(42,15)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@Test
public void testConfigurationViaJobConf() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    JobConf conf = new JobConf();
    conf.set("my.cntPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper2());
    DataSet<Tuple2<IntWritable, IntWritable>> helloCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new ConfigurableCntReducer(), conf));

    String resultPath = tempFolder.newFile().toURI().toString();

    helloCnts.writeAsText(resultPath);
    env.execute();

    String expected = "(0,0)\n" +
            "(1,0)\n" +
            "(2,1)\n" +
            "(3,1)\n" +
            "(4,1)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@Test
public void testConfigurationViaJobConf() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    JobConf conf = new JobConf();
    conf.set("my.cntPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper4());
    DataSet<Tuple2<IntWritable, IntWritable>> hellos = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new ConfigurableCntReducer(), conf));

    String resultPath = tempFolder.newFile().toURI().toString();

    hellos.writeAsText(resultPath);
    env.execute();

    // return expected result
    String expected = "(0,0)\n" +
            "(1,0)\n" +
            "(2,1)\n" +
            "(3,1)\n" +
            "(4,1)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@Test
public void testStandardGrouping() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper1());
    DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new CommentCntReducer()));

    String resultPath = tempFolder.newFile().toURI().toString();

    commentCnts.writeAsText(resultPath);
    env.execute();

    String expected = "(0,0)\n" +
            "(1,3)\n" +
            "(2,5)\n" +
            "(3,5)\n" +
            "(4,2)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}