@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.combiner.configure(jobConf); this.reporter = new HadoopDummyReporter(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<>(keySerializer); this.combineCollector = new HadoopOutputCollector<>(); this.reduceCollector = new HadoopOutputCollector<>(); }
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@Test
public void testConfigurableMapper() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    JobConf conf = new JobConf();
    conf.set("my.filterPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
    DataSet<Tuple2<IntWritable, Text>> hellos = ds.
            flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new ConfigurableMapper(), conf));

    String resultPath = tempFolder.newFile().toURI().toString();

    hellos.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();

    String expected = "(2,Hello)\n" +
            "(3,Hello world)\n" +
            "(4,Hello world, how are you?)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
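// For context: a minimal sketch of the ConfigurableMapper the test above assumes.
// This is an illustrative reconstruction, not necessarily the exact test fixture:
// it picks up the "my.filterPrefix" key from the JobConf in configure() and
// forwards only the values that start with that prefix.
public static class ConfigurableMapper implements Mapper<IntWritable, Text, IntWritable, Text> {
    private String filterPrefix;

    @Override
    public void map(IntWritable k, Text v, OutputCollector<IntWritable, Text> out, Reporter rep)
            throws IOException {
        // emit only records whose value starts with the configured prefix
        if (v.toString().startsWith(filterPrefix)) {
            out.collect(k, v);
        }
    }

    @Override
    public void configure(JobConf c) {
        // read the prefix that the test sets via conf.set("my.filterPrefix", "Hello")
        filterPrefix = c.get("my.filterPrefix");
    }

    @Override
    public void close() throws IOException {
        // nothing to release
    }
}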
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: WordCount <input path> <result path>");
        return;
    }

    final String inputPath = args[0];
    final String outputPath = args[1];

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Set up the Hadoop Input Format
    HadoopInputFormat<LongWritable, Text> hadoopInputFormat =
            new HadoopInputFormat<LongWritable, Text>(new TextInputFormat(), LongWritable.class, Text.class, new JobConf());
    TextInputFormat.addInputPath(hadoopInputFormat.getJobConf(), new Path(inputPath));

    // Create a Flink job with it
    DataSet<Tuple2<LongWritable, Text>> text = env.createInput(hadoopInputFormat);
    DataSet<Tuple2<Text, LongWritable>> words =
            text.flatMap(new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(new Tokenizer()))
                .groupBy(0)
                .reduceGroup(new HadoopReduceCombineFunction<Text, LongWritable, Text, LongWritable>(new Counter(), new Counter()));

    // Set up Hadoop Output Format
    HadoopOutputFormat<Text, LongWritable> hadoopOutputFormat =
            new HadoopOutputFormat<Text, LongWritable>(new TextOutputFormat<Text, LongWritable>(), new JobConf());
    hadoopOutputFormat.getJobConf().set("mapred.textoutputformat.separator", " ");
    TextOutputFormat.setOutputPath(hadoopOutputFormat.getJobConf(), new Path(outputPath));

    // Output & Execute
    words.output(hadoopOutputFormat).setParallelism(1);
    env.execute("Hadoop Compat WordCount");
}
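// For context: minimal sketches of the Tokenizer and Counter referenced above. These are
// illustrative reconstructions of what the example assumes: a classic word-count
// mapper/reducer pair implementing Hadoop's mapred API. Counter is usable as both
// combiner and reducer because summation is associative and commutative.
public static final class Tokenizer implements Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    public void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> out, Reporter rep)
            throws IOException {
        // normalize the line and emit a (word, 1) pair per token
        String[] tokens = value.toString().toLowerCase().split("\\W+");
        for (String token : tokens) {
            if (token.length() > 0) {
                out.collect(new Text(token), new LongWritable(1L));
            }
        }
    }

    @Override
    public void configure(JobConf conf) {}

    @Override
    public void close() throws IOException {}
}

public static final class Counter implements Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    public void reduce(Text key, Iterator<LongWritable> values, OutputCollector<Text, LongWritable> out, Reporter rep)
            throws IOException {
        // sum the partial counts for one word
        long sum = 0;
        while (values.hasNext()) {
            sum += values.next().get();
        }
        out.collect(key, new LongWritable(sum));
    }

    @Override
    public void configure(JobConf conf) {}

    @Override
    public void close() throws IOException {}
}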
@Test
public void testUngroupedHadoopReducer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
    DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer()));

    String resultPath = tempFolder.newFile().toURI().toString();

    commentCnts.writeAsText(resultPath);
    env.execute();

    String expected = "(42,15)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
@Test
public void testUngroupedHadoopReducer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper2());

    DataSet<Tuple2<IntWritable, IntWritable>> sum = ds.
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                    new SumReducer(), new SumReducer()));

    String resultPath = tempFolder.newFile().toURI().toString();

    sum.writeAsText(resultPath);
    env.execute();

    String expected = "(0,231)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.combiner.configure(jobConf); this.reporter = new HadoopDummyReporter(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<>(keySerializer); this.combineCollector = new HadoopOutputCollector<>(); this.reduceCollector = new HadoopOutputCollector<>(); }
@Test
public void testConfigurationViaJobConf() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    JobConf conf = new JobConf();
    conf.set("my.cntPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper2());

    DataSet<Tuple2<IntWritable, IntWritable>> helloCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new ConfigurableCntReducer(), conf));

    String resultPath = tempFolder.newFile().toURI().toString();

    helloCnts.writeAsText(resultPath);
    env.execute();

    String expected = "(0,0)\n" + "(1,0)\n" + "(2,1)\n" + "(3,1)\n" + "(4,1)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
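// For context: a minimal sketch of the ConfigurableCntReducer assumed by the two
// testConfigurationViaJobConf tests. Illustrative reconstruction: it reads the
// "my.cntPrefix" key from the JobConf in configure() and counts, per key, how
// many values start with that prefix.
public static class ConfigurableCntReducer implements Reducer<IntWritable, Text, IntWritable, IntWritable> {
    private String countPrefix;

    @Override
    public void reduce(IntWritable k, Iterator<Text> vs, OutputCollector<IntWritable, IntWritable> out, Reporter rep)
            throws IOException {
        // count the values in this group that carry the configured prefix
        int cnt = 0;
        while (vs.hasNext()) {
            if (vs.next().toString().startsWith(countPrefix)) {
                cnt++;
            }
        }
        out.collect(k, new IntWritable(cnt));
    }

    @Override
    public void configure(JobConf c) {
        // the test sets conf.set("my.cntPrefix", "Hello")
        countPrefix = c.get("my.cntPrefix");
    }

    @Override
    public void close() throws IOException {}
}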
@Test
public void testNonPassingMapper() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
    DataSet<Tuple2<IntWritable, Text>> nonPassingFlatMapDs = ds.
            flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new NonPassingMapper()));

    String resultPath = tempFolder.newFile().toURI().toString();

    nonPassingFlatMapDs.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();

    compareResultsByLinesInMemory("\n", resultPath);
}
@Test
public void testStandardCountingWithCombiner() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper1());

    DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                    new SumReducer(), new SumReducer()));

    String resultPath = tempFolder.newFile().toURI().toString();

    counts.writeAsText(resultPath);
    env.execute();

    String expected = "(0,5)\n" + "(1,6)\n" + "(2,6)\n" + "(3,4)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
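// For context: a minimal sketch of the SumReducer used as both combiner and reducer
// above (illustrative reconstruction). The same implementation is safe to run on
// partial groups (combine) and on full groups (reduce) because summation is
// associative and commutative.
public static class SumReducer implements Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
    @Override
    public void reduce(IntWritable k, Iterator<IntWritable> vs, OutputCollector<IntWritable, IntWritable> out, Reporter rep)
            throws IOException {
        // sum all values of the group and emit one result per key
        int sum = 0;
        while (vs.hasNext()) {
            sum += vs.next().get();
        }
        out.collect(k, new IntWritable(sum));
    }

    @Override
    public void configure(JobConf c) {}

    @Override
    public void close() throws IOException {}
}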
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.combiner.configure(jobConf); this.reporter = new HadoopDummyReporter(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<>(keySerializer); this.combineCollector = new HadoopOutputCollector<>(); this.reduceCollector = new HadoopOutputCollector<>(); }
@Test
public void testConfigurationViaJobConf() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    JobConf conf = new JobConf();
    conf.set("my.cntPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper4());

    DataSet<Tuple2<IntWritable, IntWritable>> hellos = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new ConfigurableCntReducer(), conf));

    String resultPath = tempFolder.newFile().toURI().toString();

    hellos.writeAsText(resultPath);
    env.execute();

    String expected = "(0,0)\n" + "(1,0)\n" + "(2,1)\n" + "(3,1)\n" + "(4,1)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
// Fragment: wraps a Hadoop Mapper that emits each input record twice. The surrounding
// DataSet declaration is assumed; the variable names here are illustrative.
DataSet<Tuple2<IntWritable, Text>> duplicatedDs = ds.
        flatMap(new HadoopMapFunction<IntWritable, Text, IntWritable, Text>(new DuplicatingMapper()));
@Test
public void testCombiner() throws Exception {
    org.junit.Assume.assumeThat(mode, new IsEqual<TestExecutionMode>(TestExecutionMode.CLUSTER));
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper3());

    DataSet<Tuple2<IntWritable, IntWritable>> counts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
                    new SumReducer(), new KeyChangingReducer()));

    String resultPath = tempFolder.newFile().toURI().toString();

    counts.writeAsText(resultPath);
    env.execute();

    String expected = "(0,5)\n" + "(1,6)\n" + "(2,5)\n" + "(3,5)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
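// For context: a plausible sketch of the KeyChangingReducer used as the combiner above.
// This is an illustrative reconstruction (the exact remapping is an assumption): a
// combiner that rewrites keys exercises the case where the combine phase changes the
// grouping key before the final reduce runs.
public static class KeyChangingReducer implements Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
    @Override
    public void reduce(IntWritable k, Iterator<IntWritable> vs, OutputCollector<IntWritable, IntWritable> out, Reporter rep)
            throws IOException {
        // forward each value unchanged, but under a remapped key
        while (vs.hasNext()) {
            out.collect(new IntWritable(k.get() % 4), vs.next());
        }
    }

    @Override
    public void configure(JobConf c) {}

    @Override
    public void close() throws IOException {}
}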
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.combiner.configure(jobConf); this.reporter = new HadoopDummyReporter(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<>(keySerializer); this.combineCollector = new HadoopOutputCollector<>(); this.reduceCollector = new HadoopOutputCollector<>(); }
@Test
public void testStandardGrouping() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper1());

    DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new CommentCntReducer()));

    String resultPath = tempFolder.newFile().toURI().toString();

    commentCnts.writeAsText(resultPath);
    env.execute();

    String expected = "(0,0)\n" + "(1,3)\n" + "(2,5)\n" + "(3,5)\n" + "(4,2)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
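// For context: a minimal sketch of the CommentCntReducer assumed above (illustrative
// reconstruction). It counts, per key, the values that start with "Comment".
public static class CommentCntReducer implements Reducer<IntWritable, Text, IntWritable, IntWritable> {
    @Override
    public void reduce(IntWritable k, Iterator<Text> vs, OutputCollector<IntWritable, IntWritable> out, Reporter rep)
            throws IOException {
        // count the comment entries in this group
        int commentCnt = 0;
        while (vs.hasNext()) {
            if (vs.next().toString().startsWith("Comment")) {
                commentCnt++;
            }
        }
        out.collect(k, new IntWritable(commentCnt));
    }

    @Override
    public void configure(JobConf c) {}

    @Override
    public void close() throws IOException {}
}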