@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@Test
public void testUngroupedHadoopReducer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env);
    DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new AllCommentCntReducer()));

    String resultPath = tempFolder.newFile().toURI().toString();

    commentCnts.writeAsText(resultPath);
    env.execute();

    String expected = "(42,15)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@Test
public void testConfigurationViaJobConf() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    JobConf conf = new JobConf();
    conf.set("my.cntPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper2());
    DataSet<Tuple2<IntWritable, IntWritable>> helloCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new ConfigurableCntReducer(), conf));

    String resultPath = tempFolder.newFile().toURI().toString();

    helloCnts.writeAsText(resultPath);
    env.execute();

    String expected = "(0,0)\n" +
            "(1,0)\n" +
            "(2,1)\n" +
            "(3,1)\n" +
            "(4,1)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@Test
public void testConfigurationViaJobConf() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    JobConf conf = new JobConf();
    conf.set("my.cntPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper4());
    DataSet<Tuple2<IntWritable, IntWritable>> hellos = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new ConfigurableCntReducer(), conf));

    String resultPath = tempFolder.newFile().toURI().toString();

    hellos.writeAsText(resultPath);
    env.execute();

    // return expected result
    String expected = "(0,0)\n" +
            "(1,0)\n" +
            "(2,1)\n" +
            "(3,1)\n" +
            "(4,1)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
@SuppressWarnings("unchecked") @Override public void open(Configuration parameters) throws Exception { super.open(parameters); this.reducer.configure(jobConf); this.reporter = new HadoopDummyReporter(); this.reduceCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>(); Class<KEYIN> inKeyClass = (Class<KEYIN>) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0); TypeSerializer<KEYIN> keySerializer = TypeExtractor.getForClass(inKeyClass).createSerializer(getRuntimeContext().getExecutionConfig()); this.valueIterator = new HadoopTupleUnwrappingIterator<KEYIN, VALUEIN>(keySerializer); }
@Test
public void testStandardGrouping() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env).
            map(new Mapper1());
    DataSet<Tuple2<IntWritable, IntWritable>> commentCnts = ds.
            groupBy(0).
            reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(new CommentCntReducer()));

    String resultPath = tempFolder.newFile().toURI().toString();

    commentCnts.writeAsText(resultPath);
    env.execute();

    String expected = "(0,0)\n" +
            "(1,3)\n" +
            "(2,5)\n" +
            "(3,5)\n" +
            "(4,2)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}