// Point the MRUnit driver's Configuration at the cube under test. The first
// variant identifies the segment by ID, the second by name.
mapReduceDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
mapReduceDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);

mapReduceDriver.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
mapReduceDriver.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
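For context, a minimal, self-contained sketch of how such configuration keys reach a mapper under MRUnit. The test class, EchoCubeNameMapper, and the raw "cube.name" key are stand-ins invented for this sketch (Kylin's real key sits behind BatchConstants.CFG_CUBE_NAME); only the MRUnit MapDriver API is taken as given.

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class CubeConfigWiringTest {

  // Stand-in for Kylin's BatchConstants.CFG_CUBE_NAME, to keep the sketch
  // free of Kylin dependencies.
  static final String CFG_CUBE_NAME = "cube.name";

  // Echoes the cube name found in the task configuration, so the test can
  // verify that values set on the driver reach the mapper.
  static class EchoCubeNameMapper extends Mapper<Text, Text, Text, Text> {
    @Override
    protected void map(Text key, Text value, Context context)
        throws IOException, InterruptedException {
      context.write(key, new Text(context.getConfiguration().get(CFG_CUBE_NAME)));
    }
  }

  @Test
  public void mapperSeesDriverConfiguration() throws IOException {
    MapDriver<Text, Text, Text, Text> mapDriver =
        MapDriver.newMapDriver(new EchoCubeNameMapper());
    mapDriver.getConfiguration().set(CFG_CUBE_NAME, "test_kylin_cube");
    mapDriver.withInput(new Text("k"), new Text("v"))
        .withOutput(new Text("k"), new Text("test_kylin_cube"))
        .runTest();
  }
}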
protected List<KeyValueReuseList<K2, V2>> sortAndGroup(
    final List<Pair<K2, V2>> mapOutputs) {
  if (mapOutputs.isEmpty()) {
    return Collections.emptyList();
  }

  // Derive the sort and grouping comparators from the job configuration,
  // unless the test has already supplied its own.
  if (keyValueOrderComparator == null || keyGroupComparator == null) {
    JobConf conf = new JobConf(getConfiguration());
    conf.setMapOutputKeyClass(mapOutputs.get(0).getFirst().getClass());
    if (keyGroupComparator == null) {
      keyGroupComparator = conf.getOutputValueGroupingComparator();
    }
    if (keyValueOrderComparator == null) {
      keyValueOrderComparator = conf.getOutputKeyComparator();
    }
  }

  // Mimic the shuffle: sort the map output, then group values by key.
  ReduceFeeder<K2, V2> reduceFeeder = new ReduceFeeder<K2, V2>(getConfiguration());
  return reduceFeeder.sortAndGroup(mapOutputs, keyValueOrderComparator,
      keyGroupComparator);
}
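Because sortAndGroup() falls back to the JobConf comparators only when none were supplied, a test can override both, for example for a secondary sort. A brief sketch using MRUnit's setKeyOrderComparator/setKeyGroupingComparator setters; the mapper, reducer, key, and comparator classes named here are hypothetical.

// Hypothetical secondary-sort wiring: FullKeyComparator orders composite keys
// during the simulated shuffle, while NaturalKeyComparator decides which of
// them collapse into a single reduce() call.
MapReduceDriver<LongWritable, Text, CompositeKeyWritable, Text, Text, Text> driver =
    MapReduceDriver.newMapReduceDriver(new SecondarySortMapper(), new SecondarySortReducer());
driver.setKeyOrderComparator(new FullKeyComparator());
driver.setKeyGroupingComparator(new NaturalKeyComparator());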
@Override
public List<Pair<K3, V3>> run() throws IOException {
  try {
    preRunChecks(myMapper, myReducer);
    initDistributedCache();

    List<Pair<K2, V2>> mapOutputs = new ArrayList<Pair<K2, V2>>();

    // run map component
    LOG.debug("Starting map phase with mapper: " + myMapper);
    mapOutputs.addAll(MapDriver.newMapDriver(myMapper)
        .withCounters(getCounters())
        .withConfiguration(getConfiguration())
        .withAll(inputList)
        .withMapInputPath(getMapInputPath())
        .run());

    if (myCombiner != null) {
      // User has specified a combiner. Run this and replace the mapper
      // outputs with the result of the combiner.
      LOG.debug("Starting combine phase with combiner: " + myCombiner);
      mapOutputs = new ReducePhaseRunner<K2, V2, K2, V2>(inputFormatClass,
          getConfiguration(), counters,
          getOutputSerializationConfiguration(), outputFormatClass)
          .runReduce(sortAndGroup(mapOutputs), myCombiner);
    }

    // Run the reduce phase.
    LOG.debug("Starting reduce phase with reducer: " + myReducer);
    return new ReducePhaseRunner<K2, V2, K3, V3>(inputFormatClass,
        getConfiguration(), counters,
        getOutputSerializationConfiguration(), outputFormatClass)
        .runReduce(sortAndGroup(mapOutputs), myReducer);
  } finally {
    cleanupDistributedCache();
  }
}
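The combiner branch above is exercised whenever a test registers a combiner on the driver. A minimal, self-contained sketch using MRUnit's withCombiner(); the word-count mapper and reducer are written here purely for illustration.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.junit.Test;

public class CombinerPathTest {

  // Emits (token, 1) for every whitespace-separated token in the input line.
  static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      for (String token : value.toString().split("\\s+")) {
        context.write(new Text(token), ONE);
      }
    }
  }

  // Sums counts; usable as both combiner and reducer since it is associative.
  static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable value : values) {
        sum += value.get();
      }
      context.write(key, new IntWritable(sum));
    }
  }

  @Test
  public void combinerOutputFeedsTheReducer() throws IOException {
    MapReduceDriver.newMapReduceDriver(new TokenMapper(), new SumReducer())
        .withCombiner(new SumReducer()) // triggers the myCombiner != null branch of run()
        .withInput(new LongWritable(0), new Text("a a b"))
        .withOutput(new Text("a"), new IntWritable(2))
        .withOutput(new Text("b"), new IntWritable(1))
        .runTest();
  }
}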
@Test
public void testHypercubeMapReduce() throws IOException {
  // Wire Mahout's streaming k-means mapper and reducer into one MRUnit
  // pipeline: (Writable, VectorWritable) map input, (IntWritable,
  // CentroidWritable) intermediate and final output.
  MapReduceDriver<Writable, VectorWritable, IntWritable, CentroidWritable,
      IntWritable, CentroidWritable> mapReduceDriver =
          new MapReduceDriver<Writable, VectorWritable, IntWritable,
              CentroidWritable, IntWritable, CentroidWritable>(
                  new StreamingKMeansMapper(), new StreamingKMeansReducer());

  Configuration configuration = mapReduceDriver.getConfiguration();
  configure(configuration);
  System.out.printf("%s full test\n",
      configuration.get(StreamingKMeansDriver.SEARCHER_CLASS_OPTION));

  // Feed every synthetic data point to the single map task under key 0.
  for (Centroid datapoint : syntheticData.getFirst()) {
    mapReduceDriver.addInput(new IntWritable(0), new VectorWritable(datapoint));
  }

  List<org.apache.hadoop.mrunit.types.Pair<IntWritable, CentroidWritable>> results =
      mapReduceDriver.run();
  testReducerResults(syntheticData.getFirst().size(), results);
}
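The configure(...) and testReducerResults(...) helpers sit outside this excerpt. As an illustration only, a plausible configure(...) would populate the options the streaming k-means mapper and reducer read; the constants are assumed from Mahout 0.9's StreamingKMeansDriver and DefaultOptionCreator, and the values are arbitrary settings for a small synthetic data set, not the actual helper's contents.

// Plausible sketch, not the actual helper: option keys assumed from Mahout 0.9,
// values chosen arbitrarily for a small synthetic data set.
private static void configure(Configuration configuration) {
  configuration.set(DefaultOptionCreator.DISTANCE_MEASURE_OPTION,
      SquaredEuclideanDistanceMeasure.class.getName());
  configuration.set(StreamingKMeansDriver.SEARCHER_CLASS_OPTION,
      ProjectionSearch.class.getName());
  configuration.setInt(StreamingKMeansDriver.NUM_PROJECTIONS_OPTION, 3);
  configuration.setInt(StreamingKMeansDriver.SEARCH_SIZE_OPTION, 2);
  configuration.setInt(StreamingKMeansDriver.ESTIMATED_NUM_MAP_CLUSTERS, 200);
}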