/**
 * Hadoop-side reduce entry point that forwards to the delegate {@code reducer}.
 *
 * <p>The Avro collector is created from {@code out} on the first call only and
 * kept for later calls. The incoming iterator is handed to the shared
 * {@code reduceIterable} before the delegate is invoked with the key's datum.
 *
 * @param key      wrapped key; its datum is passed to the delegate
 * @param values   iterator over the wrapped values for {@code key}
 * @param out      Hadoop output collector used to build the Avro collector
 * @param reporter passed through unchanged to the delegate
 * @throws IOException declared for collector creation and the delegate call
 */
@Override
public final void reduce(AvroKey<K> key, Iterator<AvroValue<V>> values,
                         OutputCollector<KO, VO> out, Reporter reporter)
    throws IOException {
  // Lazily build the collector; subsequent calls reuse the first one created.
  if (collector == null) {
    collector = getCollector(out);
  }

  // Point the shared iterable at this key's values before delegating.
  reduceIterable.values = values;
  reducer.reduce(key.datum(), reduceIterable, collector, reporter);
}
/**
 * Hadoop-side map entry point that forwards the wrapped datum to the delegate
 * {@code mapper}.
 *
 * <p>The {@code MapCollector} is created from the supplied Hadoop collector on
 * the first call only and reused afterwards.
 *
 * @param wrapper   wrapper whose datum is passed to the delegate
 * @param value     unused by this method
 * @param collector Hadoop output collector wrapped by the {@code MapCollector}
 * @param reporter  passed through unchanged to the delegate
 * @throws IOException declared for the delegate call
 */
@Override
public void map(AvroWrapper<IN> wrapper, NullWritable value,
                OutputCollector<KO, VO> collector, Reporter reporter)
    throws IOException {
  // Lazily build the Avro-facing collector; later calls reuse it.
  if (out == null) {
    out = new MapCollector<>(collector, isMapOnly);
  }
  mapper.map(wrapper.datum(), out, reporter);
}
/**
 * Called with all map output values with a given key. By default, pairs the
 * key with each value, collecting {@link Pair} instances.
 *
 * <p>A single {@link Pair} is created on first use (from the job's output
 * schema) and then re-populated and collected once per value.
 *
 * @param key       key shared by all {@code values}
 * @param values    values to pair with {@code key}
 * @param collector receives the pair once for every value
 * @param reporter  unused by this default implementation
 * @throws IOException declared for the collector call
 */
@SuppressWarnings("unchecked")
public void reduce(K key, Iterable<V> values, AvroCollector<OUT> collector,
                   Reporter reporter) throws IOException {
  // Lazily create the reusable output pair.
  if (outputPair == null) {
    outputPair = new Pair<>(AvroJob.getOutputSchema(getConf()));
  }

  for (V current : values) {
    outputPair.set(key, current);
    // Unchecked: OUT is expected to be the Pair type for this default behaviour.
    collector.collect((OUT) outputPair);
  }
}
/**
 * Emits one (key, joinable) pair for a balance record.
 *
 * <p>The key carries the record's {@code id}; the joinable record carries the
 * record's class name, its {@code id}, an empty string, and its {@code balance}.
 *
 * @param balanceRecord input record
 * @param collector     receives the resulting pair
 * @param reporter      unused
 * @throws IOException declared for the collector call
 */
@Override
public void map(BalancesRecord balanceRecord,
                AvroCollector<Pair<KeyRecord, JoinableRecord>> collector,
                Reporter reporter) throws IOException {
  KeyRecord joinKey = new KeyRecord(balanceRecord.id);
  JoinableRecord joinable = new JoinableRecord(
      balanceRecord.getClass().getName(),
      balanceRecord.id,
      "",
      balanceRecord.balance);

  collector.collect(new Pair<>(joinKey, joinable));
}
/**
 * Returns the specified map output deserializer. Defaults to the final
 * output deserializer if no map output schema was specified.
 *
 * <p>The key half of the map output pair schema is used when {@code c} is
 * assignable to {@link AvroKey}; otherwise the value half is used.
 *
 * @param c wrapper class being deserialized
 * @return a deserializer reading datums with the selected schema
 */
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
  Configuration conf = getConf();
  boolean isKey = AvroKey.class.isAssignableFrom(c);

  // Look up the pair schema once, then pick the half matching the wrapper kind.
  Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
  Schema schema;
  if (isKey) {
    schema = Pair.getKeySchema(mapOutputSchema);
  } else {
    schema = Pair.getValueSchema(mapOutputSchema);
  }

  GenericData dataModel = AvroJob.createMapOutputDataModel(conf);
  DatumReader<T> datumReader = dataModel.createDatumReader(schema);
  return new AvroWrapperDeserializer(datumReader, isKey);
}
/**
 * Collects a datum to a named output using the default schema.
 *
 * <p>Delegates to {@code getCollector(namedOutput, reporter)} and collects
 * {@code datum} on the result.
 *
 * @param namedOutput the named output name
 * @param reporter    the reporter
 * @param datum       output data
 * @throws IOException thrown if output collector could not be created
 */
public void collect(String namedOutput, Reporter reporter, Object datum)
    throws IOException {
  getCollector(namedOutput, reporter).collect(datum);
}
/**
 * Wraps the given Hadoop collector in a new {@code ReduceCollector}.
 *
 * @param collector Hadoop output collector to wrap
 * @return a new Avro collector backed by {@code collector}
 */
@Override
protected AvroCollector<OUT> getCollector(
    OutputCollector<AvroWrapper<OUT>, NullWritable> collector) {
  return new ReduceCollector(collector);
}
/**
 * Wraps the given Hadoop collector in a new {@code PairCollector}.
 *
 * @param collector Hadoop output collector to wrap
 * @return a new Avro collector of pairs backed by {@code collector}
 */
@Override
protected AvroCollector<Pair<K, V>> getCollector(
    OutputCollector<AvroKey<K>, AvroValue<V>> collector) {
  return new PairCollector(collector);
}
/**
 * Sets three output metadata entries on the job via
 * {@code AvroJob.setOutputMeta}: {@code STRING_KEY}, {@code LONG_KEY} and
 * {@code BYTES_KEY}, each with its matching {@code *_META_VALUE} constant.
 *
 * @param job job configuration to update
 */
public static void setMeta(JobConf job) {
  AvroJob.setOutputMeta(job, STRING_KEY, STRING_META_VALUE);
  AvroJob.setOutputMeta(job, LONG_KEY, LONG_META_VALUE);
  AvroJob.setOutputMeta(job, BYTES_KEY, BYTES_META_VALUE);
}
/**
 * Emits one (key, joinable) pair for a name record.
 *
 * <p>The key carries the record's {@code id}; the joinable record carries the
 * record's class name, its {@code id}, its {@code name}, and {@code -1L}.
 *
 * @param nameRecord input record
 * @param collector  receives the resulting pair
 * @param reporter   unused
 * @throws IOException declared for the collector call
 */
@Override
public void map(NamesRecord nameRecord,
                AvroCollector<Pair<KeyRecord, JoinableRecord>> collector,
                Reporter reporter) throws IOException {
  KeyRecord joinKey = new KeyRecord(nameRecord.id);
  JoinableRecord joinable = new JoinableRecord(
      nameRecord.getClass().getName(),
      nameRecord.id,
      nameRecord.name,
      -1L);

  collector.collect(new Pair<>(joinKey, joinable));
}
/**
 * Collects a datum to a named output using a custom schema.
 *
 * <p>Delegates to {@code getCollector(namedOutput, reporter, schema)} and
 * collects {@code datum} on the result.
 *
 * @param namedOutput the named output name (this will be the output file name)
 * @param reporter    the reporter
 * @param schema      schema to use for this output
 * @param datum       output data
 * @throws IOException thrown if output collector could not be created
 */
public void collect(String namedOutput, Reporter reporter, Schema schema,
                    Object datum) throws IOException {
  getCollector(namedOutput, reporter, schema).collect(datum);
}
/**
 * Creates a pair whose value is a {@link Double}.
 *
 * <p>Delegates to the four-argument constructor, passing the key cast to
 * {@code K}, the schema returned by {@code getSchema(key)}, the value cast to
 * {@code V}, and {@code DOUBLE_SCHEMA}. The casts are unchecked, hence the
 * suppression.
 *
 * @param key   pair key; its schema is obtained via {@code getSchema(key)}
 * @param value pair value
 */
// NOTE(review): the trailing @SuppressWarnings on the line below annotates the
// declaration that follows this constructor, which is outside this view.
@SuppressWarnings("unchecked") public Pair(Object key, Double value) { this((K)key, getSchema(key), (V)value, DOUBLE_SCHEMA); } @SuppressWarnings("unchecked")
/**
 * Collects a datum to a named output using a custom schema and file name.
 *
 * <p>Delegates to the five-argument {@code getCollector} and collects
 * {@code datum} on the result.
 *
 * @param namedOutput    the named output name
 * @param reporter       the reporter
 * @param schema         schema to use for this output
 * @param datum          output data
 * @param baseOutputPath output file name to use
 * @throws IOException thrown if output collector could not be created
 */
public void collect(String namedOutput, Reporter reporter, Schema schema,
                    Object datum, String baseOutputPath) throws IOException {
  // NOTE(review): the second argument is deliberately passed as null here; its
  // meaning is defined by the five-argument getCollector, not visible in this view.
  getCollector(namedOutput, null, reporter, baseOutputPath, schema)
      .collect(datum);
}
/**
 * Creates a pair whose value is an {@link Integer}.
 *
 * <p>Delegates to the four-argument constructor, passing the key cast to
 * {@code K}, the schema returned by {@code getSchema(key)}, the value cast to
 * {@code V}, and {@code INT_SCHEMA}. The casts are unchecked, hence the
 * suppression.
 *
 * @param key   pair key; its schema is obtained via {@code getSchema(key)}
 * @param value pair value
 */
// NOTE(review): the trailing @SuppressWarnings on the line below annotates the
// declaration that follows this constructor, which is outside this view.
@SuppressWarnings("unchecked") public Pair(Object key, Integer value) { this((K)key, getSchema(key), (V)value, INT_SCHEMA); } @SuppressWarnings("unchecked")