@Override
public Pair<K, V> next(Pair<K, V> reuse) throws IOException {
  prepare();
  if (!hasNext())
    throw new NoSuchElementException();
  Pair<K, V> result = reuse;
  if (result == null)
    result = new Pair<>(schema);
  result.key(keyConverter.convert(key));
  reader.getCurrentValue(value);
  result.value(valConverter.convert(value));
  // swap key and spareKey
  Writable k = key;
  key = spareKey;
  spareKey = k;
  ready = false;
  return result;
}
public Pair(K key, Schema keySchema, V value, Schema valueSchema) {
  this.schema = getPairSchema(keySchema, valueSchema);
  this.key = key;
  this.value = value;
}
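// Hedged usage sketch (not from the source): constructing a Pair with explicit
// key/value schemas; getPairSchema(...) combines them into a single record
// schema with "key" and "value" fields, which getSchema() then returns.
Schema stringSchema = Schema.create(Schema.Type.STRING);
Schema longSchema = Schema.create(Schema.Type.LONG);
Pair<Utf8, Long> pair = new Pair<>(new Utf8("word"), stringSchema, 1L, longSchema);
Schema pairSchema = pair.getSchema(); // record schema pairing STRING with LONG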
/** Returns the specified output serializer. */
public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) {
  // AvroWrapper used for final output, AvroKey or AvroValue for map output
  boolean isFinalOutput = c.equals(AvroWrapper.class);
  Configuration conf = getConf();
  Schema schema = isFinalOutput
      ? AvroJob.getOutputSchema(conf)
      : (AvroKey.class.isAssignableFrom(c)
          ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf))
          : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf)));
  GenericData dataModel = AvroJob.createDataModel(conf);
  return new AvroWrapperSerializer(dataModel.createDatumWriter(schema));
}
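// Hedged sketch (assumed, not from the source): AvroKey and AvroValue both
// extend AvroWrapper, so the branch above picks the key or value half of the
// map-output Pair schema based on which wrapper class Hadoop asks to serialize.
AvroWrapper<CharSequence> finalOutput = new AvroWrapper<>(new Utf8("whole datum"));
AvroKey<CharSequence> mapOutputKey = new AvroKey<>(new Utf8("key half"));
AvroValue<Long> mapOutputValue = new AvroValue<>(42L);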
/** Called with all map output values with a given key.  By default, pairs the
 * key with each value, collecting {@link Pair} instances. */
@SuppressWarnings("unchecked")
public void reduce(K key, Iterable<V> values, AvroCollector<OUT> collector,
                   Reporter reporter) throws IOException {
  if (outputPair == null)
    outputPair = new Pair<>(AvroJob.getOutputSchema(getConf()));
  for (V value : values) {
    outputPair.set(key, value);
    collector.collect((OUT) outputPair);
  }
}
/** Returns the specified map output deserializer.  Defaults to the final
 * output deserializer if no map output schema was specified. */
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
  Configuration conf = getConf();
  boolean isKey = AvroKey.class.isAssignableFrom(c);
  Schema schema = isKey
      ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf))
      : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf));
  GenericData dataModel = AvroJob.createMapOutputDataModel(conf);
  DatumReader<T> datumReader = dataModel.createDatumReader(schema);
  return new AvroWrapperDeserializer(datumReader, isKey);
}
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf != null) {
    Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
    Schema keySchema = org.apache.avro.mapred.Pair.getKeySchema(mapOutputSchema);
    schema = keySchema.getFields().get(0).schema();
  }
}
TetherJob.setExecutable(job, exec, execargs, false);
Schema outscheme = new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema();
AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
job.set(AvroJob.OUTPUT_SCHEMA, outscheme.toString());

int numWords = 0;
for (Pair<Utf8, Long> wc : counts) {
  assertEquals(wc.key().toString(),
               WordCountUtil.COUNTS.get(wc.key().toString()), wc.value());
  numWords++;
}
public void testOutputFormat() throws Exception {
  JobConf job = new JobConf();
  WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");
  wordCountUtil.writeLinesFile();

  AvroJob.setInputSchema(job, STRING);
  AvroJob.setOutputSchema(job, Pair.getPairSchema(STRING, LONG));
  AvroJob.setMapperClass(job, MapImpl.class);
  AvroJob.setCombinerClass(job, ReduceImpl.class);
  AvroJob.setReducerClass(job, ReduceImpl.class);

  FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
  FileOutputFormat.setCompressOutput(job, true);
  job.setOutputFormat(AvroTrevniOutputFormat.class);

  JobClient.runJob(job);
  wordCountUtil.validateCountsFile();
}
@Override
public void initialize() {
  keyMapFn.initialize();
  valueMapFn.initialize();
  pairSchemaJson = org.apache.avro.mapred.Pair.getPairSchema(
      new Schema.Parser().parse(firstJson),
      new Schema.Parser().parse(secondJson)).toString();
}
@SuppressWarnings("deprecation") public void testJob(String pathOut) throws Exception { JobConf job = new JobConf(); String pathIn = INPUT_DIR.getRoot().getPath(); WordCountUtil.writeLinesFile(pathIn + "/lines.avro"); Path outputPath = new Path(pathOut); outputPath.getFileSystem(job).delete(outputPath); job.setJobName("wordcount"); AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema()); AvroJob.setMapperClass(job, MapImpl.class); AvroJob.setCombinerClass(job, ReduceImpl.class); AvroJob.setReducerClass(job, ReduceImpl.class); FileInputFormat.setInputPaths(job, new Path(pathIn)); FileOutputFormat.setOutputPath(job, new Path(pathOut)); FileOutputFormat.setCompressOutput(job, true); WordCountUtil.setMeta(job); JobClient.runJob(job); WordCountUtil.validateCountsFile(new File(pathOut, "part-00000.avro")); }
AvroJob.setInputSchema(job, Weather.SCHEMA$);
AvroJob.setMapOutputSchema(job,
    Pair.getPairSchema(Weather.SCHEMA$, Schema.create(Type.NULL)));
AvroJob.setOutputSchema(job, Weather.SCHEMA$);
AvroJob.setMapperClass(job, SortMapper.class);
Schema outscheme = new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema();
hf.write(outscheme.toString());

DataFileStream<Pair<Utf8, Long>> counts = new DataFileStream<>(cin, reader);
for (Pair<Utf8, Long> wc : counts) {
  assertEquals(wc.key().toString(),
               WordCountUtil.COUNTS.get(wc.key().toString()), wc.value());
  numWords++;
}
AvroJob.setInputSchema(conf, Schema.parse(baseJobConf.get(AVRO_REC_SCHEMA)));
AvroJob.setOutputSchema(conf,
    Pair.getPairSchema(Schema.create(Schema.Type.BYTES), Schema.create(Schema.Type.BYTES)));
AvroJob.setMapperClass(conf, mapperClass);
conf.setReducerClass(AvroStoreBuilderReducer.class);
@Override
public void reduce(Utf8 word, Iterable<Long> counts,
                   AvroCollector<Pair<Utf8, Long>> collector,
                   Reporter reporter) throws IOException {
  long sum = 0;
  for (long count : counts)
    sum += count;
  Pair<Utf8, Long> outputvalue = new Pair<>(word, sum);

  amos.getCollector("myavro", reporter).collect(outputvalue);
  amos.collect("myavro1", reporter, outputvalue.toString());
  amos.collect("myavro", reporter,
      new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema(), outputvalue, "testavrofile");
  amos.collect("myavro", reporter,
      Schema.create(Schema.Type.STRING), outputvalue.toString(), "testavrofile1");
  collector.collect(new Pair<>(word, sum));
}
@Test
public void testJob() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(DIR.getRoot().getPath() + "/out");
  outputPath.getFileSystem(job).delete(outputPath);

  job.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(job, DIR.getRoot().getPath() + "/in");

  job.setMapperClass(AvroTestConverter.class);
  job.setNumReduceTasks(0);

  FileOutputFormat.setOutputPath(job, outputPath);
  System.out.println(createSchema());
  AvroJob.setOutputSchema(job,
      Pair.getPairSchema(Schema.create(Schema.Type.LONG), createSchema()));
  job.setOutputFormat(AvroOutputFormat.class);

  JobClient.runJob(job);
}
SeekableInput labelSensesInput = new FsInput(labelSensesPath, new Configuration());
Schema labelSensesSchema =
    Pair.getPairSchema(Schema.create(Type.STRING), LabelSenseList.getClassSchema());
DatumReader<Pair<CharSequence, LabelSenseList>> labelSensesDatumReader =
    new SpecificDatumReader<Pair<CharSequence, LabelSenseList>>(labelSensesSchema);
FileReader<Pair<CharSequence, LabelSenseList>> labelSensesReader =
    DataFileReader.openReader(labelSensesInput, labelSensesDatumReader);

SeekableInput labelOccurrencesInput = new FsInput(labelOccurrencesPath, new Configuration());
Schema labelOccurrencesSchema =
    Pair.getPairSchema(Schema.create(Type.STRING), LabelOccurrences.getClassSchema());
DatumReader<Pair<CharSequence, LabelOccurrences>> labelOccurrencesDatumReader =
    new SpecificDatumReader<Pair<CharSequence, LabelOccurrences>>(labelOccurrencesSchema);
FileReader<Pair<CharSequence, LabelOccurrences>> labelOccurrencesReader =
    DataFileReader.openReader(labelOccurrencesInput, labelOccurrencesDatumReader);

CharSequence label = sensesPair.key();
LabelSenseList senses = sensesPair.value();
while ((occurrencesPair == null
        || labelTextComparator.compare(occurrencesPair.key(), sensesPair.key()) < 0)
       && labelOccurrencesReader.hasNext())
  occurrencesPair = labelOccurrencesReader.next();
if (labelTextComparator.compare(occurrencesPair.key(), sensesPair.key()) == 0)
  occurrences = occurrencesPair.value();
Schema schema =
    Pair.getPairSchema(Schema.create(Type.STRING), LabelSenseList.getClassSchema());
CharSequence label = pair.key();
labels.put(label);
labelsInserted++;
@Override public void run() throws Exception { JobConfig props = super.getJobConfig(); JobConf conf = super.createJobConf(ItemModelTestLoglikMapper.class, ItemModelTestLoglikReducer.class, ItemModelTestLoglikCombiner.class, Pair.getPairSchema(Schema.create(Type.STRING), RegressionTestLoglikOutput.SCHEMA$), RegressionTestLoglikOutput.SCHEMA$); AvroUtils.runAvroJob(conf); } public static class ItemModelTestLoglikMapper extends AvroMapper<GenericData.Record, Pair<String, RegressionTestLoglikOutput>>
public void collect(OUT datum) throws IOException {
  if (isMapOnly) {
    wrapper.datum(datum);
    collector.collect((KO) wrapper, (VO) NullWritable.get());
  } else {
    // split a pair
    Pair<K, V> pair = (Pair<K, V>) datum;
    keyWrapper.datum(pair.key());
    valueWrapper.datum(pair.value());
    collector.collect((KO) keyWrapper, (VO) valueWrapper);
  }
}
@Test @SuppressWarnings("deprecation") public void testJob() throws Exception { JobConf job = new JobConf(); String dir = "target/testReflectJob"; Path inputPath = new Path(dir + "/in"); Path outputPath = new Path(dir + "/out"); outputPath.getFileSystem(job).delete(outputPath); inputPath.getFileSystem(job).delete(inputPath); writeLinesFile(new File(dir+"/in")); job.setJobName("reflect"); AvroJob.setInputSchema(job, ReflectData.get().getSchema(Text.class)); AvroJob.setMapOutputSchema (job, new Pair(new Text(""), new Count(0L)).getSchema()); AvroJob.setOutputSchema(job, ReflectData.get().getSchema(WordCount.class)); AvroJob.setMapperClass(job, MapImpl.class); //AvroJob.setCombinerClass(job, ReduceImpl.class); AvroJob.setReducerClass(job, ReduceImpl.class); FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); AvroJob.setReflect(job); // use reflection JobClient.runJob(job); validateCountsFile(new File(new File(dir, "out"), "part-00000.avro")); }