@Override
public Pair<K, V> next(Pair<K, V> reuse) throws IOException {
  prepare();
  if (!hasNext())
    throw new NoSuchElementException();
  Pair<K, V> result = reuse;
  if (result == null)
    result = new Pair<>(schema);
  result.key(keyConverter.convert(key));
  reader.getCurrentValue(value);
  result.value(valConverter.convert(value));
  // swap key and spareKey
  Writable k = key;
  key = spareKey;
  spareKey = k;
  ready = false;
  return result;
}
public Pair(K key, Schema keySchema, V value, Schema valueSchema) {
  this.schema = getPairSchema(keySchema, valueSchema);
  this.key = key;
  this.value = value;
}
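// Hedged usage sketch (not from the source): constructing a Pair with explicit
// key/value schemas; getPairSchema(...) combines them into a single record
// schema with "key" and "value" fields, which getSchema() then returns.
Schema stringSchema = Schema.create(Schema.Type.STRING);
Schema longSchema = Schema.create(Schema.Type.LONG);
Pair<Utf8, Long> pair = new Pair<>(new Utf8("word"), stringSchema, 1L, longSchema);
Schema pairSchema = pair.getSchema(); // record schema pairing STRING with LONG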
/** Returns the specified output serializer. */
public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) {
  // AvroWrapper used for final output, AvroKey or AvroValue for map output
  boolean isFinalOutput = c.equals(AvroWrapper.class);
  Configuration conf = getConf();
  Schema schema = isFinalOutput
      ? AvroJob.getOutputSchema(conf)
      : (AvroKey.class.isAssignableFrom(c)
          ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf))
          : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf)));
  GenericData dataModel = AvroJob.createDataModel(conf);
  return new AvroWrapperSerializer(dataModel.createDatumWriter(schema));
}
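// Hedged sketch (assumed, not from the source): AvroKey and AvroValue both
// extend AvroWrapper, so the branch above picks the key or value half of the
// map-output Pair schema based on which wrapper class Hadoop asks to serialize.
AvroWrapper<CharSequence> finalOutput = new AvroWrapper<>(new Utf8("whole datum"));
AvroKey<CharSequence> mapOutputKey = new AvroKey<>(new Utf8("key half"));
AvroValue<Long> mapOutputValue = new AvroValue<>(42L);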
/** Called with all map output values with a given key.  By default, pairs the
 * key with each value, collecting {@link Pair} instances. */
@SuppressWarnings("unchecked")
public void reduce(K key, Iterable<V> values, AvroCollector<OUT> collector,
                   Reporter reporter) throws IOException {
  if (outputPair == null)
    outputPair = new Pair<>(AvroJob.getOutputSchema(getConf()));
  for (V value : values) {
    outputPair.set(key, value);
    collector.collect((OUT) outputPair);
  }
}
/** Returns the specified map output deserializer.  Defaults to the final
 * output deserializer if no map output schema was specified. */
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
  Configuration conf = getConf();
  boolean isKey = AvroKey.class.isAssignableFrom(c);
  Schema schema = isKey
      ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf))
      : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf));
  GenericData dataModel = AvroJob.createMapOutputDataModel(conf);
  DatumReader<T> datumReader = dataModel.createDatumReader(schema);
  return new AvroWrapperDeserializer(datumReader, isKey);
}
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf != null) {
    Schema mapOutputSchema = AvroJob.getMapOutputSchema(conf);
    Schema keySchema = org.apache.avro.mapred.Pair.getKeySchema(mapOutputSchema);
    schema = keySchema.getFields().get(0).schema();
  }
}
TetherJob.setExecutable(job, exec, execargs, false);
Schema outscheme = new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema();
AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
job.set(AvroJob.OUTPUT_SCHEMA, outscheme.toString());

int numWords = 0;
for (Pair<Utf8, Long> wc : counts) {
  assertEquals(wc.key().toString(),
               WordCountUtil.COUNTS.get(wc.key().toString()), wc.value());
  numWords++;
}
public void testOutputFormat() throws Exception {
  JobConf job = new JobConf();
  WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");
  wordCountUtil.writeLinesFile();

  AvroJob.setInputSchema(job, STRING);
  AvroJob.setOutputSchema(job, Pair.getPairSchema(STRING, LONG));
  AvroJob.setMapperClass(job, MapImpl.class);
  AvroJob.setCombinerClass(job, ReduceImpl.class);
  AvroJob.setReducerClass(job, ReduceImpl.class);

  FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
  FileOutputFormat.setCompressOutput(job, true);
  job.setOutputFormat(AvroTrevniOutputFormat.class);

  JobClient.runJob(job);
  wordCountUtil.validateCountsFile();
}
@Override
public void initialize() {
  keyMapFn.initialize();
  valueMapFn.initialize();
  pairSchemaJson = org.apache.avro.mapred.Pair.getPairSchema(
      new Schema.Parser().parse(firstJson),
      new Schema.Parser().parse(secondJson)).toString();
}
@SuppressWarnings("deprecation") public void testJob(String pathOut) throws Exception { JobConf job = new JobConf(); String pathIn = INPUT_DIR.getRoot().getPath(); WordCountUtil.writeLinesFile(pathIn + "/lines.avro"); Path outputPath = new Path(pathOut); outputPath.getFileSystem(job).delete(outputPath); job.setJobName("wordcount"); AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema()); AvroJob.setMapperClass(job, MapImpl.class); AvroJob.setCombinerClass(job, ReduceImpl.class); AvroJob.setReducerClass(job, ReduceImpl.class); FileInputFormat.setInputPaths(job, new Path(pathIn)); FileOutputFormat.setOutputPath(job, new Path(pathOut)); FileOutputFormat.setCompressOutput(job, true); WordCountUtil.setMeta(job); JobClient.runJob(job); WordCountUtil.validateCountsFile(new File(pathOut, "part-00000.avro")); }
AvroJob.setInputSchema(job, Weather.SCHEMA$);
AvroJob.setMapOutputSchema(job,
    Pair.getPairSchema(Weather.SCHEMA$, Schema.create(Type.NULL)));
AvroJob.setOutputSchema(job, Weather.SCHEMA$);
AvroJob.setMapperClass(job, SortMapper.class);
Schema outscheme = new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema();
hf.write(outscheme.toString());

DataFileStream<Pair<Utf8, Long>> counts = new DataFileStream<>(cin, reader);
for (Pair<Utf8, Long> wc : counts) {
  assertEquals(wc.key().toString(),
               WordCountUtil.COUNTS.get(wc.key().toString()), wc.value());
  numWords++;
}
AvroJob.setInputSchema(conf, Schema.parse(baseJobConf.get(AVRO_REC_SCHEMA)));
AvroJob.setOutputSchema(conf,
    Pair.getPairSchema(Schema.create(Schema.Type.BYTES), Schema.create(Schema.Type.BYTES)));
AvroJob.setMapperClass(conf, mapperClass);
conf.setReducerClass(AvroStoreBuilderReducer.class);
@Override
public void reduce(Utf8 word, Iterable<Long> counts,
                   AvroCollector<Pair<Utf8, Long>> collector,
                   Reporter reporter) throws IOException {
  long sum = 0;
  for (long count : counts)
    sum += count;
  Pair<Utf8, Long> outputvalue = new Pair<>(word, sum);

  amos.getCollector("myavro", reporter).collect(outputvalue);
  amos.collect("myavro1", reporter, outputvalue.toString());
  amos.collect("myavro", reporter,
      new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema(), outputvalue, "testavrofile");
  amos.collect("myavro", reporter,
      Schema.create(Schema.Type.STRING), outputvalue.toString(), "testavrofile1");
  collector.collect(new Pair<>(word, sum));
}
@Test
public void testJob() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(DIR.getRoot().getPath() + "/out");
  outputPath.getFileSystem(job).delete(outputPath);

  job.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(job, DIR.getRoot().getPath() + "/in");

  job.setMapperClass(AvroTestConverter.class);
  job.setNumReduceTasks(0);

  FileOutputFormat.setOutputPath(job, outputPath);
  System.out.println(createSchema());
  AvroJob.setOutputSchema(job,
      Pair.getPairSchema(Schema.create(Schema.Type.LONG), createSchema()));
  job.setOutputFormat(AvroOutputFormat.class);

  JobClient.runJob(job);
}
SeekableInput labelSensesInput = new FsInput(labelSensesPath, new Configuration());
Schema labelSensesSchema =
    Pair.getPairSchema(Schema.create(Type.STRING), LabelSenseList.getClassSchema());
DatumReader<Pair<CharSequence, LabelSenseList>> labelSensesDatumReader =
    new SpecificDatumReader<Pair<CharSequence, LabelSenseList>>(labelSensesSchema);
FileReader<Pair<CharSequence, LabelSenseList>> labelSensesReader =
    DataFileReader.openReader(labelSensesInput, labelSensesDatumReader);

SeekableInput labelOccurrencesInput = new FsInput(labelOccurrencesPath, new Configuration());
Schema labelOccurrencesSchema =
    Pair.getPairSchema(Schema.create(Type.STRING), LabelOccurrences.getClassSchema());
DatumReader<Pair<CharSequence, LabelOccurrences>> labelOccurrencesDatumReader =
    new SpecificDatumReader<Pair<CharSequence, LabelOccurrences>>(labelOccurrencesSchema);
FileReader<Pair<CharSequence, LabelOccurrences>> labelOccurrencesReader =
    DataFileReader.openReader(labelOccurrencesInput, labelOccurrencesDatumReader);

CharSequence label = sensesPair.key();
LabelSenseList senses = sensesPair.value();
while ((occurrencesPair == null
        || labelTextComparator.compare(occurrencesPair.key(), sensesPair.key()) < 0)
       && labelOccurrencesReader.hasNext())
  occurrencesPair = labelOccurrencesReader.next();
if (labelTextComparator.compare(occurrencesPair.key(), sensesPair.key()) == 0)
  occurrences = occurrencesPair.value();
Schema schema =
    Pair.getPairSchema(Schema.create(Type.STRING), LabelSenseList.getClassSchema());
CharSequence label = pair.key();
labels.put(label);
labelsInserted++;
@Override public void run() throws Exception { JobConfig props = super.getJobConfig(); JobConf conf = super.createJobConf(ItemModelTestLoglikMapper.class, ItemModelTestLoglikReducer.class, ItemModelTestLoglikCombiner.class, Pair.getPairSchema(Schema.create(Type.STRING), RegressionTestLoglikOutput.SCHEMA$), RegressionTestLoglikOutput.SCHEMA$); AvroUtils.runAvroJob(conf); } public static class ItemModelTestLoglikMapper extends AvroMapper<GenericData.Record, Pair<String, RegressionTestLoglikOutput>>
public void collect(OUT datum) throws IOException {
  if (isMapOnly) {
    wrapper.datum(datum);
    collector.collect((KO) wrapper, (VO) NullWritable.get());
  } else {
    // split a pair
    Pair<K, V> pair = (Pair<K, V>) datum;
    keyWrapper.datum(pair.key());
    valueWrapper.datum(pair.value());
    collector.collect((KO) keyWrapper, (VO) valueWrapper);
  }
}
@Test @SuppressWarnings("deprecation") public void testJob() throws Exception { JobConf job = new JobConf(); String dir = "target/testReflectJob"; Path inputPath = new Path(dir + "/in"); Path outputPath = new Path(dir + "/out"); outputPath.getFileSystem(job).delete(outputPath); inputPath.getFileSystem(job).delete(inputPath); writeLinesFile(new File(dir+"/in")); job.setJobName("reflect"); AvroJob.setInputSchema(job, ReflectData.get().getSchema(Text.class)); AvroJob.setMapOutputSchema (job, new Pair(new Text(""), new Count(0L)).getSchema()); AvroJob.setOutputSchema(job, ReflectData.get().getSchema(WordCount.class)); AvroJob.setMapperClass(job, MapImpl.class); //AvroJob.setCombinerClass(job, ReduceImpl.class); AvroJob.setReducerClass(job, ReduceImpl.class); FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); AvroJob.setReflect(job); // use reflection JobClient.runJob(job); validateCountsFile(new File(new File(dir, "out"), "part-00000.avro")); }