@SuppressWarnings({"unchecked", "deprecation"}) public RecordWriter<Object, Object> getRecordWriter(FileSystem fs,JobConf job, String baseFileName, Progressable arg3) throws IOException { String nameOutput = job.get(CONFIG_NAMED_OUTPUT, null); String fileName = getUniqueName(job, baseFileName); Schema schema = null; String schemastr = job.get(MO_PREFIX+nameOutput+".schema",null); if (schemastr!=null) schema = Schema.parse(schemastr); JobConf outputConf = new JobConf(job); outputConf.setOutputFormat(getNamedOutputFormatClass(job, nameOutput)); boolean isMapOnly = job.getNumReduceTasks() == 0; if (schema != null) { if (isMapOnly) AvroJob.setMapOutputSchema(outputConf, schema); else AvroJob.setOutputSchema(outputConf, schema); } OutputFormat outputFormat = outputConf.getOutputFormat(); return outputFormat.getRecordWriter(fs, outputConf, fileName, arg3); } }
@Test public void testNonAvroReducer() throws Exception { JobConf job = new JobConf(); Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath()); outputPath.getFileSystem(job).delete(outputPath); // configure input for Avro from sequence file AvroJob.setInputSequenceFile(job); AvroJob.setInputSchema(job, SCHEMA); FileInputFormat.setInputPaths(job, file().toURI().toString()); // mapper is default, identity // use a hadoop reducer that consumes Avro input AvroJob.setMapOutputSchema(job, SCHEMA); job.setReducerClass(NonAvroReducer.class); // configure outputPath for non-Avro SequenceFile job.setOutputFormat(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); // output key/value classes are default, LongWritable/Text JobClient.runJob(job); checkFile(new SequenceFileReader<> (new File(outputPath.toString() + "/part-00000"))); }
/** Runs a two-input join job and validates the merged output file. */
@Test
public void testJob() throws Exception {
  JobConf conf = new JobConf();
  Path namesDir = new Path(INPUT_DIR_1.getRoot().getPath());
  Path balancesDir = new Path(INPUT_DIR_2.getRoot().getPath());
  Path outDir = new Path(OUTPUT_DIR.getRoot().getPath());

  // Clean any previous output, then lay down the two input data sets.
  outDir.getFileSystem(conf).delete(outDir);
  writeNamesFiles(new File(namesDir.toUri().getPath()));
  writeBalancesFiles(new File(balancesDir.toUri().getPath()));

  conf.setJobName("multiple-inputs-join");

  // Each input directory gets its own mapper and reflect-derived input schema.
  AvroMultipleInputs.addInputPath(conf, namesDir, NamesMapImpl.class,
      ReflectData.get().getSchema(NamesRecord.class));
  AvroMultipleInputs.addInputPath(conf, balancesDir, BalancesMapImpl.class,
      ReflectData.get().getSchema(BalancesRecord.class));

  // Mappers emit Pair<KeyRecord, JoinableRecord>; the reducer produces CompleteRecord.
  Schema keySchema = ReflectData.get().getSchema(KeyRecord.class);
  Schema valueSchema = ReflectData.get().getSchema(JoinableRecord.class);
  AvroJob.setMapOutputSchema(conf, Pair.getPairSchema(keySchema, valueSchema));
  AvroJob.setOutputSchema(conf, ReflectData.get().getSchema(CompleteRecord.class));
  AvroJob.setReducerClass(conf, ReduceImpl.class);

  // A single reducer keeps all joined records in one output file.
  conf.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(conf, outDir);
  AvroJob.setReflect(conf);

  JobClient.runJob(conf);

  validateCompleteFile(new File(OUTPUT_DIR.getRoot(), "part-00000.avro"));
}
@Test @SuppressWarnings("deprecation") public void testJob() throws Exception { JobConf job = new JobConf(); String dir = "target/testReflectJob"; Path inputPath = new Path(dir + "/in"); Path outputPath = new Path(dir + "/out"); outputPath.getFileSystem(job).delete(outputPath); inputPath.getFileSystem(job).delete(inputPath); writeLinesFile(new File(dir+"/in")); job.setJobName("reflect"); AvroJob.setInputSchema(job, ReflectData.get().getSchema(Text.class)); AvroJob.setMapOutputSchema (job, new Pair(new Text(""), new Count(0L)).getSchema()); AvroJob.setOutputSchema(job, ReflectData.get().getSchema(WordCount.class)); AvroJob.setMapperClass(job, MapImpl.class); //AvroJob.setCombinerClass(job, ReduceImpl.class); AvroJob.setReducerClass(job, ReduceImpl.class); FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); AvroJob.setReflect(job); // use reflection JobClient.runJob(job); validateCountsFile(new File(new File(dir, "out"), "part-00000.avro")); }
// NOTE(review): fragment of a larger configuration method — enclosing definition not visible here.
// Map output is Pair<Weather, null>: keyed by the whole Weather record with a null value.
AvroJob.setMapOutputSchema
  (job, Pair.getPairSchema(Weather.SCHEMA$, Schema.create(Type.NULL)));
// Final job output is plain Weather records.
AvroJob.setOutputSchema(job, Weather.SCHEMA$);
@SuppressWarnings({"unchecked", "deprecation"}) public RecordWriter<Object, Object> getRecordWriter(FileSystem fs,JobConf job, String baseFileName, Progressable arg3) throws IOException { String nameOutput = job.get(CONFIG_NAMED_OUTPUT, null); String fileName = getUniqueName(job, baseFileName); Schema schema = null; String schemastr = job.get(MO_PREFIX+nameOutput+".schema",null); if (schemastr!=null) schema = Schema.parse(schemastr); JobConf outputConf = new JobConf(job); outputConf.setOutputFormat(getNamedOutputFormatClass(job, nameOutput)); boolean isMapOnly = job.getNumReduceTasks() == 0; if (schema != null) { if (isMapOnly) AvroJob.setMapOutputSchema(outputConf, schema); else AvroJob.setOutputSchema(outputConf, schema); } OutputFormat outputFormat = outputConf.getOutputFormat(); return outputFormat.getRecordWriter(fs, outputConf, fileName, arg3); } }
// NOTE(review): fragment of a larger configuration method — enclosing definition not visible here.
outputSchema.setFields(outputFields);
AvroJob.setOutputSchema(conf, outputSchema);
// Map output is Pair<string, inputSchema-record>; presumably the string is a grouping
// key emitted by mapperClass — verify against the mapper implementation.
AvroJob.setMapOutputSchema(conf,
    Pair.getPairSchema(Schema.create(Type.STRING), inputSchema));
AvroJob.setMapperClass(conf, mapperClass);
/**
 * Creates a JobConf for a map-reduce job. The input schema is loaded from the
 * input files rather than set explicitly.
 *
 * @param mapperClass        AvroMapper subclass for the mapper
 * @param reducerClass       AvroReducer subclass for the reducer
 * @param mapperOutputSchema mapper output schema; must be an instance of
 *                           org.apache.avro.mapred.Pair
 * @param outputSchema       reducer output schema
 * @return a configured JobConf
 * @throws IOException        if the base configuration cannot be created
 * @throws URISyntaxException if a configured URI is malformed
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
    Class<? extends AvroReducer> reducerClass, Schema mapperOutputSchema,
    Schema outputSchema) throws IOException, URISyntaxException {
  final JobConf jobConf = createJobConf();
  // Schemas first, then the map/reduce classes; the calls are independent.
  AvroJob.setMapOutputSchema(jobConf, mapperOutputSchema);
  AvroJob.setOutputSchema(jobConf, outputSchema);
  AvroJob.setMapperClass(jobConf, mapperClass);
  AvroJob.setReducerClass(jobConf, reducerClass);
  return jobConf;
}
// NOTE(review): fragment of a larger configuration method — enclosing definition not visible here.
outputSchema.setFields(outputFields);
AvroJob.setOutputSchema(conf, outputSchema);
// Map output is Pair<float, outputSchema-record>; presumably the float is a score or
// rank key emitted by mapperClass — verify against the mapper implementation.
AvroJob.setMapOutputSchema(conf,
    Pair.getPairSchema(Schema.create(Type.FLOAT), outputSchema));
AvroJob.setMapperClass(conf, mapperClass);
public static void main(String... args) throws Exception { JobConf job = new JobConf(); job.setJarByClass(SmallFilesMapReduce.class); Path input = new Path(args[0]); Path output = new Path(args[1]); output.getFileSystem(job).delete(output, true); AvroJob.setInputSchema(job, Stock.SCHEMA$); //<co id="ch03_avro_mr_comment1"/> AvroJob.setMapOutputSchema(job, Pair.getPairSchema(Stock.SCHEMA$, Schema.create(Schema.Type.NULL))); AvroJob.setOutputSchema(job, Stock.SCHEMA$); FileInputFormat.setInputPaths(job, input); FileOutputFormat.setOutputPath(job, output); AvroJob.setMapperClass(job, Mapper.class); //<co id="ch03_smallfilemr_comment2"/> AvroJob.setReducerClass(job, Reducer.class); FileOutputFormat.setCompressOutput(job, true); AvroJob.setOutputCodec(job, SNAPPY_CODEC); JobClient.runJob(job); }
/**
 * Creates a JobConf for a map-reduce job with an explicitly set input schema.
 *
 * @param mapperClass        AvroMapper subclass for the mapper
 * @param reducerClass       AvroReducer subclass for the reducer
 * @param inputSchema        schema of the input data
 * @param mapperOutputSchema mapper output schema; must be an instance of
 *                           org.apache.avro.mapred.Pair
 * @param outputSchema       reducer output schema
 * @return a configured JobConf
 * @throws IOException        if the base configuration cannot be created
 * @throws URISyntaxException if a configured URI is malformed
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
    Class<? extends AvroReducer> reducerClass, Schema inputSchema,
    Schema mapperOutputSchema, Schema outputSchema)
    throws IOException, URISyntaxException {
  final JobConf jobConf = createJobConf();
  // Schemas first, then the map/reduce classes; the calls are independent.
  AvroJob.setInputSchema(jobConf, inputSchema);
  AvroJob.setMapOutputSchema(jobConf, mapperOutputSchema);
  AvroJob.setOutputSchema(jobConf, outputSchema);
  AvroJob.setMapperClass(jobConf, mapperClass);
  AvroJob.setReducerClass(jobConf, reducerClass);
  return jobConf;
}
// NOTE(review): fragment of a larger configuration method — enclosing definition not visible here.
AvroJob.setCombinerClass(conf, combinerClass);
// Mapper output schema (expected to be a Pair schema) and final reducer output schema.
AvroJob.setMapOutputSchema(conf, mapperOutputSchema);
AvroJob.setOutputSchema(conf, outputSchema);
// NOTE(review): fragment of a larger configuration method — enclosing definition not visible here.
// Mapper output schema (expected to be a Pair schema) and final reducer output schema.
AvroJob.setMapOutputSchema(conf, mapperOutputSchema);
AvroJob.setOutputSchema(conf, outputSchema);