private static void setInputFormatClass(Configuration conf) { if (Hadoop.isHadoop1()) { conf.set("mapreduce.inputformat.class", DatasetKeyInputFormat.class.getName()); } else { // build a job with an empty conf Job fakeJob = Hadoop.Job.newInstance.invoke(new Configuration(false)); fakeJob.setInputFormatClass(DatasetKeyInputFormat.class); // then copy any created entries into the real conf for (Map.Entry<String, String> entry : fakeJob.getConfiguration()) { conf.set(entry.getKey(), entry.getValue()); } } }
/**
 * Configures {@code conf} so the job uses {@link DatasetKeyInputFormat}.
 *
 * <p>On Hadoop 1 the class name is written directly under the
 * {@code mapreduce.inputformat.class} key. On Hadoop 2 the key layout is
 * version-specific, so the entries are produced by a scratch {@link Job}
 * (created reflectively via {@code Hadoop.Job.newInstance}) and copied over.
 *
 * @param conf the job {@link Configuration} to update
 */
private static void setInputFormatClass(Configuration conf) {
  if (Hadoop.isHadoop1()) {
    conf.set("mapreduce.inputformat.class",
        DatasetKeyInputFormat.class.getName());
  } else {
    // build a job with an empty conf so only the input-format entries appear
    Job fakeJob = Hadoop.Job.newInstance.invoke(new Configuration(false));
    fakeJob.setInputFormatClass(DatasetKeyInputFormat.class);
    // then copy any created entries into the real conf
    for (Map.Entry<String, String> entry : fakeJob.getConfiguration()) {
      conf.set(entry.getKey(), entry.getValue());
    }
  }
}
private static void setConfigProperties(Configuration conf, Format format, Schema schema, Class<?> type) { GenericData model = DataModelUtil.getDataModelForType(type); if (Formats.AVRO.equals(format)) { setModel.invoke(conf, model.getClass()); conf.set(AVRO_SCHEMA_INPUT_KEY, schema.toString()); } else if (Formats.PARQUET.equals(format)) { // TODO: update to a version of Parquet with setAvroDataSupplier //AvroReadSupport.setAvroDataSupplier(conf, // DataModelUtil.supplierClassFor(model)); AvroReadSupport.setAvroReadSchema(conf, schema); } }
@Override public void initialize() { Preconditions.checkState(ReaderWriterState.NEW.equals(state), "A reader may not be opened more than once - current state:%s", state); try { FileInputFormat format = InputFormatUtil.newInputFormatInstance(descriptor); Job job = Hadoop.Job.newInstance.invoke(conf); FileInputFormat.addInputPath(job, path); // attempt to minimize the number of InputSplits FileStatus stat = fs.getFileStatus(path); FileInputFormat.setMaxInputSplitSize(job, stat.getLen()); this.splits = format.getSplits(job).iterator(); this.shouldAdvance = true; this.state = ReaderWriterState.OPEN; } catch (RuntimeException e) { this.state = ReaderWriterState.ERROR; throw new DatasetOperationException("Cannot calculate splits", e); } catch (IOException e) { this.state = ReaderWriterState.ERROR; throw new DatasetIOException("Cannot calculate splits", e); } }
/**
 * Resolves the primitive Hive {@link TypeInfo} registered under the given
 * type name, using the reflective {@code primitiveTypeForName} handle.
 *
 * @param type a primitive Hive type name (e.g. {@code "string"})
 * @return the corresponding {@link TypeInfo}
 */
private static TypeInfo primitiveTypeInfo(String type) {
  TypeInfo resolved = primitiveTypeForName.invoke(type);
  return resolved;
}
/**
 * Parses a Hive type string into a {@link TypeInfo}, delegating to the
 * reflective {@code parseTypeInfo} handle.
 *
 * @param type a Hive type string (e.g. {@code "array<string>"})
 * @return the parsed {@link TypeInfo}
 */
public static TypeInfo parseTypeInfo(String type) {
  TypeInfo parsed = parseTypeInfo.invoke(type);
  return parsed;
}
/**
 * Parses a Hive type string into a {@link TypeInfo}.
 *
 * <p>Delegates to the reflective {@code parseTypeInfo} method handle; the
 * simple name here resolves to the field, not this method.
 *
 * @param type a Hive type string (e.g. {@code "array<string>"})
 * @return the parsed {@link TypeInfo}
 */
public static TypeInfo parseTypeInfo(String type) {
  return parseTypeInfo.invoke(type);
}
/**
 * Resolves the primitive Hive {@link TypeInfo} registered under the given
 * type name.
 *
 * <p>Delegates to the reflective {@code primitiveTypeForName} method handle.
 *
 * @param type a primitive Hive type name (e.g. {@code "string"})
 * @return the corresponding {@link TypeInfo}
 */
private static TypeInfo primitiveTypeInfo(String type) {
  return primitiveTypeForName.invoke(type);
}