private static Job getNamedJob(JobContext context, String namedOutput) throws IOException {
  // The following trick leverages the instantiation of a record writer via
  // the job, thus supporting arbitrary output formats.
  Job job = Job.getInstance(context.getConfiguration());
  job.setOutputFormatClass(getNamedOutputFormatClass(context, namedOutput));
  job.setOutputKeyClass(getNamedOutputKeyClass(context, namedOutput));
  job.setOutputValueClass(getNamedOutputValueClass(context, namedOutput));

  Configuration conf = job.getConfiguration();
  Map<String, String> namedConfigurations =
    ConfigurationUtil.getNamedConfigurations(context.getConfiguration(), computePrefixName(namedOutput));
  LOG.trace("Setting config for named output {}: {}", namedOutput, namedConfigurations);
  ConfigurationUtil.setAll(namedConfigurations, conf);
  return job;
}
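A minimal sketch of the round trip getNamedJob relies on: per-output settings are stored under a name-derived prefix at job setup time and copied back onto a fresh Configuration when the per-output Job is built. It uses only the ConfigurationUtil methods shown in these snippets; the ConfigurationUtil import is omitted because its package is not shown here, and the prefix and key/value pairs are made up for illustration.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import com.google.common.collect.ImmutableMap;

public class NamedConfigRoundTrip {
  public static void main(String[] args) {
    Configuration jobConf = new Configuration();

    // Store per-output settings under the output's prefix ("out1" is illustrative).
    ConfigurationUtil.setNamedConfigurations(jobConf, "out1",
                                             ImmutableMap.of("key1", "value1", "key2", "value2"));

    // Later (e.g. when creating the record writer), read the settings back...
    Map<String, String> restored = ConfigurationUtil.getNamedConfigurations(jobConf, "out1");

    // ...and apply them onto the Configuration of a fresh per-output Job.
    Configuration perOutputConf = new Configuration();
    ConfigurationUtil.setAll(restored, perOutputConf);
    System.out.println(perOutputConf.get("key1")); // prints: value1
  }
}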
private static Context createContext(Configuration conf) throws IOException {
  // Creating the context needs to happen only when running as a MapReduce job.
  // In other cases, ContextManager will be initialized using the saveContext method.
  CConfiguration cConf = ConfigurationUtil.get(conf, Constants.Explore.CCONF_KEY, CConfCodec.INSTANCE);
  Configuration hConf = ConfigurationUtil.get(conf, Constants.Explore.HCONF_KEY, HConfCodec.INSTANCE);

  Injector injector = createInjector(cConf, hConf);
  ZKClientService zkClientService = injector.getInstance(ZKClientService.class);
  zkClientService.startAndWait();

  DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
  SystemDatasetInstantiatorFactory datasetInstantiatorFactory =
    injector.getInstance(SystemDatasetInstantiatorFactory.class);
  return new Context(datasetFramework, zkClientService, datasetInstantiatorFactory);
}
private Map<String, String> createDatasetConfiguration(String namespace, String datasetName,
                                                       Map<String, String> datasetArgs) {
  // Clearing the Configuration drops Hadoop's defaults, so the resulting map
  // contains only the dataset entries written by setDataset.
  Configuration hConf = new Configuration();
  hConf.clear();
  AbstractBatchWritableOutputFormat.setDataset(hConf, namespace, datasetName, datasetArgs);
  return ConfigurationUtil.toMap(hConf);
}
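A small sketch of the Configuration-to-Map conversion used above (the ConfigurationUtil import is again omitted; the key is hypothetical). Clearing the Configuration first matters: without it, toMap would also carry along all of Hadoop's default properties.

Configuration hConf = new Configuration();
hConf.clear();                          // start empty, as createDatasetConfiguration does
hConf.set("dataset.name", "events");    // hypothetical key, for illustration only

// Every entry in the Configuration ends up in the map...
Map<String, String> asMap = ConfigurationUtil.toMap(hConf);

// ...and setAll re-applies the map onto another Configuration.
Configuration restored = new Configuration();
restored.clear();
ConfigurationUtil.setAll(asMap, restored);
System.out.println(restored.get("dataset.name")); // prints: events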
@Test
public void testNamedConfigurations() throws IOException {
  org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
  Map<String, String> name1Config = ImmutableMap.of("key1", "value1", "key2", "value2");
  Map<String, String> name2Config = ImmutableMap.of("name2key", "name2value");
  Map<String, String> nameDotConfig = ImmutableMap.of("name3key", "name3value");
  Map<String, String> emptyConfig = ImmutableMap.of();

  ConfigurationUtil.setNamedConfigurations(conf, "name1", name1Config);
  ConfigurationUtil.setNamedConfigurations(conf, "name2", name2Config);
  ConfigurationUtil.setNamedConfigurations(conf, "name.", nameDotConfig);
  ConfigurationUtil.setNamedConfigurations(conf, "emptyConfig", emptyConfig);

  Assert.assertEquals(name1Config, ConfigurationUtil.getNamedConfigurations(conf, "name1"));
  Assert.assertEquals(name2Config, ConfigurationUtil.getNamedConfigurations(conf, "name2"));
  Assert.assertEquals(nameDotConfig, ConfigurationUtil.getNamedConfigurations(conf, "name."));
  Assert.assertEquals(emptyConfig, ConfigurationUtil.getNamedConfigurations(conf, "emptyConfig"));
}
/**
 * Adds a named output for the job.
 *
 * @param job job to add the named output to
 * @param namedOutput name of the output; it must be alphanumeric
 *                    (letters and numbers only)
 * @param outputFormatClass name of the OutputFormat class
 * @param keyClass key class
 * @param valueClass value class
 * @param outputConfigs configurations for the output
 */
@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput, String outputFormatClass,
                                  Class<?> keyClass, Class<?> valueClass,
                                  Map<String, String> outputConfigs) {
  assertValidName(namedOutput);
  checkNamedOutputName(namedOutput, getNamedOutputsList(job), false);
  Configuration conf = job.getConfiguration();
  conf.set(MULTIPLE_OUTPUTS, conf.get(MULTIPLE_OUTPUTS, "") + " " + namedOutput);
  conf.set(MO_PREFIX + namedOutput + FORMAT, outputFormatClass);
  conf.setClass(MO_PREFIX + namedOutput + KEY, keyClass, Object.class);
  conf.setClass(MO_PREFIX + namedOutput + VALUE, valueClass, Object.class);
  ConfigurationUtil.setNamedConfigurations(conf, computePrefixName(namedOutput), outputConfigs);
}
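A hypothetical caller sketch. The MO_PREFIX constants suggest this method lives on a MultipleOutputs-style helper class, but that class name is an assumption here; only standard Hadoop classes appear in the call itself, and the output name and directory are illustrative.

Job job = Job.getInstance(new Configuration());
MultipleOutputs.addNamedOutput(job, "textOut",   // name must be alphanumeric
    org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class.getName(),
    org.apache.hadoop.io.Text.class, org.apache.hadoop.io.NullWritable.class,
    ImmutableMap.of("mapreduce.output.fileoutputformat.outputdir", "/tmp/textOut"));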
// Reads back all entries stored under the output-partition-metadata prefix
// in this task's configuration.
ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),
                                         PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);
@Override
public RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
  throws IOException, InterruptedException {
  // Recover the InputFormat and the underlying InputSplit from the MultiInputTaggedSplit.
  MultiInputTaggedSplit taggedInputSplit = (MultiInputTaggedSplit) split;
  ConfigurationUtil.setAll(taggedInputSplit.getInputConfigs(), context.getConfiguration());
  InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils.newInstance(
    taggedInputSplit.getInputFormatClass(), context.getConfiguration());
  InputSplit inputSplit = taggedInputSplit.getInputSplit();

  // We can't simply create the underlying RecordReader and return it, because we need to
  // override its initialize method in order to initialize it with the underlying InputSplit.
  return new DelegatingRecordReader<>(inputFormat.createRecordReader(inputSplit, context));
}
private boolean writesEnabled() {
  try {
    CConfiguration cConf = ConfigurationUtil.get(getConf(), Constants.Explore.CCONF_KEY, CConfCodec.INSTANCE);
    return cConf.getBoolean(Constants.Explore.WRITES_ENABLED);
  } catch (IOException e) {
    LOG.error("Unable to get CDAP Configuration to check if writes are enabled.", e);
    throw Throwables.propagate(e);
  }
}
@Override
@SuppressWarnings("unchecked")
public List<InputSplit> getSplits(JobContext job) throws IOException, InterruptedException {
  List<InputSplit> splits = new ArrayList<>();
  Map<String, MultipleInputs.MapperInput> mapperInputMap = MultipleInputs.getInputMap(job.getConfiguration());

  for (Map.Entry<String, MultipleInputs.MapperInput> mapperInputEntry : mapperInputMap.entrySet()) {
    String inputName = mapperInputEntry.getKey();
    MultipleInputs.MapperInput mapperInput = mapperInputEntry.getValue();
    String mapperClassName = mapperInput.getMapperClassName();
    Job jobCopy = new Job(job.getConfiguration());
    Configuration confCopy = jobCopy.getConfiguration();

    // Set configuration specific to this input onto the jobCopy.
    ConfigurationUtil.setAll(mapperInput.getInputFormatConfiguration(), confCopy);

    Class<?> inputFormatClass = confCopy.getClassByNameOrNull(mapperInput.getInputFormatClassName());
    Preconditions.checkNotNull(inputFormatClass, "Class could not be found: %s",
                               mapperInput.getInputFormatClassName());

    InputFormat<K, V> inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, confCopy);
    // Some input formats need a JobID in order to compute splits.
    jobCopy.setJobID(new JobID(inputName, inputName.hashCode()));

    // Get the splits for each input and tag them with the InputFormat and Mapper types
    // by wrapping each one in a MultiInputTaggedSplit.
    List<InputSplit> formatSplits = inputFormat.getSplits(jobCopy);
    for (InputSplit split : formatSplits) {
      splits.add(new MultiInputTaggedSplit(split, confCopy, inputName,
                                           mapperInput.getInputFormatConfiguration(),
                                           inputFormat.getClass(), mapperClassName));
    }
  }
  return splits;
}
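Design note: the per-input configuration travels inside each MultiInputTaggedSplit rather than in the job-wide Configuration. That is what makes the task side (createRecordReader above) work: it calls getInputConfigs() on the tagged split and re-applies those entries with ConfigurationUtil.setAll before instantiating the input-specific InputFormat, so several inputs with conflicting configuration keys can coexist in a single job.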
private static Context createContext(Configuration conf) throws IOException {
  // Creating the context needs to happen only when running as a MapReduce job.
  // In other cases, ContextManager will be initialized using the saveContext method.
  CConfiguration cConf = ConfigurationUtil.get(conf, Constants.Explore.CCONF_KEY, CConfCodec.INSTANCE);
  Configuration hConf = ConfigurationUtil.get(conf, Constants.Explore.HCONF_KEY, HConfCodec.INSTANCE);

  Injector injector = createInjector(cConf, hConf);
  ZKClientService zkClientService = injector.getInstance(ZKClientService.class);
  zkClientService.startAndWait();

  DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
  StreamAdmin streamAdmin = injector.getInstance(StreamAdmin.class);
  SystemDatasetInstantiatorFactory datasetInstantiatorFactory =
    injector.getInstance(SystemDatasetInstantiatorFactory.class);
  AuthenticationContext authenticationContext = injector.getInstance(AuthenticationContext.class);
  AuthorizationEnforcer authorizationEnforcer = injector.getInstance(AuthorizationEnforcer.class);
  return new Context(datasetFramework, streamAdmin, zkClientService, datasetInstantiatorFactory,
                     authenticationContext, authorizationEnforcer);
}
private Map<String, String> createBatchReadableConfiguration() {
  List<Split> splits = this.splits;
  if (splits == null) {
    splits = ((BatchReadable<?, ?>) dataset).getSplits();
  }
  Configuration hConf = new Configuration();
  hConf.clear();
  try {
    AbstractBatchReadableInputFormat.setDatasetSplits(hConf, datasetNamespace, datasetName,
                                                      datasetArgs, splits);
    return ConfigurationUtil.toMap(hConf);
  } catch (IOException e) {
    throw new IllegalArgumentException(e);
  }
}
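As in createDatasetConfiguration above, hConf.clear() ensures the returned map holds only the dataset-split entries rather than Hadoop's defaults. The only checked failure here is setDatasetSplits failing to serialize the splits into the Configuration, which the method surfaces to callers as an IllegalArgumentException.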