/**
 * Serializes an {@link InputFormatProvider} as a JSON object with two members:
 * "inputFormatClass" (the input format class name) and "inputFormatConfig"
 * (the provider's configuration map, serialized via the Gson context).
 */
@Override
public JsonElement serialize(InputFormatProvider src, Type typeOfSrc, JsonSerializationContext context) {
  JsonObject result = new JsonObject();
  result.addProperty("inputFormatClass", src.getInputFormatClassName());
  result.add("inputFormatConfig", context.serialize(src.getInputFormatConfiguration()));
  return result;
}
}
/**
 * Returns the input format configuration. When the underlying dataset is itself an
 * {@link InputFormatProvider} its own configuration is returned directly (any
 * user-specified splits are ignored with a warning); otherwise a BatchReadable-based
 * configuration is built.
 */
@Override
public Map<String, String> getInputFormatConfiguration() {
  if (!(dataset instanceof InputFormatProvider)) {
    return createBatchReadableConfiguration();
  }
  // splits only apply to the BatchReadable path; warn so the user knows they were dropped
  if (splits != null) {
    LOG.warn("Ignoring user-specified splits for {} because it is of type InputFormatProvider", datasetName);
  }
  return ((InputFormatProvider) dataset).getInputFormatConfiguration();
}
/**
 * Returns the input format class name: delegated to the dataset when it is an
 * {@link InputFormatProvider}, otherwise taken from the configured BatchReadable
 * input format class.
 */
@Override
public String getInputFormatClassName() {
  if (dataset instanceof InputFormatProvider) {
    return ((InputFormatProvider) dataset).getInputFormatClassName();
  }
  return batchReadableInputFormat.getName();
}
/**
 * Returns the configuration for the input format. An {@link InputFormatProvider}
 * dataset supplies its own configuration (user-specified splits are ignored with a
 * warning in that case); any other dataset goes through the BatchReadable path.
 */
@Override
public Map<String, String> getInputFormatConfiguration() {
  if (dataset instanceof InputFormatProvider) {
    if (splits != null) {
      // splits are meaningless for a provider-backed dataset; tell the user they were dropped
      LOG.warn("Ignoring user-specified splits for {} because it is of type InputFormatProvider", datasetName);
    }
    return ((InputFormatProvider) dataset).getInputFormatConfiguration();
  }
  return createBatchReadableConfiguration();
}
/**
 * Returns the name of the input format class, preferring the dataset's own answer
 * when it implements {@link InputFormatProvider} and falling back to the
 * BatchReadable input format class otherwise.
 */
@Override
public String getInputFormatClassName() {
  boolean providesOwnFormat = dataset instanceof InputFormatProvider;
  return providesOwnFormat
    ? ((InputFormatProvider) dataset).getInputFormatClassName()
    : batchReadableInputFormat.getName();
}
/** * Creates an instance of MapperInput with the given InputFormatProvider and specified Mapper class. */ public MapperInput(String alias, InputFormatProvider inputFormatProvider, @Nullable Class<? extends Mapper> mapper) { this.alias = alias; this.inputFormatProvider = inputFormatProvider; this.inputFormatClassName = inputFormatProvider.getInputFormatClassName(); this.inputFormatConfiguration = inputFormatProvider.getInputFormatConfiguration(); this.mapper = mapper; if (inputFormatClassName == null) { throw new IllegalArgumentException( "Input '" + alias + "' provided null as the input format"); } if (inputFormatConfiguration == null) { throw new IllegalArgumentException( "Input '" + alias + "' provided null as the input format configuration"); } }
/**
 * Validates the stage config, resolves the configured input format plugin, and — when the
 * relevant properties contain no macros — creates the backing TimePartitionedFileSet dataset.
 */
@Override public void configurePipeline(PipelineConfigurer pipelineConfigurer) { config.validate(); String inputFormatName = getInputFormatName(); InputFormatProvider inputFormatProvider = pipelineConfigurer.usePlugin("inputformat", inputFormatName, FORMAT_PLUGIN_ID, config.getProperties()); if (inputFormatProvider == null) { throw new IllegalArgumentException( String.format("Could not find the '%s' input format plugin. " + "Please ensure the '%s' format plugin is installed.", inputFormatName, inputFormatName)); } // get input format configuration to give the input format plugin a chance to validate its config // and fail pipeline deployment if it is invalid inputFormatProvider.getInputFormatConfiguration(); if (!config.containsMacro("name") && !config.containsMacro("basePath")) { String tpfsName = config.getName(); pipelineConfigurer.createDataset(tpfsName, TimePartitionedFileSet.class.getName(), createProperties(inputFormatProvider)); } pipelineConfigurer.getStageConfigurer().setOutputSchema(config.getSchema()); }
/** * Creates an instance of MapperInput with the given InputFormatProvider and specified Mapper class. */ public MapperInput(String alias, InputFormatProvider inputFormatProvider, @Nullable Class<? extends Mapper> mapper) { this.alias = alias; this.inputFormatProvider = inputFormatProvider; this.inputFormatClassName = inputFormatProvider.getInputFormatClassName(); this.inputFormatConfiguration = inputFormatProvider.getInputFormatConfiguration(); this.mapper = mapper; if (inputFormatClassName == null) { throw new IllegalArgumentException( "Input '" + alias + "' provided null as the input format"); } if (inputFormatConfiguration == null) { throw new IllegalArgumentException( "Input '" + alias + "' provided null as the input format configuration"); } }
/**
 * Resolves the configured input format plugin and — when name/basePath/fileProperties
 * contain no macros — creates the backing PartitionedFileSet dataset at configure time.
 */
@Override public void configurePipeline(PipelineConfigurer pipelineConfigurer) { String inputFormatName = getInputFormatName(); InputFormatProvider inputFormatProvider = pipelineConfigurer.usePlugin("inputformat", inputFormatName, FORMAT_PLUGIN_ID, config.getProperties()); if (inputFormatProvider == null) { throw new IllegalArgumentException( String.format("Could not find the '%s' input format plugin. " + "Please ensure the '%s' format plugin is installed.", inputFormatName, inputFormatName)); } // get input format configuration to give the input format plugin a chance to validate its config // and fail pipeline deployment if it is invalid inputFormatProvider.getInputFormatConfiguration(); if (!config.containsMacro("name") && !config.containsMacro("basePath") && !config.containsMacro("fileProperties")) { pipelineConfigurer.createDataset(config.getName(), PartitionedFileSet.class, createProperties(inputFormatProvider)); } pipelineConfigurer.getStageConfigurer().setOutputSchema(config.getSchema()); }
/**
 * Builds dataset properties for the snapshot fileset: starts from the snapshot base
 * properties, applies the optional base path, then copies the provider's input format
 * class and configuration entries before adding the remaining file properties.
 */
private DatasetProperties createProperties(InputFormatProvider inputFormatProvider) {
  FileSetProperties.Builder builder = SnapshotFileSet.getBaseProperties(config);
  String basePath = config.getBasePath();
  if (!Strings.isNullOrEmpty(basePath)) {
    builder.setBasePath(basePath);
  }
  builder.setInputFormat(inputFormatProvider.getInputFormatClassName());
  for (Map.Entry<String, String> entry : inputFormatProvider.getInputFormatConfiguration().entrySet()) {
    builder.setInputProperty(entry.getKey(), entry.getValue());
  }
  addFileProperties(builder);
  return builder.build();
}
/**
 * Builds dataset properties for the fileset: applies the optional base path, then copies
 * the provider's input format class and configuration entries into the builder before
 * adding the remaining fileset properties.
 */
private DatasetProperties createProperties(InputFormatProvider inputFormatProvider) {
  FileSetProperties.Builder builder = FileSetProperties.builder();
  String basePath = config.getBasePath();
  if (!Strings.isNullOrEmpty(basePath)) {
    builder.setBasePath(basePath);
  }
  builder.setInputFormat(inputFormatProvider.getInputFormatClassName());
  for (Map.Entry<String, String> entry : inputFormatProvider.getInputFormatConfiguration().entrySet()) {
    builder.setInputProperty(entry.getKey(), entry.getValue());
  }
  addFileSetProperties(builder);
  return builder.build();
}
// NOTE(review): fragment of a larger method — the enclosing try block (and apparently a
// closing brace for the for-loop) lies outside this view, so braces here look unbalanced.
// Builds a fresh, cleared Hadoop Configuration containing ONLY the provider-specified
// properties, loads the input format class through the supplied classloader, and creates
// the RDD from it. The catch handles a missing input format class.
Configuration hConf = new Configuration(); hConf.clear(); for (Map.Entry<String, String> entry : inputFormatProvider.getInputFormatConfiguration().entrySet()) { hConf.set(entry.getKey(), entry.getValue()); @SuppressWarnings("unchecked") Class<InputFormat> inputFormatClass = (Class<InputFormat>) classLoader.loadClass( inputFormatProvider.getInputFormatClassName()); return jsc.newAPIHadoopRDD(hConf, inputFormatClass, keyClass, valueClass); } catch (ClassNotFoundException e) {
// NOTE(review): fragment of a larger method — surrounding declarations and the loop's
// closing brace are outside this view.
// Registers the input path and max split size on the MapReduce job, records the
// provider's input format class name, and copies every provider configuration entry
// into the job's Hadoop Configuration.
FileInputFormat.addInputPath(job, path); FileInputFormat.setMaxInputSplitSize(job, config.getMaxSplitSize()); inputFormatClass = inputFormatProvider.getInputFormatClassName(); Configuration hConf = job.getConfiguration(); for (Map.Entry<String, String> propertyEntry : inputFormatProvider.getInputFormatConfiguration().entrySet()) { hConf.set(propertyEntry.getKey(), propertyEntry.getValue());
public void addInput(String stageName, Input input) { if (input instanceof Input.DatasetInput) { // Note if input format provider is trackable then it comes in as DatasetInput Input.DatasetInput datasetInput = (Input.DatasetInput) input; addInput(stageName, datasetInput.getName(), datasetInput.getAlias(), datasetInput.getArguments(), datasetInput.getSplits()); } else if (input instanceof Input.InputFormatProviderInput) { Input.InputFormatProviderInput ifpInput = (Input.InputFormatProviderInput) input; addInput(stageName, ifpInput.getAlias(), new BasicInputFormatProvider(ifpInput.getInputFormatProvider().getInputFormatClassName(), ifpInput.getInputFormatProvider().getInputFormatConfiguration())); } else if (input instanceof Input.StreamInput) { Input.StreamInput streamInput = (Input.StreamInput) input; addInput(stageName, streamInput.getAlias(), streamInput); } }