/**
 * Creates an {@link Input} that reads from the named dataset.
 *
 * @param datasetName name of the dataset to read from
 * @param arguments arguments used when the dataset is instantiated
 * @param splits the data selection splits; when {@code null}, the splits defined by the dataset are used.
 *               Splits are ignored if the dataset type is not {@link BatchReadable}
 * @return a dataset-backed Input built from the given name, arguments and splits
 */
public static Input ofDataset(String datasetName, Map<String, String> arguments,
                              @Nullable Iterable<? extends Split> splits) {
  DatasetInput datasetInput = new DatasetInput(datasetName, arguments, splits);
  return datasetInput;
}
private DatasetInput(String name, Map<String, String> arguments, @Nullable Iterable<? extends Split> splits) { super(name); this.arguments = Collections.unmodifiableMap(new HashMap<>(arguments)); this.splits = copySplits(splits); }
public void addInput(String stageName, Input input) { if (input instanceof Input.DatasetInput) { // Note if input format provider is trackable then it comes in as DatasetInput Input.DatasetInput datasetInput = (Input.DatasetInput) input; addInput(stageName, datasetInput.getName(), datasetInput.getAlias(), datasetInput.getArguments(), datasetInput.getSplits()); } else if (input instanceof Input.InputFormatProviderInput) { Input.InputFormatProviderInput ifpInput = (Input.InputFormatProviderInput) input; addInput(stageName, ifpInput.getAlias(), new BasicInputFormatProvider(ifpInput.getInputFormatProvider().getInputFormatClassName(), ifpInput.getInputFormatProvider().getInputFormatConfiguration())); } else if (input instanceof Input.StreamInput) { Input.StreamInput streamInput = (Input.StreamInput) input; addInput(stageName, streamInput.getAlias(), streamInput); } }
private Input.InputFormatProviderInput createInput(Input.DatasetInput datasetInput) { String datasetName = datasetInput.getName(); Map<String, String> datasetArgs = datasetInput.getArguments(); // keep track of the original alias to set it on the created Input before returning it String originalAlias = datasetInput.getAlias(); Dataset dataset; if (datasetInput.getNamespace() == null) { dataset = getDataset(datasetName, datasetArgs, AccessType.READ); } else { dataset = getDataset(datasetInput.getNamespace(), datasetName, datasetArgs, AccessType.READ); } DatasetInputFormatProvider datasetInputFormatProvider = new DatasetInputFormatProvider(datasetInput.getNamespace(), datasetName, datasetArgs, dataset, datasetInput.getSplits(), MapReduceBatchReadableInputFormat.class); return (Input.InputFormatProviderInput) Input.of(datasetName, datasetInputFormatProvider).alias(originalAlias); }
private Input.InputFormatProviderInput createInput(Input.DatasetInput datasetInput) { String datasetName = datasetInput.getName(); Map<String, String> datasetArgs = datasetInput.getArguments(); // keep track of the original alias to set it on the created Input before returning it String originalAlias = datasetInput.getAlias(); Dataset dataset; if (datasetInput.getNamespace() == null) { dataset = getDataset(datasetName, datasetArgs, AccessType.READ); } else { dataset = getDataset(datasetInput.getNamespace(), datasetName, datasetArgs, AccessType.READ); } DatasetInputFormatProvider datasetInputFormatProvider = new DatasetInputFormatProvider(datasetInput.getNamespace(), datasetName, datasetArgs, dataset, datasetInput.getSplits(), MapReduceBatchReadableInputFormat.class); return (Input.InputFormatProviderInput) Input.of(datasetName, datasetInputFormatProvider).alias(originalAlias); }
/**
 * Creates a new {@code DatasetInput} with this input's name, arguments and splits,
 * constructed with the given namespace. This instance is left unchanged.
 *
 * @param namespace the namespace to pass to the new input
 * @return the newly created input
 */
@Override
public DatasetInput fromNamespace(String namespace) {
  DatasetInput namespaced = new DatasetInput(super.name, arguments, splits, namespace);
  return namespaced;
}
}