/**
 * Gets all inputs that will be processed. This includes both old and new data.
 * Must call {@link #createPlan()} first.
 *
 * <p>
 * The plan's existence is verified with {@code checkPlanExists()} before it is read,
 * in the same way as {@link #getCurrentDateRange()} and {@link #getNumReducers()}.
 * </p>
 *
 * @return inputs to process
 */
public List<DatePath> getInputsToProcess() {
  // Enforce the documented createPlan() precondition up front, as the sibling getters do.
  checkPlanExists();
  return getPlan()._inputsToProcess;
}
/**
 * Gets only the new data that will be processed. New data is data that falls within the
 * desired date range.
 * Must call {@link #createPlan()} first.
 *
 * <p>
 * The plan's existence is verified with {@code checkPlanExists()} before it is read,
 * in the same way as {@link #getCurrentDateRange()} and {@link #getNumReducers()}.
 * </p>
 *
 * @return new inputs to process
 */
public List<DatePath> getNewInputsToProcess() {
  // Enforce the documented createPlan() precondition up front, as the sibling getters do.
  checkPlanExists();
  return getPlan()._newInputsToProcess;
}
/**
 * Gets the previous output to reuse, or null if no output is being reused.
 * Must call {@link #createPlan()} first.
 *
 * <p>
 * The plan's existence is verified with {@code checkPlanExists()} before it is read,
 * in the same way as {@link #getCurrentDateRange()} and {@link #getNumReducers()}.
 * </p>
 *
 * @return previous output to reuse, or null
 */
public DatePath getPreviousOutputToProcess() {
  // Enforce the documented createPlan() precondition up front, as the sibling getters do.
  checkPlanExists();
  return getPlan()._previousOutputToProcess;
}
/**
 * Gets only the old data that will be processed. Old data is data that falls before the
 * desired date range. It will be subtracted out from the previous output.
 * Must call {@link #createPlan()} first.
 *
 * <p>
 * The plan's existence is verified with {@code checkPlanExists()} before it is read,
 * in the same way as {@link #getCurrentDateRange()} and {@link #getNumReducers()}.
 * </p>
 *
 * @return old inputs to process
 */
public List<DatePath> getOldInputsToProcess() {
  // Enforce the documented createPlan() precondition up front, as the sibling getters do.
  checkPlanExists();
  return getPlan()._oldInputsToProcess;
}
/**
 * Gets whether another pass will be required. Because there may be a limit on the number
 * of inputs processed in a single run, multiple runs may be required to process all data
 * in the desired date range.
 * Must call {@link #createPlan()} first.
 *
 * <p>
 * The plan's existence is verified with {@code checkPlanExists()} before it is read,
 * in the same way as {@link #getCurrentDateRange()} and {@link #getNumReducers()}.
 * </p>
 *
 * @return true if another pass is required
 */
public boolean getNeedsAnotherPass() {
  // Enforce the documented createPlan() precondition up front, as the sibling getters do.
  checkPlanExists();
  return getPlan()._needAnotherPass;
}
/**
 * Gets a map from input path to schema. Because multiple inputs are allowed, there may be
 * multiple schemas.
 * Must call {@link #createPlan()} first.
 *
 * <p>
 * The plan's existence is verified with {@code checkPlanExists()} before it is read,
 * in the same way as {@link #getCurrentDateRange()} and {@link #getNumReducers()}.
 * </p>
 *
 * @return map from path to input schema
 */
public Map<String,Schema> getInputSchemasByPath() {
  // Enforce the documented createPlan() precondition up front, as the sibling getters do.
  checkPlanExists();
  return getPlan()._inputSchemasByPath;
}
/**
 * Gets the input schemas. Because multiple inputs are allowed, there may be multiple schemas.
 * Must call {@link #createPlan()} first.
 *
 * <p>
 * This does not include the output schema, even though previous output may be fed back as input.
 * The reason is that the output schema is determined based on the input schema.
 * </p>
 *
 * <p>
 * The plan's existence is verified with {@code checkPlanExists()} before it is read,
 * in the same way as {@link #getCurrentDateRange()} and {@link #getNumReducers()}.
 * </p>
 *
 * @return input schemas
 */
public List<Schema> getInputSchemas() {
  // Enforce the documented createPlan() precondition up front, as the sibling getters do.
  checkPlanExists();
  return getPlan()._inputSchemas;
}
/**
 * Gets the current date range stored in the plan.
 * The plan's existence is checked with {@code checkPlanExists()} before it is read.
 *
 * @return the plan's current date range
 */
public DateRange getCurrentDateRange() {
  checkPlanExists();
  final DateRange currentRange = getPlan()._currentDateRange;
  return currentRange;
}
/**
 * Gets the number of reducers to use, based on the input and previous output data size.
 * Must call {@link #createPlan()} first; the plan's existence is checked with
 * {@code checkPlanExists()} before it is read.
 *
 * @return number of reducers to use
 */
public int getNumReducers() {
  checkPlanExists();
  final int reducerCount = getPlan()._numReducers;
  return reducerCount;
}