/**
 * Get the number of reducers to use based on the input and previous output data size.
 * Must call {@link #createPlan()} first.
 *
 * @return number of reducers to use
 */
public int getNumReducers()
{
  // getPlan() already runs checkPlanExists(), so no separate check is needed here.
  return getPlan()._numReducers;
}
/**
 * Finalizes the plan by invoking, in this order, {@code determineInputSchemas()},
 * {@code determineNumReducers()} and {@code determineTotalBytes()}.
 *
 * @throws IOException if one of the steps fails with an I/O error
 */
public void finalizePlan() throws IOException
{
  determineInputSchemas();
  determineNumReducers();
  determineTotalBytes();
}
/**
 * Creates a job with the given name and configuration properties.
 * The no-argument constructor runs first, then the name and the properties are applied
 * through {@code setName} and {@code setProperties}.
 *
 * @param name  name to assign to the job
 * @param props configuration properties to assign to the job
 */
public AbstractJob(String name, Properties props)
{
  this();
  setName(name);
  setProperties(props);
}
/**
 * Produces a random temporary path: the result of {@code randomTempPath()} is handed to
 * {@code ensurePath} and whatever that returns is given back to the caller.
 *
 * @return the random temporary path
 * @throws IOException if an I/O error occurs while preparing the path
 */
protected Path createRandomTempPath() throws IOException
{
  return ensurePath(
      randomTempPath());
}
/**
 * Returns the schema of each input, keyed by input path. Several inputs may be configured,
 * so the map may hold several schemas.
 * {@link #createPlan()} must have been called beforehand.
 *
 * @return mapping of input path to its schema
 */
public Map<String,Schema> getInputSchemasByPath()
{
  checkPlanExists();
  final Map<String,Schema> schemasByPath = _inputSchemasByPath;
  return schemasByPath;
}
/**
 * Reports whether a further pass is needed. The number of inputs handled in one run may be
 * limited, so covering the whole desired date range can take more than one run.
 * {@link #createPlan()} must have been called beforehand.
 *
 * @return true when another pass is required
 */
public boolean getNeedsAnotherPass()
{
  final Plan plan = getPlan();
  return plan._needAnotherPass;
}
/**
 * Removes all temporary paths by cleaning {@code _garbage}; does nothing when it is null.
 *
 * @throws IOException if cleaning the temporary paths fails
 */
private void cleanup() throws IOException
{
  if (_garbage == null)
  {
    return;
  }
  _garbage.clean();
}
/**
 * Returns the plan held in {@code _plan}.
 * {@code checkPlanExists()} is invoked before the field is read, so every accessor that goes
 * through this method gets that check without calling it separately.
 * NOTE(review): presumably the check fails when no plan has been created yet - confirm.
 *
 * @return the current plan
 */
private Plan getPlan() { checkPlanExists(); return _plan; } }
/**
 * Gets the current date range recorded in the plan.
 * Must call {@link #createPlan()} first.
 *
 * @return current date range stored in the plan
 */
public DateRange getCurrentDateRange()
{
  // getPlan() already runs checkPlanExists(), so no separate check is needed here.
  return getPlan()._currentDateRange;
}
/**
 * Reports whether a further pass is needed. The number of inputs handled in one run may be
 * limited, so covering the whole desired date range can take more than one run.
 * {@link #createPlan()} must have been called beforehand.
 *
 * @return true when another pass is required
 */
public boolean getNeedsAnotherPass()
{
  checkPlanExists();
  final boolean anotherPassNeeded = _needAnotherPass;
  return anotherPassNeeded;
}
/**
 * Returns the schema of each input, keyed by input path. Several inputs may be configured,
 * so the map may hold several schemas.
 * {@link #createPlan()} must have been called beforehand.
 *
 * @return mapping of input path to its schema
 */
public Map<String,Schema> getInputSchemasByPath()
{
  final Plan plan = getPlan();
  return plan._inputSchemasByPath;
}
/**
 * Removes all temporary paths by cleaning {@code _garbage}; does nothing when it is null.
 *
 * @throws IOException if cleaning the temporary paths fails
 */
private void cleanup() throws IOException
{
  if (_garbage == null)
  {
    return;
  }
  _garbage.clean();
}
/**
 * Returns the inputs selected for processing.
 * {@link #createPlan()} must have been called beforehand.
 *
 * @return the inputs to process
 */
public List<DatePath> getInputsToProcess()
{
  checkPlanExists();
  final List<DatePath> inputs = _inputsToProcess;
  return inputs;
}
/**
 * Returns the previous output that will be reused, or null when no output is being reused.
 * {@link #createPlan()} must have been called beforehand.
 *
 * @return the previous output to reuse, or null
 */
public DatePath getPreviousOutputToProcess()
{
  final Plan plan = getPlan();
  return plan._previousOutputToProcess;
}
/**
 * Returns how many reducers to use, as derived from the input data size.
 * {@link #createPlan()} must have been called beforehand.
 *
 * @return the number of reducers to use
 */
public int getNumReducers()
{
  checkPlanExists();
  final int reducerCount = _numReducers;
  return reducerCount;
}
/**
 * Returns only the old inputs that will be processed. Old data lies before the desired
 * date range and is subtracted out from the previous output.
 * {@link #createPlan()} must have been called beforehand.
 *
 * @return the old inputs to process
 */
public List<DatePath> getOldInputsToProcess()
{
  final Plan plan = getPlan();
  return plan._oldInputsToProcess;
}
/**
 * Returns the schemas of the inputs. Several inputs may be configured, so several schemas
 * may be returned.
 * {@link #createPlan()} must have been called beforehand.
 *
 * @return the input schemas
 */
public List<Schema> getInputSchemas()
{
  checkPlanExists();
  final List<Schema> schemas = _inputSchemas;
  return schemas;
}
/**
 * Returns only the new inputs that will be processed. New data lies within the desired
 * date range.
 * {@link #createPlan()} must have been called beforehand.
 *
 * @return the new inputs to process
 */
public List<DatePath> getNewInputsToProcess()
{
  final Plan plan = getPlan();
  return plan._newInputsToProcess;
}