/**
 * Determines the number of reducers to use based on the input data size.
 * The number of reducers to use is based on the input data size and the
 * <em>num.reducers.bytes.per.reducer</em> property. See {@link ReduceEstimator}
 * for details on reducer estimation.
 *
 * <p>Each path returned by {@code getInputsToProcess()} is registered with the
 * estimator under the tag {@code "input"}, and the resulting estimate is stored
 * in {@code _numReducers}.
 *
 * @throws IOException if one of the calls made while building the estimate
 *         fails with an I/O error
 */
private void determineNumReducers() throws IOException
{
  ReduceEstimator estimator = new ReduceEstimator(getFileSystem(), getProps());

  // Only the estimator consumes the input paths; no separate list of path
  // strings is kept because nothing in this method reads one.
  for (DatePath input : getInputsToProcess())
  {
    estimator.addInputPath("input", input.getPath());
  }

  _numReducers = estimator.getNumReducers();
}
/**
 * Determines the number of reducers to use based on the input data size and the previous output,
 * if it exists and is being reused.
 * The number of reducers to use is based on the input data size and the
 * <em>num.reducers.bytes.per.reducer</em> property. This setting can be controlled more granularly
 * through <em>num.reducers.input.bytes.per.reducer</em> and <em>num.reducers.previous.bytes.per.reducer</em>.
 * See {@link ReduceEstimator} for details on reducer estimation.
 *
 * <p>Each path in {@code _inputsToProcess} is registered with the estimator under the tag
 * {@code "input"}. When {@code _previousOutputToProcess} is non-null, its path is registered
 * under the tag {@code "previous"}. The resulting estimate is stored in {@code _numReducers}.
 *
 * @throws IOException if one of the calls made while building the estimate
 *         fails with an I/O error
 */
private void determineNumReducers() throws IOException
{
  ReduceEstimator estimator = new ReduceEstimator(getFileSystem(), getProps());

  // Only the estimator consumes the input paths; no separate list of path
  // strings is kept because nothing in this method reads one.
  for (DatePath input : _inputsToProcess)
  {
    estimator.addInputPath("input", input.getPath());
  }

  // The previous output is optional; it contributes to the estimate only when present.
  if (_previousOutputToProcess != null)
  {
    estimator.addInputPath("previous", _previousOutputToProcess.getPath());
  }

  _numReducers = estimator.getNumReducers();
}
}
// Obtain a Schema via PathUtils from latestInput's path, using the file system returned by getFileSystem().
Schema inputSchema = PathUtils.getSchemaFromPath(getFileSystem(),latestInput.getPath());
// Create a ReduceEstimator from the same file system accessor and getProperties().
// NOTE(review): how inputSchema and estimator are used afterwards is outside this excerpt.
ReduceEstimator estimator = new ReduceEstimator(getFileSystem(),getProperties());