@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    return inputFormat.getSplits(context);
}
protected int getMapInputSplitCount()
        throws ClassNotFoundException, JobException, IOException, InterruptedException {
    if (job == null) {
        throw new JobException("Job is null");
    }
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
    return input.getSplits(job).size();
}
public static double getTotalMapInputMB(Job job)
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    if (job == null) {
        throw new JobException("Job is null");
    }
    long mapInputBytes = 0;
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
    for (InputSplit split : input.getSplits(job)) {
        mapInputBytes += split.getLength();
    }
    // 0 input bytes is possible when the segment range hits no partition on a partitioned Hive table (KYLIN-2470).
    if (mapInputBytes == 0) {
        logger.warn("Map input splits are 0 bytes; something may be wrong");
    }
    return (double) mapInputBytes / 1024 / 1024;
}
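// Usage sketch (not from the source): the total map-input size is typically fed into
// capacity planning, e.g. picking a reducer count. estimateReducerCount and
// perReducerInputMB are hypothetical names, not Kylin API.
int estimateReducerCount(Job job, double perReducerInputMB)
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    double totalMB = getTotalMapInputMB(job);
    // At least one reducer, then roughly one reducer per perReducerInputMB of input.
    return Math.max(1, (int) Math.ceil(totalMB / perReducerInputMB));
}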
@Override
public HadoopInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    configuration.setInt("mapreduce.input.fileinputformat.split.minsize", minNumSplits);

    JobContext jobContext = new JobContextImpl(configuration, new JobID());
    // Propagate both the wrapper's credentials and the current user's credentials so
    // the underlying InputFormat can reach secured storage.
    jobContext.getCredentials().addAll(this.credentials);
    Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
    if (currentUserCreds != null) {
        jobContext.getCredentials().addAll(currentUserCreds);
    }

    List<org.apache.hadoop.mapreduce.InputSplit> splits;
    try {
        splits = this.mapreduceInputFormat.getSplits(jobContext);
    } catch (InterruptedException e) {
        throw new IOException("Could not get Splits.", e);
    }

    // Wrap each mapreduce split so the surrounding framework can serialize and ship it.
    HadoopInputSplit[] hadoopInputSplits = new HadoopInputSplit[splits.size()];
    for (int i = 0; i < hadoopInputSplits.length; i++) {
        hadoopInputSplits[i] = new HadoopInputSplit(i, splits.get(i), jobContext);
    }
    return hadoopInputSplits;
}
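// Usage sketch (assumptions: a `wrapper` instance is already constructed with a Hadoop
// InputFormat and Configuration, and HadoopInputSplit exposes getSplitNumber(); both
// are assumptions, not confirmed API). Ask for at least 4 splits and inspect the result:
HadoopInputSplit[] splits = wrapper.createInputSplits(4);
System.out.println("created " + splits.length + " splits");
for (HadoopInputSplit split : splits) {
    System.out.println("split #" + split.getSplitNumber());
}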
List<InputSplit> splits = format.getSplits(ctx);
@Override
public List<InputSplit> getSplits(final JobContext jobContext) throws IOException, InterruptedException {
    final Configuration configuration = jobContext.getConfiguration();
    return ReflectionUtils
            .newInstance(configuration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class, InputFormat.class), configuration)
            .getSplits(jobContext);
}
protected double getTotalMapInputMB()
        throws ClassNotFoundException, IOException, InterruptedException, JobException {
    if (job == null) {
        throw new JobException("Job is null");
    }
    long mapInputBytes = 0;
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), job.getConfiguration());
    for (InputSplit split : input.getSplits(job)) {
        mapInputBytes += split.getLength();
    }
    // Unlike the static variant above, which only logs a warning, this overload treats
    // zero input bytes as a hard error.
    if (mapInputBytes == 0) {
        throw new IllegalArgumentException("Map input splits are 0 bytes, something is wrong!");
    }
    return (double) mapInputBytes / 1024 / 1024;
}
public Object[] getSample(InputFormat inf, Job job) throws IOException, InterruptedException {
    long counter = 0;
    List<InputSplit> splits = inf.getSplits(job);
    ArrayList<K> samples = new ArrayList<K>(numSamples);
    int splitsToSample = Math.min(maxSplitsSampled, splits.size());
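    // The excerpt above breaks off mid-method. What follows is a hedged completion
    // sketch, not the verbatim source: it assumes K/V type parameters, a numSamples
    // field, and Hadoop's TaskAttemptContextImpl, and mirrors the usual
    // split-sampling pattern of reading keys until numSamples are collected.
    for (int i = 0; i < splitsToSample && counter < numSamples; ++i) {
        TaskAttemptContext ctx = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        RecordReader<K, V> reader = inf.createRecordReader(splits.get(i), ctx);
        reader.initialize(splits.get(i), ctx);
        while (counter < numSamples && reader.nextKeyValue()) {
            // Copy the key out of the reader's reusable buffer before storing it.
            samples.add(ReflectionUtils.copy(job.getConfiguration(), reader.getCurrentKey(), null));
            ++counter;
        }
        reader.close();
    }
    return samples.toArray();
}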
public HadoopElementIterator(final HadoopGraph graph) {
    try {
        this.graph = graph;
        final Configuration configuration = ConfUtil.makeHadoopConfiguration(this.graph.configuration());
        final InputFormat<NullWritable, VertexWritable> inputFormat = ConfUtil.getReaderAsInputFormat(configuration);
        if (inputFormat instanceof FileInputFormat) {
            final Storage storage = FileSystemStorage.open(configuration);
            if (!this.graph.configuration().containsKey(Constants.GREMLIN_HADOOP_INPUT_LOCATION))
                return; // there is no input location and thus, no data (empty graph)
            if (!Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).isPresent())
                return; // there is no data at the input location (empty graph)
            configuration.set(Constants.MAPREDUCE_INPUT_FILEINPUTFORMAT_INPUTDIR,
                    Constants.getSearchGraphLocation(this.graph.configuration().getInputLocation(), storage).get());
        }
        final List<InputSplit> splits = inputFormat.getSplits(
                new JobContextImpl(configuration, new JobID(UUID.randomUUID().toString(), 1)));
        for (final InputSplit split : splits) {
            this.readers.add(inputFormat.createRecordReader(split,
                    new TaskAttemptContextImpl(configuration, new TaskAttemptID())));
        }
    } catch (final Exception e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR",
        justification = "Delegate set by setConf")
public List<InputSplit> getSplits(JobContext jobContext) throws IOException, InterruptedException {
    return delegate.getSplits(jobContext);
}
@SuppressWarnings("unchecked") @Override public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException { instantiateWhenNeeded(); return instance.getSplits(context); }
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    List<InputSplit> originalSplits = wrappedInputFormat.getSplits(context);
    TezMapReduceSplitsGrouper grouper = new TezMapReduceSplitsGrouper();
    String wrappedInputFormatName = wrappedInputFormat.getClass().getName();
    // Coalesce the underlying format's splits into roughly desiredNumSplits groups,
    // using the estimator and location provider to keep groups size- and locality-aware.
    return grouper.getGroupedSplits(conf, originalSplits, desiredNumSplits,
            wrappedInputFormatName, estimator, locationProvider);
}
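// Grouping can be tuned through Tez configuration; the grouping-size keys below exist
// in Tez, but the values here are illustrative only, not recommendations:
Configuration conf = new Configuration();
conf.setLong("tez.grouping.min-size", 16L * 1024 * 1024);   // lower bound per grouped split
conf.setLong("tez.grouping.max-size", 1024L * 1024 * 1024); // upper bound per grouped split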
protected List<BoundedDataSource<Pair<K, V>>> doSplit(Job job) {
    return ExceptionUtils.unchecked(() -> newInputFormatClass()
            .getSplits(job)
            .stream()
            .map(split -> new HadoopSplit<>(this, split))
            .collect(Collectors.toList()));
}
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    return getInputFormat(HadoopCompat.getConfiguration(context)).getSplits(context);
}
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    return inf.getSplits(
            new JobContextImpl(getConf(context.getConfiguration()), context.getJobID()));
}
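// getConf(Configuration) is not shown in the excerpt. A plausible sketch: copy the
// job-wide Configuration and apply per-format overrides before delegating. The
// inputDir field and the choice of override key are assumptions, not the source:
private Configuration getConf(Configuration base) {
    Configuration copy = new Configuration(base); // leave the caller's conf untouched
    copy.set("mapreduce.input.fileinputformat.inputdir", inputDir);
    return copy;
}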