public List<InputSplit> getSplits(List<InputSplit> splits, Configuration conf) throws IOException { // update splits to align with CRAM container boundaries List<InputSplit> newSplits = new ArrayList<InputSplit>(); Map<Path, List<Long>> fileToOffsets = new HashMap<Path, List<Long>>(); for (InputSplit split : splits) { FileSplit fileSplit = (FileSplit) split; Path path = fileSplit.getPath(); List<Long> containerOffsets = fileToOffsets.get(path); if (containerOffsets == null) { containerOffsets = getContainerOffsets(conf, path); fileToOffsets.put(path, containerOffsets); } long newStart = nextContainerOffset(containerOffsets, fileSplit.getStart()); long newEnd = nextContainerOffset(containerOffsets, fileSplit.getStart() + fileSplit.getLength()); long newLength = newEnd - newStart; if (newLength == 0) { // split is wholly within a container continue; } FileSplit newSplit = new FileSplit(fileSplit.getPath(), newStart, newLength, fileSplit.getLocations()); newSplits.add(newSplit); } return newSplits; }
public List<InputSplit> getSplits(List<InputSplit> splits, Configuration conf) throws IOException { // update splits to align with CRAM container boundaries List<InputSplit> newSplits = new ArrayList<InputSplit>(); Map<Path, List<Long>> fileToOffsets = new HashMap<Path, List<Long>>(); for (InputSplit split : splits) { FileSplit fileSplit = (FileSplit) split; Path path = fileSplit.getPath(); List<Long> containerOffsets = fileToOffsets.get(path); if (containerOffsets == null) { containerOffsets = getContainerOffsets(conf, path); fileToOffsets.put(path, containerOffsets); } long newStart = nextContainerOffset(containerOffsets, fileSplit.getStart()); long newEnd = nextContainerOffset(containerOffsets, fileSplit.getStart() + fileSplit.getLength()); long newLength = newEnd - newStart; if (newLength == 0) { // split is wholly within a container continue; } FileSplit newSplit = new FileSplit(fileSplit.getPath(), newStart, newLength, fileSplit.getLocations()); newSplits.add(newSplit); } return newSplits; }
public List<InputSplit> getSplits(List<InputSplit> splits, Configuration conf) throws IOException { // update splits to align with CRAM container boundaries List<InputSplit> newSplits = new ArrayList<InputSplit>(); Map<Path, List<Long>> fileToOffsets = new HashMap<Path, List<Long>>(); for (InputSplit split : splits) { FileSplit fileSplit = (FileSplit) split; Path path = fileSplit.getPath(); List<Long> containerOffsets = fileToOffsets.get(path); if (containerOffsets == null) { containerOffsets = getContainerOffsets(conf, path); fileToOffsets.put(path, containerOffsets); } long newStart = nextContainerOffset(containerOffsets, fileSplit.getStart()); long newEnd = nextContainerOffset(containerOffsets, fileSplit.getStart() + fileSplit.getLength()); long newLength = newEnd - newStart; if (newLength == 0) { // split is wholly within a container continue; } FileSplit newSplit = new FileSplit(fileSplit.getPath(), newStart, newLength, fileSplit.getLocations()); newSplits.add(newSplit); } return newSplits; }