/**
 * Builds the input splits that are handed to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}, grouped by the modification time of
 * the file each split belongs to.
 *
 * <p>The result is a {@link TreeMap}, so iterating over it yields the groups in
 * ascending modification-time order. Splits whose path is not a key of
 * {@code eligibleFiles} are dropped.
 *
 * @param eligibleFiles The files to process, keyed by their path.
 * @return the splits of the eligible files, keyed by file modification time;
 *         empty when {@code eligibleFiles} is empty.
 * @throws IOException declared for the split creation performed by the input format.
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
        Map<Path, FileStatus> eligibleFiles) throws IOException {

    Map<Long, List<TimestampedFileInputSplit>> grouped = new TreeMap<>();
    if (eligibleFiles.isEmpty()) {
        // nothing to do, and no need to ask the format for splits at all
        return grouped;
    }

    for (FileInputSplit candidate : format.createInputSplits(readerParallelism)) {
        FileStatus status = eligibleFiles.get(candidate.getPath());
        if (status == null) {
            // the split belongs to a file that is not eligible
            continue;
        }

        Long timestamp = status.getModificationTime();
        List<TimestampedFileInputSplit> bucket = grouped.get(timestamp);
        if (bucket == null) {
            bucket = new ArrayList<>();
            grouped.put(timestamp, bucket);
        }

        bucket.add(new TimestampedFileInputSplit(
                timestamp,
                candidate.getSplitNumber(),
                candidate.getPath(),
                candidate.getStart(),
                candidate.getLength(),
                candidate.getHostnames()));
    }

    return grouped;
}
/**
 * Renders the split as
 * {@code [splitNumber] path mod@ modificationTime : start + length}.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append("[").append(getSplitNumber()).append("] ");
    text.append(getPath());
    text.append(" mod@ ").append(modificationTime);
    text.append(" : ").append(getStart());
    text.append(" + ").append(getLength());
    return text.toString();
}
}
/**
 * Clears the state of this split by setting it to {@code null}.
 */
public void resetSplitState() {
    setSplitState(null);
}
@Override public int compareTo(TimestampedFileInputSplit o) { int modTimeComp = Long.compare(this.modificationTime, o.modificationTime); if (modTimeComp != 0L) { return modTimeComp; } // the file input split does not prevent null paths. if (this.getPath() == null && o.getPath() != null) { return 1; } else if (this.getPath() != null && o.getPath() == null) { return -1; } int pathComp = this.getPath() == o.getPath() ? 0 : this.getPath().compareTo(o.getPath()); return pathComp != 0 ? pathComp : this.getSplitNumber() - o.getSplitNumber(); }
@Test public void testSplitComparison() { TimestampedFileInputSplit richFirstSplit = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null); TimestampedFileInputSplit richSecondSplit = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null); TimestampedFileInputSplit richThirdSplit = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null); TimestampedFileInputSplit richForthSplit = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null); TimestampedFileInputSplit richFifthSplit = new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null); // smaller mod time Assert.assertTrue(richFirstSplit.compareTo(richSecondSplit) < 0); // lexicographically on the path Assert.assertTrue(richThirdSplit.compareTo(richFifthSplit) < 0); // same mod time, same file so smaller split number first Assert.assertTrue(richThirdSplit.compareTo(richSecondSplit) < 0); // smaller modification time first Assert.assertTrue(richThirdSplit.compareTo(richForthSplit) < 0); }
/**
 * Wraps a {@link FileInputSplit} into a {@link TimestampedFileInputSplit}
 * carrying the given modification time and, optionally, a split state.
 *
 * @param split the split whose number, path, start, length and hostnames are copied.
 * @param modificationTime the modification time to attach to the new split.
 * @param state the state to set on the new split; not set when {@code null}.
 * @return the newly created timestamped split.
 */
private TimestampedFileInputSplit createTimestampedFileSplit(
        FileInputSplit split, long modificationTime, Serializable state) {

    TimestampedFileInputSplit result = new TimestampedFileInputSplit(
            modificationTime,
            split.getSplitNumber(),
            split.getPath(),
            split.getStart(),
            split.getLength(),
            split.getHostnames());

    if (state == null) {
        return result;
    }

    result.setSplitState(state);
    return result;
}
}
/**
 * Checks equality of {@code TimestampedFileInputSplit} instances: splits built
 * from identical arguments are equal, while a different modification time or a
 * different path makes them unequal.
 */
@Test
public void testSplitEquality() {

    TimestampedFileInputSplit richFirstSplit =
            new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);

    TimestampedFileInputSplit richSecondSplit =
            new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
    Assert.assertEquals(richFirstSplit, richSecondSplit);

    // only the modification time differs
    TimestampedFileInputSplit richModSecondSplit =
            new TimestampedFileInputSplit(11, 2, new Path("test"), 0, 100, null);
    Assert.assertNotEquals(richSecondSplit, richModSecondSplit);

    // only the path differs
    TimestampedFileInputSplit richThirdSplit =
            new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
    // expected value first, so that a failure message reports the values the right way round
    Assert.assertEquals(10, richThirdSplit.getModificationTime());
    Assert.assertNotEquals(richFirstSplit, richThirdSplit);

    TimestampedFileInputSplit richThirdSplitCopy =
            new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
    Assert.assertEquals(richThirdSplitCopy, richThirdSplit);
}
@Override public int compareTo(TimestampedFileInputSplit o) { int modTimeComp = Long.compare(this.modificationTime, o.modificationTime); if (modTimeComp != 0L) { return modTimeComp; } // the file input split does not prevent null paths. if (this.getPath() == null && o.getPath() != null) { return 1; } else if (this.getPath() != null && o.getPath() == null) { return -1; } int pathComp = this.getPath() == o.getPath() ? 0 : this.getPath().compareTo(o.getPath()); return pathComp != 0 ? pathComp : this.getSplitNumber() - o.getSplitNumber(); }
/**
 * Renders the split as
 * {@code [splitNumber] path mod@ modificationTime : start + length}.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append("[").append(getSplitNumber()).append("] ");
    text.append(getPath());
    text.append(" mod@ ").append(modificationTime);
    text.append(" : ").append(getStart());
    text.append(" + ").append(getLength());
    return text.toString();
}
}
@Test public void testIllegalArgument() { try { new TimestampedFileInputSplit(-10, 2, new Path("test"), 0, 100, null); // invalid modification time } catch (Exception e) { if (!(e instanceof IllegalArgumentException)) { Assert.fail(e.getMessage()); } } }
@Override public int compareTo(TimestampedFileInputSplit o) { int modTimeComp = Long.compare(this.modificationTime, o.modificationTime); if (modTimeComp != 0L) { return modTimeComp; } // the file input split does not prevent null paths. if (this.getPath() == null && o.getPath() != null) { return 1; } else if (this.getPath() != null && o.getPath() == null) { return -1; } int pathComp = this.getPath() == o.getPath() ? 0 : this.getPath().compareTo(o.getPath()); return pathComp != 0 ? pathComp : this.getSplitNumber() - o.getSplitNumber(); }
/**
 * Clears the state of this split by setting it to {@code null}.
 */
public void resetSplitState() {
    setSplitState(null);
}
/**
 * Renders the split as
 * {@code [splitNumber] path mod@ modificationTime : start + length}.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append("[").append(getSplitNumber()).append("] ");
    text.append(getPath());
    text.append(" mod@ ").append(modificationTime);
    text.append(" : ").append(getStart());
    text.append(" + ").append(getLength());
    return text.toString();
}
}
@Test public void testPriorityQ() { TimestampedFileInputSplit richFirstSplit = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null); new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null); new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null); new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null); new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null);
@Override public int compareTo(TimestampedFileInputSplit o) { int modTimeComp = Long.compare(this.modificationTime, o.modificationTime); if (modTimeComp != 0L) { return modTimeComp; } // the file input split does not prevent null paths. if (this.getPath() == null && o.getPath() != null) { return 1; } else if (this.getPath() != null && o.getPath() == null) { return -1; } int pathComp = this.getPath() == o.getPath() ? 0 : this.getPath().compareTo(o.getPath()); return pathComp != 0 ? pathComp : this.getSplitNumber() - o.getSplitNumber(); }
/**
 * Clears the state of this split by setting it to {@code null}.
 */
public void resetSplitState() {
    setSplitState(null);
}
/**
 * Renders the split as
 * {@code [splitNumber] path mod@ modificationTime : start + length}.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append("[").append(getSplitNumber()).append("] ");
    text.append(getPath());
    text.append(" mod@ ").append(modificationTime);
    text.append(" : ").append(getStart());
    text.append(" + ").append(getLength());
    return text.toString();
}
}
/**
 * Converts a {@link FileInputSplit} into a {@link TimestampedFileInputSplit}
 * that carries the given modification time.
 *
 * @param modTime the modification time to attach to the new split.
 * @param split the split whose number, path, start, length and hostnames are
 *              copied; checked with {@code Preconditions.checkNotNull}.
 * @return the newly created timestamped split.
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
    // validate before any accessor of the split is touched
    Preconditions.checkNotNull(split);

    TimestampedFileInputSplit timestamped = new TimestampedFileInputSplit(
            modTime,
            split.getSplitNumber(),
            split.getPath(),
            split.getStart(),
            split.getLength(),
            split.getHostnames());
    return timestamped;
}
/**
 * Clears the state of this split by setting it to {@code null}.
 */
public void resetSplitState() {
    setSplitState(null);
}
/**
 * Builds the input splits that are handed to the downstream tasks of the
 * {@link ContinuousFileReaderOperator}, grouped by the modification time of
 * the file each split belongs to.
 *
 * <p>The result is a {@link TreeMap}, so iterating over it yields the groups in
 * ascending modification-time order. Splits whose path is not a key of
 * {@code eligibleFiles} are dropped.
 *
 * @param eligibleFiles The files to process, keyed by their path.
 * @return the splits of the eligible files, keyed by file modification time;
 *         empty when {@code eligibleFiles} is empty.
 * @throws IOException declared for the split creation performed by the input format.
 */
private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime(
        Map<Path, FileStatus> eligibleFiles) throws IOException {

    Map<Long, List<TimestampedFileInputSplit>> grouped = new TreeMap<>();
    if (eligibleFiles.isEmpty()) {
        // nothing to do, and no need to ask the format for splits at all
        return grouped;
    }

    for (FileInputSplit candidate : format.createInputSplits(readerParallelism)) {
        FileStatus status = eligibleFiles.get(candidate.getPath());
        if (status == null) {
            // the split belongs to a file that is not eligible
            continue;
        }

        Long timestamp = status.getModificationTime();
        List<TimestampedFileInputSplit> bucket = grouped.get(timestamp);
        if (bucket == null) {
            bucket = new ArrayList<>();
            grouped.put(timestamp, bucket);
        }

        bucket.add(new TimestampedFileInputSplit(
                timestamp,
                candidate.getSplitNumber(),
                candidate.getPath(),
                candidate.getStart(),
                candidate.getLength(),
                candidate.getHostnames()));
    }

    return grouped;
}