/**
 * Returns the fraction of this reader's work that has been consumed, delegating to the
 * wrapped {@code boundedReader}. May return {@code null} when progress cannot be estimated.
 */
@Override
@Nullable
public Double getFractionConsumed() {
  return boundedReader.getFractionConsumed();
}
/**
 * This test validates behavior of {@link
 * HadoopInputFormatBoundedSource.HadoopInputFormatReader#start() start()} method if InputFormat's
 * {@link InputFormat#getSplits(JobContext) getSplits(JobContext)} returns InputSplitList having
 * zero records.
 */
@Test
public void testReadersStartWhenZeroRecords() throws Exception {
  // Mock an InputFormat whose RecordReader immediately reports that it has no records.
  InputFormat mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
  EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class);
  Mockito.when(
          mockInputFormat.createRecordReader(
              Mockito.any(InputSplit.class), Mockito.any(TaskAttemptContext.class)))
      .thenReturn(mockReader);
  Mockito.when(mockReader.nextKeyValue()).thenReturn(false);
  InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
  HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
      new HadoopInputFormatBoundedSource<>(
          serConf,
          WritableCoder.of(Text.class),
          AvroCoder.of(Employee.class),
          null, // No key translation required.
          null, // No value translation required.
          new SerializableSplit(mockInputSplit));
  boundedSource.setInputFormatObj(mockInputFormat);
  BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions());
  // start() must return false for an empty split, and the reader must then report
  // its work as fully consumed.
  assertEquals(false, reader.start());
  assertEquals(Double.valueOf(1), reader.getFractionConsumed());
  reader.close();
}
/**
 * This test validates behavior of {@link
 * HadoopInputFormatBoundedSource.HadoopInputFormatReader#start() start()} method if InputFormat's
 * {@link InputFormat#getSplits(JobContext) getSplits(JobContext)} returns InputSplitList having
 * zero records.
 */
@Test
public void testReadersStartWhenZeroRecords() throws Exception {
  // A RecordReader stubbed to report an empty split: nextKeyValue() is false from the start.
  InputFormat mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
  EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class);
  Mockito.when(
          mockInputFormat.createRecordReader(
              Mockito.any(InputSplit.class), Mockito.any(TaskAttemptContext.class)))
      .thenReturn(mockReader);
  Mockito.when(mockReader.nextKeyValue()).thenReturn(false);
  InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
  HadoopInputFormatBoundedSource<Text, Employee> boundedSource =
      new HadoopInputFormatBoundedSource<>(
          serConf,
          WritableCoder.of(Text.class),
          AvroCoder.of(Employee.class),
          null, // No key translation required.
          null, // No value translation required.
          new SerializableSplit(mockInputSplit));
  boundedSource.setInputFormatObj(mockInputFormat);
  BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions());
  // With no records available, start() returns false and progress jumps straight to 1.
  assertEquals(false, reader.start());
  assertEquals(Double.valueOf(1), reader.getFractionConsumed());
  reader.close();
}
float recordsRead = 0; assertEquals(Double.valueOf(0), reader.getFractionConsumed()); boolean start = reader.start(); assertEquals(true, start); reader.getFractionConsumed()); assertEquals(true, advance); while (advance) { assertEquals( Double.valueOf(++recordsRead / TestEmployeeDataSet.NUMBER_OF_RECORDS_IN_EACH_SPLIT), reader.getFractionConsumed()); assertEquals(Double.valueOf(1), reader.getFractionConsumed()); reader.close();
float recordsRead = 0; assertEquals(Double.valueOf(0), reader.getFractionConsumed()); boolean start = reader.start(); assertEquals(true, start); reader.getFractionConsumed()); assertEquals(true, advance); while (advance) { assertEquals( Double.valueOf(++recordsRead / TestEmployeeDataSet.NUMBER_OF_RECORDS_IN_EACH_SPLIT), reader.getFractionConsumed()); assertEquals(Double.valueOf(1), reader.getFractionConsumed()); reader.close();
boundedSource.setInputFormatObj(mockInputFormat); BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions()); assertEquals(Double.valueOf(0), reader.getFractionConsumed()); boolean start = reader.start(); assertEquals(true, start); if (start) { boolean advance = reader.advance(); assertEquals(null, reader.getFractionConsumed()); assertEquals(true, advance); if (advance) { advance = reader.advance(); assertEquals(null, reader.getFractionConsumed()); assertEquals(null, reader.getFractionConsumed()); reader.close();
source.createReader(PipelineOptionsFactory.create())) { assertEquals(0.0, readerOrig.getFractionConsumed(), 1e-6); assertEquals(0, readerOrig.getSplitPointsConsumed()); assertEquals( assertEquals(1.0, readerOrig.getFractionConsumed(), 1e-6); assertEquals(1, readerOrig.getSplitPointsConsumed()); assertEquals(0, readerOrig.getSplitPointsRemaining()); remainder.createReader(PipelineOptionsFactory.create())) { assertEquals(0.0, reader.getFractionConsumed(), 1e-6); assertEquals(0, reader.getSplitPointsConsumed()); assertEquals( assertEquals(1.0, reader.getFractionConsumed(), 1e-6); assertEquals(2, reader.getSplitPointsConsumed()); assertEquals(0, reader.getSplitPointsRemaining());
boundedSource.setInputFormatObj(mockInputFormat); BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions()); assertEquals(Double.valueOf(0), reader.getFractionConsumed()); boolean start = reader.start(); assertEquals(true, start); if (start) { boolean advance = reader.advance(); assertEquals(null, reader.getFractionConsumed()); assertEquals(true, advance); if (advance) { advance = reader.advance(); assertEquals(null, reader.getFractionConsumed()); assertEquals(null, reader.getFractionConsumed()); reader.close();
.createReader(PipelineOptionsFactory.create())) { assertEquals(0.0, reader.getFractionConsumed(), 1e-6); assertEquals(0, reader.getSplitPointsConsumed()); assertEquals( assertEquals(1.0, reader.getFractionConsumed(), 1e-6); assertEquals(3, reader.getSplitPointsConsumed()); assertEquals(0, reader.getSplitPointsRemaining());
@Test public void testUnsplittable() throws IOException { String baseName = "test-input"; File compressedFile = tmpFolder.newFile(baseName + ".gz"); byte[] input = generateInput(10000); writeFile(compressedFile, input, CompressionMode.GZIP); CompressedSource<Byte> source = CompressedSource.from(new ByteSource(compressedFile.getPath(), 1)); List<Byte> expected = Lists.newArrayList(); for (byte i : input) { expected.add(i); } PipelineOptions options = PipelineOptionsFactory.create(); BoundedReader<Byte> reader = source.createReader(options); List<Byte> actual = Lists.newArrayList(); for (boolean hasNext = reader.start(); hasNext; hasNext = reader.advance()) { actual.add(reader.getCurrent()); // checkpoint every 9 elements if (actual.size() % 9 == 0) { Double fractionConsumed = reader.getFractionConsumed(); assertNotNull(fractionConsumed); assertNull(reader.splitAtFraction(fractionConsumed)); } } assertEquals(expected.size(), actual.size()); assertEquals(Sets.newHashSet(expected), Sets.newHashSet(actual)); }
@Test public void testProgress() throws IOException { final int numRecords = 5; @SuppressWarnings("deprecation") // testing CountingSource BoundedSource<Long> source = CountingSource.upTo(numRecords); try (BoundedReader<Long> reader = source.createReader(PipelineOptionsFactory.create())) { // Check preconditions before starting. Note that CountingReader can always give an accurate // remaining parallelism. assertEquals(0.0, reader.getFractionConsumed(), 1e-6); assertEquals(0, reader.getSplitPointsConsumed()); assertEquals(numRecords, reader.getSplitPointsRemaining()); assertTrue(reader.start()); int i = 0; do { assertEquals(i, reader.getSplitPointsConsumed()); assertEquals(numRecords - i, reader.getSplitPointsRemaining()); ++i; } while (reader.advance()); assertEquals(numRecords, i); // exactly numRecords calls to advance() assertEquals(1.0, reader.getFractionConsumed(), 1e-6); assertEquals(numRecords, reader.getSplitPointsConsumed()); assertEquals(0, reader.getSplitPointsRemaining()); } }
@Test public void testFractionConsumedWhenReadingFilepattern() throws IOException { List<String> data1 = createStringDataset(3, 1000); File file1 = createFileWithData("file1", data1); List<String> data2 = createStringDataset(3, 1000); createFileWithData("file2", data2); List<String> data3 = createStringDataset(3, 1000); createFileWithData("file3", data3); TestFileBasedSource source = new TestFileBasedSource(file1.getParent() + "/" + "file*", 1024, null); try (BoundedSource.BoundedReader<String> reader = source.createReader(null)) { double lastFractionConsumed = 0.0; assertEquals(0.0, reader.getFractionConsumed(), 1e-6); assertTrue(reader.start()); assertTrue(reader.advance()); assertTrue(reader.advance()); // We're inside the first file. Should be in [0, 1/3). assertTrue(reader.getFractionConsumed() > 0.0); assertTrue(reader.getFractionConsumed() < 1.0 / 3.0); while (reader.advance()) { double fractionConsumed = reader.getFractionConsumed(); assertTrue(fractionConsumed > lastFractionConsumed); lastFractionConsumed = fractionConsumed; } assertEquals(1.0, reader.getFractionConsumed(), 1e-6); } }
/**
 * Verifies that {@code SourceTestUtils.toUnsplittableSource} wraps a source so that it
 * yields a single split, rejects {@code splitAtFraction}, and still produces exactly the
 * same elements as the base source while reporting 0 → 1 progress.
 */
@Test
public void testToUnsplittableSource() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  BoundedSource<Long> baseSource = CountingSource.upTo(100);
  BoundedSource<Long> unsplittableSource = SourceTestUtils.toUnsplittableSource(baseSource);
  // Asking for any number of splits must return the wrapper itself, unsplit.
  List<?> splits = unsplittableSource.split(1, options);
  assertEquals(1, splits.size());
  assertEquals(unsplittableSource, splits.get(0));

  BoundedReader<Long> unsplittableReader = unsplittableSource.createReader(options);
  assertEquals(0, unsplittableReader.getFractionConsumed(), 1e-15);

  Set<Long> expected = Sets.newHashSet(SourceTestUtils.readFromSource(baseSource, options));
  Set<Long> actual = Sets.newHashSet();
  actual.addAll(SourceTestUtils.readNItemsFromUnstartedReader(unsplittableReader, 40));
  // A mid-read split attempt must be rejected by the unsplittable wrapper.
  assertNull(unsplittableReader.splitAtFraction(0.5));
  actual.addAll(SourceTestUtils.readRemainingFromReader(unsplittableReader, true /* started */));
  assertEquals(1, unsplittableReader.getFractionConsumed(), 1e-15);

  assertEquals(100, actual.size());
  assertEquals(Sets.newHashSet(expected), Sets.newHashSet(actual));
}
}
/**
 * A reader that has not been started must report 0.0 fraction consumed — both for the
 * whole-file source and for each of its sub-splits.
 */
@Test
public void testGetProgressFromUnstartedReader() throws Exception {
  List<FixedRecord> records = createFixedRecords(DEFAULT_RECORD_COUNT);
  String filename =
      generateTestFile(
          "tmp.avro",
          records,
          SyncBehavior.SYNC_DEFAULT,
          1000,
          AvroCoder.of(FixedRecord.class),
          DataFileConstants.NULL_CODEC);
  File avroFile = new File(filename);

  AvroSource<FixedRecord> source = AvroSource.from(filename).withSchema(FixedRecord.class);
  try (BoundedSource.BoundedReader<FixedRecord> wholeFileReader = source.createReader(null)) {
    assertEquals(Double.valueOf(0.0), wholeFileReader.getFractionConsumed());
  }

  // Split into roughly thirds and check each unstarted sub-reader as well.
  List<? extends BoundedSource<FixedRecord>> splits = source.split(avroFile.length() / 3, null);
  for (BoundedSource<FixedRecord> subSource : splits) {
    try (BoundedSource.BoundedReader<FixedRecord> subReader = subSource.createReader(null)) {
      assertEquals(Double.valueOf(0.0), subReader.getFractionConsumed());
    }
  }
}
@Test public void testIncreasingProgress() throws Exception { PipelineOptions options = PipelineOptionsFactory.create(); testSourceOptions.progressShape = SyntheticBoundedIO.ProgressShape.LINEAR; SyntheticBoundedSource source = new SyntheticBoundedSource(testSourceOptions); BoundedSource.BoundedReader<KV<byte[], byte[]>> reader = source.createReader(options); // Reader starts at 0.0 progress. assertEquals(0, reader.getFractionConsumed(), 1e-5); // Set the lastFractionConsumed < 0.0 so that we can use strict inequality in the below loop. double lastFractionConsumed = -1.0; for (boolean more = reader.start(); more; more = reader.advance()) { assertTrue(reader.getFractionConsumed() > lastFractionConsumed); lastFractionConsumed = reader.getFractionConsumed(); } assertEquals(1, reader.getFractionConsumed(), 1e-5); }
/**
 * Checks reader progress accounting for an empty file: zero fraction consumed and zero
 * split points consumed before starting, unknown remaining parallelism, and a clean
 * "fully consumed, nothing remaining" state after start() returns false.
 */
@Test
public void testProgressEmptyFile() throws IOException {
  try (BoundedSource.BoundedReader<String> reader =
      prepareSource(new byte[0]).createReader(PipelineOptionsFactory.create())) {
    // Check preconditions before starting.
    assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(
        BoundedSource.BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());

    // Assert empty
    assertFalse(reader.start());

    // Check postconditions after finishing
    assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(0, reader.getSplitPointsRemaining());
  }
}
/**
 * Attempts to split the reader halfway between its current position and completion.
 * Returns {@code null} when progress is unknown or the reader has already finished,
 * or when the reader itself rejects the split.
 */
@Override
public BoundedSource<T> call() throws Exception {
  // Splits at halfway of the remaining work.
  Double currentlyConsumed = reader.getFractionConsumed();
  if (currentlyConsumed == null || currentlyConsumed == 1.0) {
    return null;
  }
  // Midpoint of [currentlyConsumed, 1.0].
  double halfwayBetweenCurrentAndCompletion = 0.5 + (currentlyConsumed / 2);
  return reader.splitAtFraction(halfwayBetweenCurrentAndCompletion);
}
}
/**
 * With a LINEAR_REGRESSING progress shape, the reported fraction must never increase
 * from one record to the next.
 */
@Test
public void testRegressingProgress() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  testSourceOptions.progressShape = SyntheticBoundedIO.ProgressShape.LINEAR_REGRESSING;
  SyntheticBoundedSource source = new SyntheticBoundedSource(testSourceOptions);
  BoundedSource.BoundedReader<KV<byte[], byte[]>> reader = source.createReader(options);
  // Capture the pre-start fraction as the baseline; it must never be exceeded.
  double previousFraction = reader.getFractionConsumed();
  for (boolean hasNext = reader.start(); hasNext; hasNext = reader.advance()) {
    assertTrue(reader.getFractionConsumed() <= previousFraction);
    previousFraction = reader.getFractionConsumed();
  }
}