@Test public void testSplitAtFraction() throws Exception { PipelineOptions options = PipelineOptionsFactory.create(); File file = createFileWithData("file", createStringDataset(3, 100)); Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath()); TestFileBasedSource source = new TestFileBasedSource(metadata, 1, 0, file.length(), null); // Shouldn't be able to split while unstarted. assertSplitAtFractionFails(source, 0, 0.7, options); assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.7, options); assertSplitAtFractionSucceedsAndConsistent(source, 30, 0.7, options); assertSplitAtFractionFails(source, 0, 0.0, options); assertSplitAtFractionFails(source, 70, 0.3, options); assertSplitAtFractionFails(source, 100, 1.0, options); assertSplitAtFractionFails(source, 100, 0.99, options); assertSplitAtFractionSucceedsAndConsistent(source, 100, 0.995, options); }
// Number of records obtained by reading subSource in full.
int items = SourceTestUtils.readFromSource(subSource, null).size();

// With nothing read yet, split requests at 0.0 and 0.7 must both be rejected.
SourceTestUtils.assertSplitAtFractionFails(subSource, 0, 0.0, null);
SourceTestUtils.assertSplitAtFractionFails(subSource, 0, 0.7, null);

// After a single record, a split at 0.7 is expected to succeed.
SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent(subSource, 1, 0.7, null);

// NOTE(review): this call was preceded by a second, unterminated
// "SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent(" opener, which left the
// parentheses unbalanced and passed a void call as an argument. It has been dropped as a stray
// duplicate. If it was a separate assertion whose arguments were lost, restore it here.
SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent(
    subSource, DEFAULT_RECORD_COUNT / 10, 0.1, null);

// One record further on, the same 0.1 split must be rejected.
SourceTestUtils.assertSplitAtFractionFails(
    subSource, DEFAULT_RECORD_COUNT / 10 + 1, 0.1, null);

// A split at 0.3 after DEFAULT_RECORD_COUNT / 3 records must be rejected.
SourceTestUtils.assertSplitAtFractionFails(subSource, DEFAULT_RECORD_COUNT / 3, 0.3, null);

// After all records are read, splits at 0.9 and 1.0 are rejected while 0.999 is still accepted.
SourceTestUtils.assertSplitAtFractionFails(subSource, items, 0.9, null);
SourceTestUtils.assertSplitAtFractionFails(subSource, items, 1.0, null);
SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent(subSource, items, 0.999, null);
private void testSplitAtFractionP(long splitPointFrequency) throws Exception { PipelineOptions options = PipelineOptionsFactory.create(); testSourceOptions.splitPointFrequencyRecords = splitPointFrequency; SyntheticBoundedSource source = new SyntheticBoundedSource(testSourceOptions); SourceTestUtils.assertSplitAtFractionExhaustive(source, options); // Can't split if already consumed. SourceTestUtils.assertSplitAtFractionFails(source, 5, 0.3, options); SourceTestUtils.assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.3, options); }
/** Unit tests of splitAtFraction. */ @Test public void testReadingSplitAtFraction() throws Exception { final String table = "TEST-SPLIT-AT-FRACTION"; final int numRows = 10; final int numSamples = 1; final long bytesPerRow = 1L; makeTableData(table, numRows); service.setupSampleRowKeys(table, numSamples, bytesPerRow); BigtableSource source = new BigtableSource( config.withTableId(ValueProvider.StaticValueProvider.of(table)), null, Arrays.asList(service.getTableRange(table)), null); // With 0 items read, all split requests will fail. assertSplitAtFractionFails(source, 0, 0.1, null /* options */); assertSplitAtFractionFails(source, 0, 1.0, null /* options */); // With 1 items read, all split requests past 1/10th will succeed. assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.333, null /* options */); assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.666, null /* options */); // With 3 items read, all split requests past 3/10ths will succeed. assertSplitAtFractionFails(source, 3, 0.2, null /* options */); assertSplitAtFractionSucceedsAndConsistent(source, 3, 0.571, null /* options */); assertSplitAtFractionSucceedsAndConsistent(source, 3, 0.9, null /* options */); // With 6 items read, all split requests past 6/10ths will succeed. assertSplitAtFractionFails(source, 6, 0.5, null /* options */); assertSplitAtFractionSucceedsAndConsistent(source, 6, 0.7, null /* options */); }
// Size of what readEverythingFromReader returns for a fresh reader created with null options;
// presumably the total number of items in splitSource (confirm against the helper).
int numItems = readEverythingFromReader(splitSource.createReader(null)).size();
// Nothing read yet: a split at 0.7 must be rejected.
assertSplitAtFractionFails(splitSource, 0, 0.7, options);
// After 1 or 15 items, a split at 0.7 is expected to succeed.
assertSplitAtFractionSucceedsAndConsistent(splitSource, 1, 0.7, options);
assertSplitAtFractionSucceedsAndConsistent(splitSource, 15, 0.7, options);
// Still nothing read: a split at 0.0 is rejected as well.
assertSplitAtFractionFails(splitSource, 0, 0.0, options);
// After 20 items a split at 0.3 must be rejected.
assertSplitAtFractionFails(splitSource, 20, 0.3, options);
// After numItems items, splits at 1.0 and 0.9 must both be rejected.
assertSplitAtFractionFails(splitSource, numItems, 1.0, options);
assertSplitAtFractionFails(splitSource, numItems, 0.9, options);