/** Creates a reader by forwarding to the wrapped {@code source}. */
@Override
protected Source.Reader<T> createReader(PipelineOptions options) throws IOException {
  Source.Reader<T> wrappedReader = source.createReader(options);
  return wrappedReader;
}
}
/**
 * {@inheritDoc}
 *
 * <p>Reader creation is forwarded unchanged to the wrapped {@code delegate} source.
 */
@Override
public BoundedReader<T> createReader(PipelineOptions options) throws IOException {
  BoundedReader<T> delegateReader = delegate.createReader(options);
  return delegateReader;
}
/**
 * Opens a reader over the bounded source and advances it to the first element; marks this
 * instance finished when the source is empty.
 */
@Override
public void prepare() {
  try {
    // NOTE(review): passes null PipelineOptions — assumes the source never dereferences them;
    // verify against the source implementations used here.
    reader = boundedSource.createReader(null);
    final boolean hasFirstElement = reader.start();
    finished = !hasFirstElement;
  } catch (final Exception e) {
    throw new RuntimeException(e);
  }
}
/** Reads all elements from the given {@link BoundedSource}. */
public static <T> List<T> readFromSource(BoundedSource<T> source, PipelineOptions options)
    throws IOException {
  final List<T> elements;
  // try-with-resources guarantees the reader is closed even if reading throws.
  try (BoundedSource.BoundedReader<T> reader = source.createReader(options)) {
    elements = readFromUnstartedReader(reader);
  }
  return elements;
}
/**
 * Creates a reader for the bounded source held by the given partition, rethrowing any
 * {@link IOException} as an unchecked exception with the cause preserved.
 */
private BoundedSource.BoundedReader<T> createReader(SourcePartition<T> partition) {
  BoundedSource<T> partitionSource = (BoundedSource<T>) partition.source;
  try {
    return partitionSource.createReader(options.get());
  } catch (IOException e) {
    throw new RuntimeException("Failed to create reader from a BoundedSource.", e);
  }
}
/** Creates a reader over the underlying source, wrapped so it cannot be dynamically split. */
@Override
public BoundedReader<T> createReader(PipelineOptions options) throws IOException {
  BoundedReader<T> underlying = boundedSource.createReader(options);
  return new UnsplittableReader<>(boundedSource, underlying);
}
/** Opens the bounded source for the given split and positions the reader at the first element. */
@Override
public void open(SourceInputSplit<T> sourceInputSplit) throws IOException {
  // The metric container is scoped to this operator's Flink runtime context.
  readerInvoker =
      new ReaderInvocationUtil<>(
          stepName, serializedOptions.get(), new FlinkMetricContainer(getRuntimeContext()));
  reader = ((BoundedSource<T>) sourceInputSplit.getSource()).createReader(options);
  // start() opens the reader and reports whether a first element is available.
  inputAvailable = readerInvoker.invokeStart(reader);
}
/** Opens the bounded source for the given split and positions the reader at the first element. */
@Override
public void open(SourceInputSplit<T> sourceInputSplit) throws IOException {
  // The metric container is scoped to this operator's Flink runtime context.
  readerInvoker =
      new ReaderInvocationUtil<>(
          stepName, serializedOptions.get(), new FlinkMetricContainer(getRuntimeContext()));
  reader = ((BoundedSource<T>) sourceInputSplit.getSource()).createReader(options);
  // start() opens the reader and reports whether a first element is available.
  inputAvailable = readerInvoker.invokeStart(reader);
}
/** Opens the bounded source for the given split and positions the reader at the first element. */
@Override
public void open(SourceInputSplit<T> sourceInputSplit) throws IOException {
  // The metric container is scoped to this operator's Flink runtime context.
  readerInvoker =
      new ReaderInvocationUtil<>(
          stepName, serializedOptions.get(), new FlinkMetricContainer(getRuntimeContext()));
  reader = ((BoundedSource<T>) sourceInputSplit.getSource()).createReader(options);
  // start() opens the reader and reports whether a first element is available.
  inputAvailable = readerInvoker.invokeStart(reader);
}
/**
 * Reads every element of the {@link BoundedSource} carried by the input element and emits each
 * with its source-provided timestamp.
 *
 * <p>Fix: the reader was never closed, leaking whatever resources it held; it is now managed with
 * try-with-resources so it is closed even when reading throws.
 *
 * <p>NOTE(review): the method name has a typo ("Soruce"); it is kept because DoFn methods are
 * dispatched via the {@code @ProcessElement} annotation and renaming would alter the class's
 * public interface.
 */
@ProcessElement
public void readSoruce(ProcessContext ctxt) throws IOException {
  try (BoundedSource.BoundedReader<T> reader =
      ctxt.element().createReader(ctxt.getPipelineOptions())) {
    for (boolean more = reader.start(); more; more = reader.advance()) {
      ctxt.outputWithTimestamp(reader.getCurrent(), reader.getCurrentTimestamp());
    }
  }
}
}
/**
 * Reading with a record element name that never occurs in the XML must yield an empty result.
 *
 * <p>Fix: JUnit's {@code assertEquals(expected, actual)} takes the expected value first; the
 * original call had the arguments reversed, which produces a misleading failure message.
 */
@Test
public void testReadXMLIncorrectRecordElement() throws IOException {
  File file = tempFolder.newFile("trainXMLSmall");
  Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));
  BoundedSource<Train> source =
      XmlIO.<Train>read()
          .from(file.toPath().toString())
          .withRootElement("trains")
          .withRecordElement("something")
          .withRecordClass(Train.class)
          .createSource();
  assertEquals(new ArrayList<Train>(), readEverythingFromReader(source.createReader(null)));
}
/** An incorrect root element must surface the XML parser's close-tag mismatch error. */
@Test
public void testReadXMLIncorrectRootElement() throws IOException {
  File file = tempFolder.newFile("trainXMLSmall");
  Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));
  BoundedSource<Train> source =
      XmlIO.<Train>read()
          .from(file.toPath().toString())
          .withRootElement("something")
          .withRecordElement("train")
          .withRecordClass(Train.class)
          .createSource();
  // The expectation must be registered before the read triggers the parse failure.
  exception.expectMessage("Unexpected close tag </trains>; expected </something>.");
  readEverythingFromReader(source.createReader(null));
}
/** Splitting with a tiny min bundle size must still read every record across all shards. */
@Test
public void testSplitWithEmptyBundles() throws Exception {
  String fileName = "temp.xml";
  List<Train> trains = generateRandomTrainList(10);
  File file = createRandomTrainXML(fileName, trains);
  BoundedSource<Train> source =
      XmlIO.<Train>read()
          .from(file.toPath().toString())
          .withRootElement("trains")
          .withRecordElement("train")
          .withRecordClass(Train.class)
          .withMinBundleSize(10)
          .createSource();
  List<? extends BoundedSource<Train>> splits = source.split(100, null);
  assertTrue(splits.size() > 2);
  List<Train> readBack = new ArrayList<>();
  for (BoundedSource<Train> shard : splits) {
    readBack.addAll(readEverythingFromReader(shard.createReader(null)));
  }
  assertThat(trainsToStrings(trains), containsInAnyOrder(trainsToStrings(readBack).toArray()));
}
/** Multi-byte UTF-8 characters in record fields must decode correctly. */
@Test
public void testReadXMLWithMultiByteChars() throws IOException {
  File file = tempFolder.newFile("trainXMLTiny");
  Files.write(file.toPath(), xmlWithMultiByteChars.getBytes(StandardCharsets.UTF_8));
  BoundedSource<Train> source =
      XmlIO.<Train>read()
          .from(file.toPath().toString())
          .withRootElement("trains")
          .withRecordElement("train")
          .withRecordClass(Train.class)
          .withMinBundleSize(1024)
          .createSource();
  List<Train> expectedResults =
      ImmutableList.of(
          new Train("Thomas¥", Train.TRAIN_NUMBER_UNDEFINED, null, null),
          new Train("Hen¶ry", Train.TRAIN_NUMBER_UNDEFINED, null, null),
          new Train("Jamßes", Train.TRAIN_NUMBER_UNDEFINED, null, null));
  List<Train> actualResults = readEverythingFromReader(source.createReader(null));
  assertThat(
      trainsToStrings(expectedResults),
      containsInAnyOrder(trainsToStrings(actualResults).toArray()));
}
/** A larger (100-record) randomly generated file must round-trip through the source. */
@Test
public void testReadXMLLarge() throws IOException {
  String fileName = "temp.xml";
  List<Train> trains = generateRandomTrainList(100);
  File file = createRandomTrainXML(fileName, trains);
  BoundedSource<Train> source =
      XmlIO.<Train>read()
          .from(file.toPath().toString())
          .withRootElement("trains")
          .withRecordElement("train")
          .withRecordClass(Train.class)
          .withMinBundleSize(1024)
          .createSource();
  List<Train> actualResults = readEverythingFromReader(source.createReader(null));
  assertThat(
      trainsToStrings(trains), containsInAnyOrder(trainsToStrings(actualResults).toArray()));
}
/** Leading/trailing whitespace inside element text must be preserved by the parser. */
@Test
public void testReadXMLWithWhitespaces() throws IOException {
  File file = tempFolder.newFile("trainXMLSmall");
  Files.write(file.toPath(), trainXMLWithSpaces.getBytes(StandardCharsets.UTF_8));
  BoundedSource<Train> source =
      XmlIO.<Train>read()
          .from(file.toPath().toString())
          .withRootElement("trains")
          .withRecordElement("train")
          .withRecordClass(Train.class)
          .withMinBundleSize(1024)
          .createSource();
  List<Train> expectedResults =
      ImmutableList.of(
          new Train("Thomas ", 1, "blue", null),
          new Train("Henry", 3, "green", null),
          new Train("Toby", 7, " brown ", null),
          new Train("Gordon", 4, "blue", null),
          new Train("Emily", -1, "red", null),
          new Train("Percy", 6, "green", null));
  List<Train> actualResults = readEverythingFromReader(source.createReader(null));
  assertThat(
      trainsToStrings(expectedResults),
      containsInAnyOrder(trainsToStrings(actualResults).toArray()));
}
/**
 * Verifies {@code SourceTestUtils.toUnsplittableSource}: the wrapped source splits into exactly
 * itself, its reader refuses dynamic splitting ({@code splitAtFraction} returns null mid-read),
 * and reading to completion still yields every element of the base source with fraction-consumed
 * going from 0 to 1. Assertion order matters here: 40 items are read before the split attempt.
 */
@Test public void testToUnsplittableSource() throws Exception { PipelineOptions options = PipelineOptionsFactory.create(); BoundedSource<Long> baseSource = CountingSource.upTo(100); BoundedSource<Long> unsplittableSource = SourceTestUtils.toUnsplittableSource(baseSource); List<?> splits = unsplittableSource.split(1, options); assertEquals(1, splits.size()); assertEquals(unsplittableSource, splits.get(0)); BoundedReader<Long> unsplittableReader = unsplittableSource.createReader(options); assertEquals(0, unsplittableReader.getFractionConsumed(), 1e-15); Set<Long> expected = Sets.newHashSet(SourceTestUtils.readFromSource(baseSource, options)); Set<Long> actual = Sets.newHashSet(); actual.addAll(SourceTestUtils.readNItemsFromUnstartedReader(unsplittableReader, 40)); assertNull(unsplittableReader.splitAtFraction(0.5)); actual.addAll(SourceTestUtils.readRemainingFromReader(unsplittableReader, true /* started */)); assertEquals(1, unsplittableReader.getFractionConsumed(), 1e-15); assertEquals(100, actual.size()); assertEquals(Sets.newHashSet(expected), Sets.newHashSet(actual)); } }
/** * This test verifies that the method {@link * HadoopInputFormatBoundedSource.HadoopInputFormatReader#getCurrentSource() getCurrentSource()} * returns correct source object. */ @Test public void testGetCurrentSourceFunction() throws Exception { SerializableSplit split = new SerializableSplit(); BoundedSource<KV<Text, Employee>> source = new HadoopInputFormatBoundedSource<>( serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. split); BoundedReader<KV<Text, Employee>> hifReader = source.createReader(p.getOptions()); BoundedSource<KV<Text, Employee>> hifSource = hifReader.getCurrentSource(); assertEquals(hifSource, source); }
/** * This test verifies that the method {@link * HadoopInputFormatBoundedSource.HadoopInputFormatReader#getCurrentSource() getCurrentSource()} * returns correct source object. */ @Test public void testGetCurrentSourceFunction() throws Exception { SerializableSplit split = new SerializableSplit(); BoundedSource<KV<Text, Employee>> source = new HadoopInputFormatBoundedSource<>( serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. split); BoundedReader<KV<Text, Employee>> hifReader = source.createReader(p.getOptions()); BoundedSource<KV<Text, Employee>> hifSource = hifReader.getCurrentSource(); assertEquals(hifSource, source); }
/**
 * Verifies CountingSource reader progress reporting: fraction consumed, split points consumed,
 * and split points remaining before start, after each advance, and after exhaustion. The loop
 * checks the counters at every step, so statement order inside the do/while is significant.
 */
@Test public void testProgress() throws IOException { final int numRecords = 5; @SuppressWarnings("deprecation") // testing CountingSource BoundedSource<Long> source = CountingSource.upTo(numRecords); try (BoundedReader<Long> reader = source.createReader(PipelineOptionsFactory.create())) { // Check preconditions before starting. Note that CountingReader can always give an accurate // remaining parallelism. assertEquals(0.0, reader.getFractionConsumed(), 1e-6); assertEquals(0, reader.getSplitPointsConsumed()); assertEquals(numRecords, reader.getSplitPointsRemaining()); assertTrue(reader.start()); int i = 0; do { assertEquals(i, reader.getSplitPointsConsumed()); assertEquals(numRecords - i, reader.getSplitPointsRemaining()); ++i; } while (reader.advance()); assertEquals(numRecords, i); // exactly numRecords calls to advance() assertEquals(1.0, reader.getFractionConsumed(), 1e-6); assertEquals(numRecords, reader.getSplitPointsConsumed()); assertEquals(0, reader.getSplitPointsRemaining()); } }