/**
 * Returns the reader's current element wrapped in the global window, then advances the reader.
 *
 * <p>When the advance reports no further element, {@code finished} is set so that the next call
 * fails fast instead of touching the reader again.
 *
 * @return the current element as a globally-windowed value
 * @throws IllegalStateException if every element has already been read
 * @throws RuntimeException wrapping the {@link IOException} raised while advancing the reader
 */
@Override
public WindowedValue<T> readCurrent() {
  if (finished) {
    throw new IllegalStateException("Bounded reader read all elements");
  }
  final T elem = reader.getCurrent();
  try {
    finished = !reader.advance();
  } catch (final IOException e) {
    // The cause travels with the rethrown exception, so no separate stack-trace dump is needed.
    throw new RuntimeException(e);
  }
  return WindowedValue.valueInGlobalWindow(elem);
}
.createReader(opts)) { if (reader.start()) { return reader.getCurrent().getKey();
/**
 * Reads every record of the file named by the input element and emits each one.
 *
 * <p>The table schema and job id are taken from side inputs; the file is turned into exactly one
 * source, which is then read to exhaustion.
 *
 * @param c the process context supplying the element, side inputs, options and output
 * @throws Exception if creating the source or reading from it fails
 */
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
  TableSchema schema =
      BigQueryHelpers.fromJsonString(c.sideInput(schemaView), TableSchema.class);
  String jobUuid = c.sideInput(jobIdTokenView);
  BigQuerySourceBase<T> source = createSource(jobUuid, coder);
  List<BoundedSource<T>> sources =
      source.createSources(
          ImmutableList.of(
              FileSystems.matchNewResource(c.element(), false /* is directory */)),
          schema);
  checkArgument(sources.size() == 1, "Expected exactly one source.");
  BoundedSource<T> avroSource = sources.get(0);
  // Close the reader even when reading or emitting fails, so its resources are not leaked.
  try (BoundedSource.BoundedReader<T> reader =
      avroSource.createReader(c.getPipelineOptions())) {
    for (boolean more = reader.start(); more; more = reader.advance()) {
      c.output(reader.getCurrent());
    }
  }
}
})
assertEquals(true, start); if (start) { elements.add(reader.getCurrent()); boolean advance = reader.advance(); assertEquals(true, advance); while (advance) { elements.add(reader.getCurrent()); advance = reader.advance(); assertEquals(
assertEquals(true, start); if (start) { elements.add(reader.getCurrent()); boolean advance = reader.advance(); assertEquals(true, advance); while (advance) { elements.add(reader.getCurrent()); advance = reader.advance(); assertEquals(
@Override public void processElement(WindowedValue<BoundedSourceShard<OutputT>> element) throws Exception { BoundedSource<OutputT> source = element.getValue().getSource(); try (final BoundedReader<OutputT> reader = source.createReader(options)) { boolean contentsRemaining = reader.start(); Future<BoundedSource<OutputT>> residualFuture = startDynamicSplitThread(source, reader); UncommittedBundle<OutputT> output = evaluationContext.createBundle(outputPCollection); while (contentsRemaining) { output.add( WindowedValue.timestampedValueInGlobalWindow( reader.getCurrent(), reader.getCurrentTimestamp())); contentsRemaining = reader.advance(); } resultBuilder.addOutput(output); try { BoundedSource<OutputT> residual = residualFuture.get(); if (residual != null) { resultBuilder.addUnprocessedElements( element.withValue(BoundedSourceShard.of(residual))); } } catch (ExecutionException exex) { // Un-and-rewrap the exception thrown by attempting to split throw UserCodeException.wrap(exex.getCause()); } } }
@Test public void testUnsplittable() throws IOException { String baseName = "test-input"; File compressedFile = tmpFolder.newFile(baseName + ".gz"); byte[] input = generateInput(10000); writeFile(compressedFile, input, CompressionMode.GZIP); CompressedSource<Byte> source = CompressedSource.from(new ByteSource(compressedFile.getPath(), 1)); List<Byte> expected = Lists.newArrayList(); for (byte i : input) { expected.add(i); } PipelineOptions options = PipelineOptionsFactory.create(); BoundedReader<Byte> reader = source.createReader(options); List<Byte> actual = Lists.newArrayList(); for (boolean hasNext = reader.start(); hasNext; hasNext = reader.advance()) { actual.add(reader.getCurrent()); // checkpoint every 9 elements if (actual.size() % 9 == 0) { Double fractionConsumed = reader.getFractionConsumed(); assertNotNull(fractionConsumed); assertNull(reader.splitAtFraction(fractionConsumed)); } } assertEquals(expected.size(), actual.size()); assertEquals(Sets.newHashSet(expected), Sets.newHashSet(actual)); }
/**
 * Reads the Avro file named by the element's value and emits one {@code Mutation} per record.
 *
 * <p>The element's key is the table name, which is looked up in the {@code Ddl} side input to
 * build the record converter.
 *
 * @param c the process context supplying the element, side input, options and output
 * @throws RuntimeException wrapping any {@link IOException} raised while reading or closing
 */
@ProcessElement
public void processElement(ProcessContext c) {
  KV<String, String> kv = c.element();
  Ddl ddl = c.sideInput(ddlView);
  String tableName = kv.getKey();
  Table table = ddl.table(tableName);
  SerializableFunction<GenericRecord, Mutation> parseFn = new AvroRecordConverter(table);
  AvroSource<Mutation> source =
      AvroSource.from(kv.getValue())
          .withParseFn(parseFn, SerializableCoder.of(Mutation.class));
  // try-with-resources guarantees the reader is closed even if reading or emitting fails.
  try (BoundedSource.BoundedReader<Mutation> reader =
      source.createReader(c.getPipelineOptions())) {
    for (boolean more = reader.start(); more; more = reader.advance()) {
      c.output(reader.getCurrent());
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
})
/**
 * Reads {@code inputFile} through a {@code CompressedSource} and asserts that the bytes read,
 * paired with their timestamps in milliseconds, equal {@code expected} paired with each byte's
 * index.
 *
 * @param expected the bytes the source should produce, in order
 * @param inputFile the file to read
 * @param decompressionFactory optional decompression to apply to the source; skipped when null
 * @throws IOException if reading the file fails
 */
private void verifyReadContents(
    byte[] expected, File inputFile, @Nullable DecompressingChannelFactory decompressionFactory)
    throws IOException {
  CompressedSource<Byte> source =
      CompressedSource.from(new ByteSource(inputFile.toPath().toString(), 1));
  if (decompressionFactory != null) {
    source = source.withDecompression(decompressionFactory);
  }

  // What we expect: each byte keyed by its position in the input.
  List<KV<Long, Byte>> wanted = Lists.newArrayList();
  int index = 0;
  while (index < expected.length) {
    wanted.add(KV.of((long) index, expected[index]));
    index++;
  }

  // What the reader actually yields: each byte keyed by its reported timestamp.
  List<KV<Long, Byte>> seen = Lists.newArrayList();
  try (BoundedReader<Byte> reader = source.createReader(PipelineOptionsFactory.create())) {
    boolean available = reader.start();
    while (available) {
      seen.add(KV.of(reader.getCurrentTimestamp().getMillis(), reader.getCurrent()));
      available = reader.advance();
    }
  }

  assertEquals(wanted, seen);
}
/** * Emit the current element from the given Reader. The reader is guaranteed to have data. */ private void emitElement( SourceContext<WindowedValue<OutputT>> ctx, BoundedSource.BoundedReader<OutputT> reader) { // make sure that reader state update and element emission are atomic // with respect to snapshots synchronized (ctx.getCheckpointLock()) { OutputT item = reader.getCurrent(); Instant timestamp = reader.getCurrentTimestamp(); WindowedValue<OutputT> windowedValue = WindowedValue.of(item, timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING); ctx.collectWithTimestamp(windowedValue, timestamp.getMillis()); } }
/**
 * Emits every record found in the given offset range of the given file.
 *
 * <p>The element's key is the file and its value is the range; the source is built with the
 * file's own compression setting and restricted to the range before reading.
 *
 * @param c the process context supplying the element, options and output
 * @throws IOException if reading the file range fails
 */
@ProcessElement
public void process(ProcessContext c) throws IOException {
  final ReadableFile readable = c.element().getKey();
  final OffsetRange offsets = c.element().getValue();
  final FileBasedSource<T> wholeFile =
      CompressedSource.from(createSource.apply(readable.getMetadata().resourceId().toString()))
          .withCompression(readable.getCompression());
  try (BoundedSource.BoundedReader<T> rangeReader =
      wholeFile
          .createForSubrangeOfFile(readable.getMetadata(), offsets.getFrom(), offsets.getTo())
          .createReader(c.getPipelineOptions())) {
    boolean hasRecord = rangeReader.start();
    while (hasRecord) {
      c.output(rangeReader.getCurrent());
      hasRecord = rangeReader.advance();
    }
  }
}
}
@Override public WindowedValue<T> nextRecord(WindowedValue<T> t) throws IOException { if (inputAvailable) { final T current = reader.getCurrent(); final Instant timestamp = reader.getCurrentTimestamp(); // advance reader to have a record ready next time inputAvailable = readerInvoker.invokeAdvance(reader); return WindowedValue.of( current, timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING); } return null; }
/**
 * Returns whatever the wrapped {@code delegate} reports as its current element.
 *
 * @return the delegate's current element
 * @throws NoSuchElementException if the delegate throws it
 */
@Override
public T getCurrent() throws NoSuchElementException {
  final T current = delegate.getCurrent();
  return current;
}
/**
 * Forwards to {@code delegate} and hands back its current element unchanged.
 *
 * @return the element obtained from the delegate
 * @throws NoSuchElementException if the delegate throws it
 */
@Override
public T getCurrent() throws NoSuchElementException {
  final T fromDelegate = delegate.getCurrent();
  return fromDelegate;
}
@Override public WindowedValue<T> nextRecord(WindowedValue<T> t) throws IOException { if (inputAvailable) { final T current = reader.getCurrent(); final Instant timestamp = reader.getCurrentTimestamp(); // advance reader to have a record ready next time inputAvailable = readerInvoker.invokeAdvance(reader); return WindowedValue.of(current, timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING); } return null; }
/**
 * Returns the current element of the wrapped {@code boundedReader}.
 *
 * @return the bounded reader's current element
 * @throws NoSuchElementException if the bounded reader throws it
 */
@Override
public T getCurrent() throws NoSuchElementException {
  final T current = boundedReader.getCurrent();
  return current;
}
/**
 * Reads the source carried by the input element to exhaustion, emitting every record together
 * with the timestamp the reader reports for it.
 *
 * @param ctxt the process context supplying the source element, options and output
 * @throws IOException if creating, reading or closing the reader fails
 */
@ProcessElement
public void readSoruce(ProcessContext ctxt) throws IOException {
  // Close the reader even when reading or emitting fails, so its resources are not leaked.
  try (BoundedSource.BoundedReader<T> reader =
      ctxt.element().createReader(ctxt.getPipelineOptions())) {
    for (boolean more = reader.start(); more; more = reader.advance()) {
      ctxt.outputWithTimestamp(reader.getCurrent(), reader.getCurrentTimestamp());
    }
  }
}
}
@Override public WindowedValue<T> nextRecord(WindowedValue<T> t) throws IOException { if (inputAvailable) { final T current = reader.getCurrent(); final Instant timestamp = reader.getCurrentTimestamp(); // advance reader to have a record ready next time inputAvailable = readerInvoker.invokeAdvance(reader); return WindowedValue.of(current, timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING); } return null; }