/**
 * Returns a new {@link WriteFiles} that writes to the current {@link FileBasedSink} using a
 * fixed number of output shards.
 *
 * <p>Use this option sparingly, since it can hurt performance. See {@link WriteFiles} for more
 * information.
 *
 * <p>A value less than or equal to 0 is equivalent to the default behavior of runner-determined
 * sharding.
 *
 * @param numShards the requested shard count; non-positive values select runner-determined
 *     sharding
 * @return a {@link WriteFiles} configured with the requested sharding
 */
public WriteFiles<UserT, DestinationT, OutputT> withNumShards(int numShards) {
  // Guard: non-positive counts fall back to runner-determined sharding.
  if (numShards <= 0) {
    return withRunnerDeterminedSharding();
  }
  // Wrap the constant so it flows through the provider-based overload.
  return withNumShards(StaticValueProvider.of(numShards));
}
/**
 * Verifies that {@link WriteFiles} configured with a fixed number of shards produces the desired
 * number of shards even when there are fewer input elements than shards.
 */
@Test
@Category(NeedsRunner.class)
public void testExpandShardedWrite() throws IOException {
  // Six elements written across twenty shards: deliberately fewer elements than shards.
  String[] words = {"one", "two", "three", "four", "five", "six"};
  int shardCount = 20;

  runShardedWrite(
      Arrays.asList(words),
      IDENTITY_MAP,
      getBaseOutputFilename(),
      WriteFiles.to(makeSimpleSink()).withNumShards(shardCount));
}
replacement.withNumShards(numShards)); } catch (Exception e) { throw new RuntimeException(e);
/**
 * Supplies the {@link WriteFiles} configurations used as test parameters: a plain write, a
 * windowed write, a write with a fixed shard count, and a windowed write with a fixed shard
 * count.
 */
@Parameters(name = "{index}: {0}")
public static Iterable<WriteFiles<Object, Void, Object>> data() {
  // Entries are added in the same order as they are reported by the "{index}" placeholder.
  return ImmutableList.<WriteFiles<Object, Void, Object>>builder()
      .add(WriteFiles.to(new DummySink()))
      .add(WriteFiles.to(new DummySink()).withWindowedWrites())
      .add(WriteFiles.to(new DummySink()).withNumShards(17))
      .add(WriteFiles.to(new DummySink()).withWindowedWrites().withNumShards(42))
      .build();
}
replacement.withNumShards(numShards)); } catch (Exception e) { throw new RuntimeException(e);
@Test public void testBuildWrite() { SimpleSink<Void> sink = makeSimpleSink(); WriteFiles<String, ?, String> write = WriteFiles.to(sink).withNumShards(3); assertThat((SimpleSink<Void>) write.getSink(), is(sink)); PTransform<PCollection<String>, PCollectionView<Integer>> originalSharding = write.getComputeNumShards(); assertThat(write.getComputeNumShards(), is(nullValue())); assertThat(write.getNumShardsProvider(), instanceOf(StaticValueProvider.class)); assertThat(write.getNumShardsProvider().get(), equalTo(3)); assertThat(write.getComputeNumShards(), equalTo(originalSharding)); WriteFiles<String, ?, ?> write2 = write.withSharding(SHARDING_TRANSFORM); assertThat((SimpleSink<Void>) write2.getSink(), is(sink)); assertThat(write2.getComputeNumShards(), equalTo(SHARDING_TRANSFORM)); // original unchanged WriteFiles<String, ?, ?> writeUnsharded = write2.withRunnerDeterminedSharding(); assertThat(writeUnsharded.getComputeNumShards(), nullValue()); assertThat(write.getComputeNumShards(), equalTo(originalSharding)); }
/**
 * Expands this transform by applying a {@link WriteFiles} over a {@code TFRecordSink} built from
 * the configured output prefix, shard template, filename suffix and compression.
 *
 * <p>The output prefix must be set; otherwise the {@code checkState} precondition fails. A fixed
 * shard count is applied only when {@code getNumShards()} is positive.
 *
 * @param input the collection of byte-array records to write
 * @return a {@link PDone} in the pipeline of {@code input}
 */
@Override
public PDone expand(PCollection<byte[]> input) {
  checkState(
      getOutputPrefix() != null,
      "need to set the output prefix of a TFRecordIO.Write transform");

  WriteFiles<byte[], Void, byte[]> baseWrite =
      WriteFiles.to(
          new TFRecordSink(
              getOutputPrefix(), getShardTemplate(), getFilenameSuffix(), getCompression()));

  // Only pin the shard count when one was explicitly requested; otherwise keep the default.
  WriteFiles<byte[], Void, byte[]> configuredWrite =
      getNumShards() > 0 ? baseWrite.withNumShards(getNumShards()) : baseWrite;

  input.apply("Write", configuredWrite);
  return PDone.in(input.getPipeline());
}
/**
 * Runs a windowed, single-shard write of 100 elements with at most two writers per bundle.
 *
 * <p>NOTE(review): judging by the test name, the small writer limit is presumably meant to
 * exercise the spilling path of {@link WriteFiles} -- confirm against the implementation.
 */
@Test
@Category(NeedsRunner.class)
public void testWriteSpilling() throws IOException {
  // Build the inputs "mambo_number_0" .. "mambo_number_99".
  List<String> elements = Lists.newArrayList();
  int index = 0;
  while (index < 100) {
    elements.add("mambo_number_" + index);
    index++;
  }

  runWrite(
      elements,
      Window.into(FixedWindows.of(Duration.millis(2))),
      getBaseOutputFilename(),
      WriteFiles.to(makeSimpleSink())
          .withMaxNumWritersPerBundle(2)
          .withWindowedWrites()
          .withNumShards(1));
}
getWritableByteChannelFactory())); if (getNumShards() > 0) { write = write.withNumShards(getNumShards());
is(true)); WriteFiles<Integer, Void, Integer> withStaticSharding = write.withNumShards(3); assertThat( PTransformMatchers.writeWithRunnerDeterminedSharding()
WriteFiles<String, Integer, String> writeFiles = WriteFiles.to(sink).withNumShards(numShards);
/**
 * Verifies the display data of a {@link WriteFiles} configured with a fixed shard count: it must
 * expose the sink class, include the sink's own display data, and report {@code numShards}.
 */
@Test
public void testShardedDisplayData() {
  // Constant destination whose filename policy uses the "-SS-of-NN" shard template.
  DynamicDestinations<String, Void, String> constantDestinations =
      DynamicFileDestinations.constant(
          DefaultFilenamePolicy.fromParams(
              new Params()
                  .withBaseFilename(
                      getBaseOutputDirectory()
                          .resolve("file", StandardResolveOptions.RESOLVE_FILE))
                  .withShardTemplate("-SS-of-NN")));

  // Sink that contributes a recognizable display-data item of its own.
  SimpleSink<Void> displayingSink =
      new SimpleSink<Void>(
          getBaseOutputDirectory(), constantDestinations, Compression.UNCOMPRESSED) {
        @Override
        public void populateDisplayData(DisplayData.Builder displayBuilder) {
          displayBuilder.add(DisplayData.item("foo", "bar"));
        }
      };

  WriteFiles<String, ?, String> singleShardWrite = WriteFiles.to(displayingSink).withNumShards(1);
  DisplayData shownData = DisplayData.from(singleShardWrite);

  assertThat(shownData, hasDisplayItem("sink", displayingSink.getClass()));
  assertThat(shownData, includesDisplayDataFor("sink", displayingSink));
  assertThat(shownData, hasDisplayItem("numShards", 1));
}
new AvroSink<>(tempDirectory, resolveDynamicDestinations(), getGenericRecords())); if (getNumShards() > 0) { write = write.withNumShards(getNumShards());
.withSideInputs(Lists.newArrayList(resolved.getAllSideInputs())); if (getNumShards() != null) { writeFiles = writeFiles.withNumShards(getNumShards()); } else if (getSharding() != null) { writeFiles = writeFiles.withSharding(getSharding());