/**
 * Reads a zip file that was created with no entries. The default test zip files each hold a
 * single entry, so this corner case is not exercised by the other tests.
 */
@Test
@Category(NeedsRunner.class)
public void testZipCompressedReadWithNoEntries() throws Exception {
  // An empty list is handed to the zip-file helper as its first argument.
  File emptyZip = createZipFile(new ArrayList<>(), tempFolder, "empty zip file");

  assertReadingCompressedFileMatchesExpected(emptyZip, ZIP, EMPTY, p);

  p.run();
}
/**
 * Applies {@code ApproximateQuantiles.perKey} with two quantiles and a
 * {@code DescendingIntComparator}, then asserts the per-key results for keys "a" and "b".
 */
@Test
@Category(NeedsRunner.class)
public void testQuantilesPerKey_reversed() {
  PCollection<KV<String, Integer>> table = createInputTable(p);

  PCollection<KV<String, List<Integer>>> perKeyQuantiles =
      table.apply(ApproximateQuantiles.perKey(2, new DescendingIntComparator()));

  // Expected output, one entry per key.
  KV<String, List<Integer>> expectedForA = KV.of("a", Arrays.asList(3, 1));
  KV<String, List<Integer>> expectedForB = KV.of("b", Arrays.asList(100, 1));
  PAssert.that(perKeyQuantiles).containsInAnyOrder(expectedForA, expectedForB);

  p.run();
}
/**
 * Checks that {@code containsInAnyOrder} does not depend on element order: the collection is
 * created as 1, 2, 3, 4 and asserted against 2, 1, 4, 3.
 */
@Test
@Category(ValidatesRunner.class)
public void testContainsInAnyOrder() throws Exception {
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4));

  // Deliberately listed in a different order than the elements were created.
  PAssert.that(numbers).containsInAnyOrder(2, 1, 4, 3);

  pipeline.run();
}
/**
 * Reads from {@code CountingSource.upTo(1000)} and hands the resulting collection to
 * {@code addCountingAsserts} together with the same element count.
 */
@Test
@Category(NeedsRunner.class)
public void testBoundedSource() {
  final long expectedCount = 1000;

  PCollection<Long> counted = p.apply(Read.from(CountingSource.upTo(expectedCount)));
  addCountingAsserts(counted, expectedCount);

  p.run();
}
/** Reads from {@code CountingSource.upTo(0)} and asserts that the resulting collection is empty. */
@Test
@Category(NeedsRunner.class)
public void testEmptyBoundedSource() {
  PCollection<Long> nothing = p.apply(Read.from(CountingSource.upTo(0)));

  PAssert.that(nothing).empty();

  p.run();
}
/**
 * Reads from {@code CountingSource.unbounded()} capped with {@code withMaxNumRecords(1000)} and
 * hands the resulting collection to {@code addCountingAsserts} with the same count.
 */
@Test
@Category(NeedsRunner.class)
public void testUnboundedSource() {
  final long recordLimit = 1000;

  PCollection<Long> counted =
      p.apply(Read.from(CountingSource.unbounded()).withMaxNumRecords(recordLimit));
  addCountingAsserts(counted, recordLimit);

  p.run();
}
/**
 * Checks that a pass-through identity transform is supported: the elements 1, 2, 3, 4 go through
 * {@code IdentityTransform} and are asserted to come out unchanged.
 */
@Test
@Category(ValidatesRunner.class)
public void testIdentityTransform() {
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4));

  PCollection<Integer> passedThrough =
      numbers.apply("IdentityTransform", new IdentityTransform<>());

  PAssert.that(passedThrough).containsInAnyOrder(1, 2, 3, 4);
  pipeline.run();
}
/** Applies {@code Filter.lessThanEq(4)} to the integers 1..7 and asserts that 1..4 remain. */
@Test
@Category(NeedsRunner.class)
public void testFilterLessThanEq() {
  PCollection<Integer> numbers = p.apply(Create.of(1, 2, 3, 4, 5, 6, 7));

  PCollection<Integer> atMostFour = numbers.apply(Filter.lessThanEq(4));

  PAssert.that(atMostFour).containsInAnyOrder(1, 2, 3, 4);
  p.run();
}
/** Applies {@code Filter.greaterThanEq(4)} to the integers 1..7 and asserts that 4..7 remain. */
@Test
@Category(NeedsRunner.class)
public void testFilterGreaterThanEq() {
  PCollection<Integer> numbers = p.apply(Create.of(1, 2, 3, 4, 5, 6, 7));

  PCollection<Integer> atLeastFour = numbers.apply(Filter.greaterThanEq(4));

  PAssert.that(atLeastFour).containsInAnyOrder(4, 5, 6, 7);
  p.run();
}
/**
 * Applies {@code Filter.by} with a lambda that always returns {@code false} and asserts that the
 * output collection is empty.
 */
@Test
@Category(NeedsRunner.class)
public void testNoFilterByPredicateWithLambda() {
  PCollection<Integer> numbers = p.apply(Create.of(1, 2, 4, 5));

  // The predicate rejects every element.
  PCollection<Integer> survivors = numbers.apply(Filter.by(i -> false));

  PAssert.that(survivors).empty();
  p.run();
}
/**
 * Expects an {@link IllegalArgumentException} when {@code Select.fieldNames("missing")} is applied
 * to a collection holding a single {@code POJO1}.
 */
@Test
@Category(NeedsRunner.class)
public void testSelectMissingFieldName() {
  // The expectation must be registered before any pipeline construction happens.
  thrown.expect(IllegalArgumentException.class);

  POJO1 element = new POJO1();
  pipeline
      .apply(Create.of(element))
      // NOTE(review): "missing" is presumably not a field of POJO1 — confirm against its definition.
      .apply(Select.fieldNames("missing"));

  pipeline.run();
}
/**
 * Applies {@code Regex.replaceAll("[xyz]", "new")} to "xj", "yj" and "zj" and asserts that each
 * element becomes "newj".
 */
@Test
@Category(NeedsRunner.class)
public void testReplaceAll() {
  PCollection<String> words = p.apply(Create.of("xj", "yj", "zj"));

  PCollection<String> replaced = words.apply(Regex.replaceAll("[xyz]", "new"));

  PAssert.that(replaced).containsInAnyOrder("newj", "newj", "newj");
  p.run();
}
/**
 * Applies {@code Filter.by(new TrivialFn(true))} and asserts that every input element is still
 * present in the output.
 */
@Test
@Category(NeedsRunner.class)
public void testIdentityFilterByPredicate() {
  PCollection<Integer> numbers = p.apply(Create.of(591, 11789, 1257, 24578, 24799, 307));

  TrivialFn predicate = new TrivialFn(true);
  PCollection<Integer> filtered = numbers.apply(Filter.by(predicate));

  // The expected elements are exactly the ones that were fed in.
  PAssert.that(filtered).containsInAnyOrder(591, 11789, 1257, 24578, 24799, 307);
  p.run();
}
/**
 * Applies {@code Regex.findKV} with the named groups "keyname" and "valuename" to the line
 * "a b c" and asserts that the single result is the pair ("b", "c").
 */
@Test
@Category(NeedsRunner.class)
public void testKVMatchesName() {
  PCollection<String> line = p.apply(Create.of("a b c"));

  PCollection<KV<String, String>> pairs =
      line.apply(Regex.findKV("a (?<keyname>b) (?<valuename>c)", "keyname", "valuename"));

  PAssert.that(pairs).containsInAnyOrder(KV.of("b", "c"));
  p.run();
}
/**
 * Builds a collection with {@code Create.empty(StringUtf8Coder.of())}, asserts its contents
 * against {@code NO_LINES_ARRAY}, and checks that its coder equals {@code StringUtf8Coder.of()}.
 */
@Test
@Category(ValidatesRunner.class)
public void testCreateEmpty() {
  PCollection<String> created = p.apply(Create.empty(StringUtf8Coder.of()));

  PAssert.that(created).containsInAnyOrder(NO_LINES_ARRAY);

  // The coder handed to Create.empty should be the one reported by the collection.
  assertEquals(StringUtf8Coder.of(), created.getCoder());

  p.run();
}
/**
 * Creates a collection whose single element is {@code LINES} as an {@code Iterable<String>},
 * applies {@code Flatten.iterables()}, and asserts the output against {@code LINES_ARRAY}.
 */
@Test
@Category(ValidatesRunner.class)
public void testFlattenIterables() {
  PCollection<Iterable<String>> nested =
      p.apply(
          Create.<Iterable<String>>of(LINES)
              .withCoder(IterableCoder.of(StringUtf8Coder.of())));

  PCollection<String> flattened = nested.apply(Flatten.iterables());

  PAssert.that(flattened).containsInAnyOrder(LINES_ARRAY);
  p.run();
}
/**
 * Builds a pipeline over {@code table}, combines it globally and per key, and asserts both
 * results before running the pipeline.
 *
 * @param table key/value pairs used as pipeline input
 * @param globalSum expected single result of applying {@code SumInts} globally to the values
 * @param perKeyCombines expected results of applying {@code TestCombineFn} per key
 */
protected void runTestSimpleCombine(
    List<KV<String, Integer>> table, int globalSum, List<KV<String, String>> perKeyCombines) {
  PCollection<KV<String, Integer>> keyedInput = createInput(pipeline, table);

  // Global combine: drop the keys first, then fold all values with SumInts.
  PCollection<Integer> valuesOnly = keyedInput.apply(Values.create());
  PCollection<Integer> globalResult = valuesOnly.apply(Combine.globally(new SumInts()));

  // Per-key combine over the original keyed input.
  PCollection<KV<String, String>> perKeyResult =
      keyedInput.apply(Combine.perKey(new TestCombineFn()));

  PAssert.that(globalResult).containsInAnyOrder(globalSum);
  PAssert.that(perKeyResult).containsInAnyOrder(perKeyCombines);

  pipeline.run();
}
/**
 * Flattens an empty {@code PCollectionList}, sets a string coder on the result, sends it through
 * a {@code ParDo} of {@code IdentityFn}, and asserts that the output is empty.
 */
@Test
@Category(ValidatesRunner.class)
public void testFlattenPCollectionsEmptyThenParDo() {
  PCollectionList<String> noInputs = PCollectionList.<String>empty(p);

  // The coder is set explicitly on the flattened result.
  PCollection<String> flattened =
      noInputs.apply(Flatten.pCollections()).setCoder(StringUtf8Coder.of());

  PCollection<String> afterParDo = flattened.apply(ParDo.of(new IdentityFn<>()));

  PAssert.that(afterParDo).empty();
  p.run();
}
/**
 * Copies {@code LINES} into an {@code ImmutableSet}, creates a collection holding that set with
 * a {@code SetCoder}, applies {@code Flatten.iterables()}, and asserts the output against
 * {@code LINES_ARRAY}.
 */
@Test
@Category(ValidatesRunner.class)
public void testFlattenIterablesSets() {
  Set<String> uniqueLines = ImmutableSet.copyOf(LINES);

  PCollection<Set<String>> nested =
      p.apply(
          Create.<Set<String>>of(uniqueLines)
              .withCoder(SetCoder.of(StringUtf8Coder.of())));

  PCollection<String> flattened = nested.apply(Flatten.iterables());

  PAssert.that(flattened).containsInAnyOrder(LINES_ARRAY);
  p.run();
}
/**
 * Copies {@code LINES} into an {@code ImmutableSet}, creates a collection holding it as a
 * {@code Collection<String>} with a {@code CollectionCoder}, applies
 * {@code Flatten.iterables()}, and asserts the output against {@code LINES_ARRAY}.
 */
@Test
@Category(ValidatesRunner.class)
public void testFlattenIterablesCollections() {
  Set<String> uniqueLines = ImmutableSet.copyOf(LINES);

  PCollection<Collection<String>> nested =
      p.apply(
          Create.<Collection<String>>of(uniqueLines)
              .withCoder(CollectionCoder.of(StringUtf8Coder.of())));

  PCollection<String> flattened = nested.apply(Flatten.iterables());

  PAssert.that(flattened).containsInAnyOrder(LINES_ARRAY);
  p.run();
}