checkArgument(
    inputTypeDescriptor.equals(
        TypeDescriptors.kvs(outputFormatKeyClass, outputFormatValueClass)),
    "%s expects following %ss: KV(Key: %s, Value: %s) but following %ss are set: KV(Key: %s, Value: %s)",
    Write.class.getSimpleName(),
@Test
public void testTypeDescriptorsKV() throws Exception {
  TypeDescriptor<KV<String, Integer>> descriptor = kvs(strings(), integers());
  assertEquals(descriptor, new TypeDescriptor<KV<String, Integer>>() {});
}
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> gameInfo) {
  return gameInfo
      .apply(
          MapElements.into(
                  TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
              .via((GameActionInfo gInfo) -> KV.of(gInfo.getKey(field), gInfo.getScore())))
      .apply(Sum.integersPerKey());
}
}
@Test
public void testWritingDataFailInvalidValueType() {
  conf.set(HadoopFormatIO.OUTPUT_DIR, tmpFolder.getRoot().getAbsolutePath());
  List<KV<Text, Text>> data = new ArrayList<>();
  data.add(KV.of(new Text("key"), new Text("value")));
  TypeDescriptor<Text> textTypeDescriptor = new TypeDescriptor<Text>() {};
  PCollection<KV<Text, Text>> input =
      p.apply(Create.of(data))
          .setTypeDescriptor(TypeDescriptors.kvs(textTypeDescriptor, textTypeDescriptor));
  thrown.expect(Pipeline.PipelineExecutionException.class);
  thrown.expectMessage(Text.class.getName());
  input.apply(
      "Write",
      HadoopFormatIO.<Text, Text>write()
          .withConfiguration(conf)
          .withPartitioning()
          .withExternalSynchronization(new HDFSSynchronization(getLocksDirPath())));
  p.run().waitUntilFinish();
}
@Override
public PCollection<KV<Integer, KV<KeyT, ValueT>>> expand(PCollection<KV<KeyT, ValueT>> input) {
  return input
      .apply(
          "AssignTask",
          ParDo.of(new AssignTaskFn<KeyT, ValueT>(configView)).withSideInputs(configView))
      .setTypeDescriptor(
          TypeDescriptors.kvs(TypeDescriptors.integers(), input.getTypeDescriptor()))
      .apply("GroupByTaskId", GroupByKey.create())
      .apply("FlattenGroupedTasks", ParDo.of(new FlattenGroupedTasks<>()));
}
}
@Test
public void testWritingData() throws IOException {
  conf.set(HadoopFormatIO.OUTPUT_DIR, tmpFolder.getRoot().getAbsolutePath());
  List<KV<Text, Employee>> data = TestEmployeeDataSet.getEmployeeData();
  PCollection<KV<Text, Employee>> input =
      p.apply(Create.of(data))
          .setTypeDescriptor(
              TypeDescriptors.kvs(
                  new TypeDescriptor<Text>() {}, new TypeDescriptor<Employee>() {}));
  input.apply(
      "Write",
      HadoopFormatIO.<Text, Employee>write()
          .withConfiguration(conf)
          .withPartitioning()
          .withExternalSynchronization(new HDFSSynchronization(getLocksDirPath())));
  p.run();
  List<KV<Text, Employee>> writtenOutput = EmployeeOutputFormat.getWrittenOutput();
  assertEquals(data.size(), writtenOutput.size());
  assertTrue(data.containsAll(writtenOutput));
  assertTrue(writtenOutput.containsAll(data));
  Mockito.verify(EmployeeOutputFormat.getOutputCommitter()).commitJob(Mockito.any());
  Mockito.verify(EmployeeOutputFormat.getOutputCommitter(), Mockito.times(REDUCERS_COUNT))
      .commitTask(Mockito.any());
}
private void executeBatchTest(HadoopFormatIO.Write<Text, LongWritable> write, String outputDir) {
  pipeline
      .apply(Create.of(SENTENCES))
      .apply(ParDo.of(new ConvertToLowerCaseFn()))
      .apply(new WordCount.CountWords())
      .apply(
          "ConvertToHadoopFormat",
          ParDo.of(new ConvertToHadoopFormatFn<>(KV_STR_INT_2_TXT_LONGWRITABLE)))
      .setTypeDescriptor(
          TypeDescriptors.kvs(
              new TypeDescriptor<Text>() {}, new TypeDescriptor<LongWritable>() {}))
      .apply(write);
  pipeline.run();
  Map<String, Long> results = loadWrittenDataAsMap(outputDir);
  MatcherAssert.assertThat(results.entrySet(), equalTo(computeWordCounts(SENTENCES).entrySet()));
}
@Test
public void testWritingDataFailInvalidKeyType() {
  conf.set(HadoopFormatIO.OUTPUT_DIR, tmpFolder.getRoot().getAbsolutePath());
  List<KV<String, Employee>> data = new ArrayList<>();
  data.add(KV.of("key", new Employee("name", "address")));
  PCollection<KV<String, Employee>> input =
      p.apply("CreateData", Create.of(data))
          .setTypeDescriptor(
              TypeDescriptors.kvs(
                  new TypeDescriptor<String>() {}, new TypeDescriptor<Employee>() {}));
  thrown.expect(Pipeline.PipelineExecutionException.class);
  thrown.expectMessage(String.class.getName());
  input.apply(
      "Write",
      HadoopFormatIO.<String, Employee>write()
          .withConfiguration(conf)
          .withPartitioning()
          .withExternalSynchronization(new HDFSSynchronization(getLocksDirPath())));
  p.run().waitUntilFinish();
}
/**
 * Checks that {@link CoderProviders#fromStaticMethods} successfully builds a working {@link
 * CoderProvider} from {@link KvCoder KvCoder.class}.
 */
@Test
public void testKvCoderProvider() throws Exception {
  TypeDescriptor<KV<Double, Double>> type =
      TypeDescriptors.kvs(TypeDescriptors.doubles(), TypeDescriptors.doubles());
  CoderProvider kvCoderProvider = CoderProviders.fromStaticMethods(KV.class, KvCoder.class);
  assertEquals(
      KvCoder.of(DoubleCoder.of(), DoubleCoder.of()),
      kvCoderProvider.coderFor(type, Arrays.asList(DoubleCoder.of(), DoubleCoder.of())));
}
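// A minimal sketch (an assumption for illustration, not part of the test above): the same
// TypeDescriptors.kvs(...) descriptor can also be handed to the default CoderRegistry
// (org.apache.beam.sdk.coders.CoderRegistry), which resolves it to the equivalent
// KvCoder.of(DoubleCoder.of(), DoubleCoder.of()). The helper name resolveKvCoder is hypothetical.
static Coder<KV<Double, Double>> resolveKvCoder() throws CannotProvideCoderException {
  return CoderRegistry.createDefault()
      .getCoder(TypeDescriptors.kvs(TypeDescriptors.doubles(), TypeDescriptors.doubles()));
}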
@Test
public void testTypeDescriptorsTypeParameterOf() throws Exception {
  assertEquals(strings(), extractFooT(new Generic<String, Integer>() {}));
  assertEquals(integers(), extractBarT(new Generic<String, Integer>() {}));
  assertEquals(kvs(strings(), integers()), extractKV(new Generic<String, Integer>() {}));
}
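// A possible sketch of one of the helper methods used above (an assumption; the helpers are not
// shown in the snippet): extractFooT can delegate to TypeDescriptors.extractFromTypeParameters
// with an anonymous TypeVariableExtractor bound to a Generic<FooT, BarT> marker interface.
private static <ActualFooT> TypeDescriptor<ActualFooT> extractFooT(
    Generic<ActualFooT, ?> instance) {
  return TypeDescriptors.extractFromTypeParameters(
      instance,
      Generic.class,
      new TypeDescriptors.TypeVariableExtractor<Generic<ActualFooT, ?>, ActualFooT>() {});
}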
private void runValidationPipeline(Configuration configuration) {
  p.apply(Create.of(TestEmployeeDataSet.getEmployeeData()))
      .setTypeDescriptor(
          TypeDescriptors.kvs(new TypeDescriptor<Text>() {}, new TypeDescriptor<Employee>() {}))
      .apply(
          "Write",
          HadoopFormatIO.<Text, Employee>write()
              .withConfiguration(configuration)
              .withPartitioning()
              .withExternalSynchronization(new HDFSSynchronization(getLocksDirPath())));
}
/**
 * Validates the behavior of {@link
 * HadoopFormatIO.Write.Builder#withConfiguration(Configuration) withConfiguration(Configuration)}
 * when the Hadoop OutputFormat class is not provided in the configuration.
 */
@Test
public void testWriteValidationFailsMissingOutputFormatInConf() {
  Configuration configuration = new Configuration();
  configuration.setClass(HadoopFormatIO.OUTPUT_KEY_CLASS, Text.class, Object.class);
  configuration.setClass(HadoopFormatIO.OUTPUT_VALUE_CLASS, Employee.class, Object.class);
  HadoopFormatIO.Write<Text, Employee> writeWithWrongConfig =
      HadoopFormatIO.<Text, Employee>write()
          .withConfiguration(configuration)
          .withPartitioning()
          .withExternalSynchronization(new HDFSSynchronization(getLocksDirPath()));
  p.apply(Create.of(TestEmployeeDataSet.getEmployeeData()))
      .setTypeDescriptor(
          TypeDescriptors.kvs(new TypeDescriptor<Text>() {}, new TypeDescriptor<Employee>() {}))
      .apply("Write", writeWithWrongConfig);
  thrown.expect(Pipeline.PipelineExecutionException.class);
  thrown.expectMessage("Configuration must contain \"mapreduce.job.outputformat.class\"");
  p.run().waitUntilFinish();
}
.apply(
    MapElements.into(
            TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.longs()))
        .via(v -> KV.of("key", v)))
.apply(Sum.longsPerKey());
/** Test that bad input data is dropped appropriately. */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresBadInput() throws Exception {
  PCollection<String> input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));
  PCollection<KV<String, Integer>> extract =
      input
          .apply(ParDo.of(new ParseEventFn()))
          .apply(
              MapElements.into(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                  .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));
  PAssert.that(extract).empty();
  p.run().waitUntilFinish();
}
}
"MapTeamAsKey", MapElements.into( TypeDescriptors.kvs( TypeDescriptors.strings(), TypeDescriptor.of(GameActionInfo.class))) .via((GameActionInfo gInfo) -> KV.of(gInfo.team, gInfo)))
/** Test the filtering. */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresFilter() throws Exception {
  final Instant startMinTimestamp = new Instant(1447965680000L);
  PCollection<String> input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));
  PCollection<KV<String, Integer>> output =
      input
          .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))
          .apply(
              "FilterStartTime",
              Filter.by(
                  (GameActionInfo gInfo) -> gInfo.getTimestamp() > startMinTimestamp.getMillis()))
          // Run a map to access the fields in the result.
          .apply(
              MapElements.into(
                      TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                  .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));
  PAssert.that(output).containsInAnyOrder(FILTERED_EVENTS);
  p.run().waitUntilFinish();
}
"ExtractUserScore", MapElements.into( TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers())) .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));