/**
 * Gathers the lineage events recorded in every given {@link State} into one collection.
 *
 * <p>Each state is loaded individually through {@code load(State)}, which groups events by
 * branch; the per-branch sets are then merged into a single hash set, so events that compare
 * equal appear only once in the result.
 *
 * @param states the states of one dataset; must be neither {@code null} nor empty, otherwise
 *        the {@code Preconditions.checkArgument} guard rejects the call
 * @return every {@link LineageEventBuilder} found in the given states, across all branches
 */
public static Collection<LineageEventBuilder> load(Collection<? extends State> states) {
  Preconditions.checkArgument(states != null && !states.isEmpty());

  Set<LineageEventBuilder> merged = Sets.newHashSet();
  for (State state : states) {
    // load(State) keys the events by branch; only the event sets themselves are needed here.
    for (Set<LineageEventBuilder> branchEvents : load(state).values()) {
      merged.addAll(branchEvents);
    }
  }
  return merged;
}
private void submitLineageEvent(String dataset, Collection<TaskState> states) { Collection<LineageEventBuilder> events = LineageInfo.load(states); // Send events events.forEach(event -> event.submit(metricContext)); log.info(String.format("Submitted %d lineage events for dataset %s", events.size(), dataset)); }
// Load the lineage events from a collection holding only taskState and expect exactly two events.
Collection<LineageEventBuilder> lineageEventBuilders = LineageInfo.load(Collections.singleton(taskState)); Assert.assertEquals(lineageEventBuilders.size(), 2);
// The state must contain the branch-1 destination key; loading lineage from a list holding only
// this state is then expected to produce exactly four events.
Assert.assertTrue(state.contains("gobblin.event.lineage.branch.1.destination")); Collection<LineageEventBuilder> events = LineageInfo.load(ImmutableList.of(state)); Assert.assertTrue(events.size() == 4);
@Test public void testEventForPartitionedDataset() { final String topic = "testTopic"; final String kafka = "kafka"; final String hdfs = "hdfs"; final String path = "/data/tracking/PageViewEvent"; final String partitionName = "hourly/2018/08/15/15"; State state = new State(); LineageInfo lineageInfo = getLineageInfo(); DatasetDescriptor source = new DatasetDescriptor(kafka, topic); lineageInfo.setSource(source, state); DatasetDescriptor destinationDataset = new DatasetDescriptor(hdfs, path); PartitionDescriptor destination = new PartitionDescriptor(partitionName, destinationDataset); lineageInfo.putDestination(destination, 0, state); Map<String, Set<LineageEventBuilder>> events = LineageInfo.load(state); LineageEventBuilder event = first(events.get("0")); verify(event, topic, source, destination); // Verify gobblin tracking event GobblinTrackingEvent trackingEvent = event.build(); Assert.assertEquals(LineageEventBuilder.isLineageEvent(trackingEvent), true); Assert.assertEquals(LineageEventBuilder.fromEvent(trackingEvent), event); }
// Step 1: put destination01 on branch 1 of state0, load the per-branch events from state0, and
//         verify that branch "0" matches destination00 and branch "1" matches destination01.
// Step 2: load lineage across `states` and expect 2 events.
// Step 3: tag destination12 with branch metadata "2", put it on branch 2 of state1, reload, and
//         expect 3 events.
// Step 4: tag destination11 with branch metadata "1", put it on branch 1 of state1, reload, and
//         expect 4 events.
// After each reload, the event picked by getLineageEvent(eventsList, 0, hdfs) must equal the
// branch-"0" event originally loaded from state0.
// NOTE(review): `states` presumably holds state0 and state1; it is built outside this excerpt — confirm.
lineageInfo.putDestination(destination01, 1, state0); Map<String, Set<LineageEventBuilder>> events = LineageInfo.load(state0); verify(first(events.get("0")), topic, source, destination00); verify(first(events.get("1")), topic, source, destination01); Collection<LineageEventBuilder> eventsList = LineageInfo.load(states); Assert.assertTrue(eventsList.size() == 2); Assert.assertEquals(getLineageEvent(eventsList, 0, hdfs), first(events.get("0"))); destination12.addMetadata(branch, "2"); lineageInfo.putDestination(destination12, 2, state1); eventsList = LineageInfo.load(states); Assert.assertTrue(eventsList.size() == 3); Assert.assertEquals(getLineageEvent(eventsList, 0, hdfs), first(events.get("0"))); destination11.addMetadata(branch, "1"); lineageInfo.putDestination(destination11, 1, state1); eventsList = LineageInfo.load(states); Assert.assertTrue(eventsList.size() == 4); Assert.assertEquals(getLineageEvent(eventsList, 0, hdfs), first(events.get("0")));
/**
 * Loads the lineage events of a dataset from all of its {@link State}s.
 *
 * <p>Delegates to {@code load(State)} for each state and flattens the per-branch event sets
 * into one hash set, so equal events contributed by different states or branches are kept
 * only once.
 *
 * @param states states belonging to the same dataset; a {@code null} or empty collection
 *        fails the {@code Preconditions.checkArgument} check
 * @return the combined {@link LineageEventBuilder}s stored in the given states
 */
public static Collection<LineageEventBuilder> load(Collection<? extends State> states) {
  Preconditions.checkArgument(states != null && !states.isEmpty());

  Set<LineageEventBuilder> collected = Sets.newHashSet();
  // Branch keys are irrelevant here: every branch's events go into the same result set.
  states.forEach(state -> load(state).values().forEach(collected::addAll));
  return collected;
}
private void submitLineageEvent(String dataset, Collection<TaskState> states) { Collection<LineageEventBuilder> events = LineageInfo.load(states); // Send events events.forEach(event -> event.submit(metricContext)); log.info(String.format("Submitted %d lineage events for dataset %s", events.size(), dataset)); }