/**
 * Groups the given {@link Relation}s by the collapse key derived from the requested
 * {@link CollapseType}s and merges each group into a {@link CollapsedRelation}.
 *
 * <p>Within a group, the access types, runs and components of all member relations are unioned.
 * The data and program of the resulting relation are taken from the group's collapse key.
 *
 * @param relations lineage relations to collapse
 * @param collapseTypes the collapse types passed to the collapse key computation
 * @return the set holding the collapsed relation built for every collapse key
 */
public static Set<CollapsedRelation> collapseRelations(Iterable<Relation> relations,
                                                       Set<CollapseType> collapseTypes) {
  // Bucket every relation under its collapse key.
  Multimap<CollapseKey, Relation> grouped = HashMultimap.create();
  for (Relation rel : relations) {
    grouped.put(getCollapseKey(rel, collapseTypes), rel);
  }
  LOG.trace("Collapsed relations: {}", grouped.asMap());

  // Merge each bucket into a single collapsed relation.
  Set<CollapsedRelation> result = new HashSet<>();
  for (CollapseKey key : grouped.keySet()) {
    NamespacedEntityId data = key.data;
    ProgramId program = key.program;

    Set<AccessType> mergedAccess = new HashSet<>();
    Set<RunId> mergedRuns = new HashSet<>();
    Set<NamespacedEntityId> mergedComponents = new HashSet<>();
    for (Relation member : grouped.get(key)) {
      mergedAccess.add(member.getAccess());
      mergedRuns.add(member.getRun());
      mergedComponents.addAll(member.getComponents());
    }

    result.add(toCollapsedRelation(data, program, mergedAccess, mergedRuns, mergedComponents));
  }
  return result;
}
/**
 * Builds a {@link LineageRecord} from a {@link Lineage}.
 *
 * <p>The lineage relations are first collapsed via {@link LineageCollapser#collapseRelations}.
 * Each collapsed relation then contributes one {@link RelationRecord}, plus a {@link ProgramRecord}
 * and a {@link DataRecord} stored under the keys produced by {@code makeProgramKey} and
 * {@code makeDataKey}. A later relation with the same key overwrites the earlier map entry.
 *
 * @param start start value passed through to the resulting record
 * @param end end value passed through to the resulting record
 * @param lineage lineage whose relations are converted
 * @param collapseTypes collapse types forwarded to the collapser
 * @return the lineage record assembled from the collapsed relations
 */
public static LineageRecord toLineageRecord(long start, long end, Lineage lineage,
                                            Set<CollapseType> collapseTypes) {
  Set<RelationRecord> relationRecords = new HashSet<>();
  Map<String, ProgramRecord> programsByKey = new HashMap<>();
  Map<String, DataRecord> dataByKey = new HashMap<>();

  for (CollapsedRelation collapsed : LineageCollapser.collapseRelations(lineage.getRelations(), collapseTypes)) {
    String dataKey = makeDataKey(collapsed.getData());
    String programKey = makeProgramKey(collapsed.getProgram());

    relationRecords.add(new RelationRecord(dataKey,
                                           programKey,
                                           convertAccessType(collapsed.getAccess()),
                                           convertRuns(collapsed.getRuns()),
                                           convertComponents(collapsed.getComponents())));
    programsByKey.put(programKey, new ProgramRecord(collapsed.getProgram()));
    dataByKey.put(dataKey, new DataRecord(collapsed.getData()));
  }

  return new LineageRecord(start, end, relationRecords, programsByKey, dataByKey);
}
/**
 * Builds a {@link LineageRecord} from a {@link Lineage}.
 *
 * <p>The lineage relations are first collapsed via {@link LineageCollapser#collapseRelations}.
 * Each collapsed relation then contributes one {@link RelationRecord}, plus a {@link ProgramRecord}
 * and a {@link DataRecord} stored under the keys produced by {@code makeProgramKey} and
 * {@code makeDataKey}. A later relation with the same key overwrites the earlier map entry.
 *
 * @param start start value passed through to the resulting record
 * @param end end value passed through to the resulting record
 * @param lineage lineage whose relations are converted
 * @param collapseTypes collapse types forwarded to the collapser
 * @return the lineage record assembled from the collapsed relations
 */
public static LineageRecord toLineageRecord(long start, long end, Lineage lineage,
                                            Set<CollapseType> collapseTypes) {
  Set<RelationRecord> relationRecords = new HashSet<>();
  Map<String, ProgramRecord> programsByKey = new HashMap<>();
  Map<String, DataRecord> dataByKey = new HashMap<>();

  for (CollapsedRelation collapsed : LineageCollapser.collapseRelations(lineage.getRelations(), collapseTypes)) {
    String dataKey = makeDataKey(collapsed.getData());
    String programKey = makeProgramKey(collapsed.getProgram());

    relationRecords.add(new RelationRecord(dataKey,
                                           programKey,
                                           convertAccessType(collapsed.getAccess()),
                                           convertRuns(collapsed.getRuns()),
                                           convertComponents(collapsed.getComponents())));
    programsByKey.put(programKey, new ProgramRecord(collapsed.getProgram()));
    dataByKey.put(dataKey, new DataRecord(collapsed.getData()));
  }

  return new LineageRecord(start, end, relationRecords, programsByKey, dataByKey);
}
@Test public void testCollapseAccess() { Set<Relation> relations = ImmutableSet.of( new Relation(data1, service1, AccessType.READ, runId1), new Relation(data1, service1, AccessType.WRITE, runId1), new Relation(data1, service1, AccessType.READ, runId1) ); // Collapse on access Assert.assertEquals( toSet( new CollapsedRelation(data1, service1, toSet(AccessType.READ, AccessType.WRITE), toSet(runId1), Collections.emptySet()) ), LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.ACCESS)) ); }
/**
 * Groups the given {@link Relation}s by the collapse key derived from the requested
 * {@link CollapseType}s and merges each group into a {@link CollapsedRelation}.
 *
 * <p>Within a group, the access types, runs and components of all member relations are unioned.
 * The data and program of the resulting relation are taken from the group's collapse key.
 *
 * @param relations lineage relations to collapse
 * @param collapseTypes the collapse types passed to the collapse key computation
 * @return the set holding the collapsed relation built for every collapse key
 */
public static Set<CollapsedRelation> collapseRelations(Iterable<Relation> relations,
                                                       Set<CollapseType> collapseTypes) {
  // Bucket every relation under its collapse key.
  Multimap<CollapseKey, Relation> grouped = HashMultimap.create();
  for (Relation rel : relations) {
    grouped.put(getCollapseKey(rel, collapseTypes), rel);
  }
  LOG.trace("Collapsed relations: {}", grouped.asMap());

  // Merge each bucket into a single collapsed relation.
  Set<CollapsedRelation> result = new HashSet<>();
  for (CollapseKey key : grouped.keySet()) {
    NamespacedEntityId data = key.data;
    ProgramId program = key.program;

    Set<AccessType> mergedAccess = new HashSet<>();
    Set<RunId> mergedRuns = new HashSet<>();
    Set<NamespacedEntityId> mergedComponents = new HashSet<>();
    for (Relation member : grouped.get(key)) {
      mergedAccess.add(member.getAccess());
      mergedRuns.add(member.getRun());
      mergedComponents.addAll(member.getComponents());
    }

    result.add(toCollapsedRelation(data, program, mergedAccess, mergedRuns, mergedComponents));
  }
  return result;
}
@Test public void testCollapseComponent() { Set<Relation> relations = ImmutableSet.of( new Relation(data1, service1, AccessType.READ, runId1), new Relation(data1, service1, AccessType.WRITE, runId1), new Relation(data1, service1, AccessType.READ, runId1) ); // Collapse on component Assert.assertEquals( toSet( new CollapsedRelation(data1, service1, toSet(AccessType.READ), toSet(runId1), Collections.emptySet()), new CollapsedRelation(data1, service1, toSet(AccessType.WRITE), toSet(runId1), Collections.emptySet()) ), LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.COMPONENT)) ); }
@Test public void testCollapseRun() { Set<Relation> relations = ImmutableSet.of( new Relation(data1, service1, AccessType.READ, runId1), new Relation(data1, service1, AccessType.WRITE, runId1), new Relation(data1, service1, AccessType.READ, runId2) ); // Collapse on run Assert.assertEquals( toSet( new CollapsedRelation(data1, service1, toSet(AccessType.READ), toSet(runId1, runId2), Collections.emptySet()), new CollapsedRelation(data1, service1, toSet(AccessType.WRITE), toSet(runId1), Collections.emptySet()) ), LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.RUN)) ); }
@Test public void testCollapseMulti() { Set<Relation> relations = ImmutableSet.of( new Relation(data1, service1, AccessType.READ, runId1), new Relation(data1, service1, AccessType.WRITE, runId1), new Relation(data1, service1, AccessType.READ, runId1), new Relation(data1, service2, AccessType.READ, runId1), new Relation(data1, service2, AccessType.READ, runId1), new Relation(data2, service1, AccessType.READ, runId1), new Relation(data2, service1, AccessType.READ, runId1) ); // Collapse on access Assert.assertEquals( toSet( new CollapsedRelation(data1, service1, toSet(AccessType.READ, AccessType.WRITE), toSet(runId1), Collections.emptySet()), new CollapsedRelation(data1, service2, toSet(AccessType.READ), toSet(runId1), Collections.emptySet()), new CollapsedRelation(data2, service1, toSet(AccessType.READ), toSet(runId1), Collections.emptySet()) ), LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.ACCESS)) ); }
toSet(runId1, runId2, runId3), Collections.emptySet()) ), LineageCollapser.collapseRelations(relations, toSet(CollapseType.ACCESS, CollapseType.RUN)) ); toSet(runId3), Collections.emptySet()) ), LineageCollapser.collapseRelations(relations, toSet(CollapseType.ACCESS, CollapseType.COMPONENT)) ); new CollapsedRelation(data1, service1, toSet(AccessType.UNKNOWN), toSet(runId2, runId3), Collections.emptySet()) ), LineageCollapser.collapseRelations(relations, toSet(CollapseType.COMPONENT, CollapseType.RUN)) ); Collections.emptySet()) ), LineageCollapser.collapseRelations(relations, toSet(CollapseType.COMPONENT, CollapseType.RUN, CollapseType.ACCESS)) );