/**
 * Records that a program run accessed a stream.
 * The access time is read from {@link System#currentTimeMillis()} once, before the transaction starts,
 * and the same value is used for both the trace log and the stored record.
 *
 * @param run program run performing the access
 * @param streamId stream being accessed
 * @param accessType type of the access
 * @param component component passed through to the lineage dataset; may be {@code null}
 */
@Override
public void addAccess(ProgramRunId run, StreamId streamId, AccessType accessType,
                      @Nullable NamespacedEntityId component) {
  long nowMillis = System.currentTimeMillis();
  LOG.trace("Writing access for run {}, stream {}, accessType {}, component {}, accessTime = {}",
            run, streamId, accessType, component, nowMillis);

  Transactionals.execute(transactional, txContext -> {
    // Resolve the dataset inside the transaction, then write the single access record.
    LineageDataset lineageDataset =
      LineageDataset.getLineageDataset(txContext, datasetFramework, getLineageDatasetId());
    lineageDataset.addAccess(run, streamId, accessType, nowMillis, component);
  });
}
/**
 * Looks up the entities recorded for a single program run by delegating to the lineage dataset.
 *
 * @param run the program run to look up
 * @return a set of entities (program and data it accesses) associated with a program run.
 */
@Override
public Set<NamespacedEntityId> getEntitiesForRun(final ProgramRunId run) {
  Set<NamespacedEntityId> entities = execute(lineageDataset -> lineageDataset.getEntitiesForRun(run));
  return entities;
}
/**
 * Fetch program-dataset access information for a dataset for a given period.
 * The call is forwarded unchanged to the lineage dataset through {@code execute}.
 *
 * @param datasetInstance dataset for which to fetch access information
 * @param start start time period
 * @param end end time period
 * @param filter filter to be applied on result set
 * @return program-dataset access information
 */
@Override
public Set<Relation> getRelations(final DatasetId datasetInstance, final long start, final long end,
                                  final Predicate<Relation> filter) {
  Set<Relation> relations =
    execute(lineageDataset -> lineageDataset.getRelations(datasetInstance, start, end, filter));
  return relations;
}
lineageDataset.addAccess(run11, datasetInstance1, AccessType.READ, run11Data1AccessTime); lineageDataset.addAccess(run22, datasetInstance2, AccessType.WRITE, run22Data2AccessTime); lineageDataset.addAccess(run23, datasetInstance2, AccessType.WRITE, run23Data2AccessTime); lineageDataset.addAccess(run34, datasetInstance2, AccessType.READ_WRITE, System.currentTimeMillis()); }); Assert.assertEquals( ImmutableSet.of(new Relation(datasetInstance1, program1, AccessType.READ, runId1)), lineageDataset.getRelations(datasetInstance1, 0, 100000, x -> true) ); new Relation(datasetInstance2, program3, AccessType.READ_WRITE, runId4) ), lineageDataset.getRelations(datasetInstance2, 0, 100000, x -> true) ); new Relation(datasetInstance2, program2, AccessType.WRITE, runId3) ), lineageDataset.getRelations(program2, 0, 100000, x -> true) ); new Relation(datasetInstance2, program2, AccessType.WRITE, runId3) ), lineageDataset.getRelations(datasetInstance2, 0, 35000, x -> true) ); Assert.assertEquals(toSet(program1, datasetInstance1), lineageDataset.getEntitiesForRun(run11)); Assert.assertEquals(ImmutableList.of(run11Data1AccessTime), lineageDataset.getAccessTimesForRun(run11)); });
/**
 * Add a program-stream access that has no associated component.
 *
 * @param run program run information
 * @param stream stream accessed by the program
 * @param accessType access type
 * @param accessTimeMillis time of access
 */
public void addAccess(ProgramRunId run, StreamId stream, AccessType accessType, long accessTimeMillis) {
  // The trailing null is the component argument of the five-argument overload.
  addAccess(
    run,
    stream,
    accessType,
    accessTimeMillis,
    null
  );
}
/**
 * Builds the program-first row key for a dataset access.
 * Parts are appended in this order: program ({@code run.getParent()}), the value of
 * {@code getInvertedStartTime(run)}, dataset, {@code run.getEntityName()}, access type, component.
 *
 * @param run program run that performed the access
 * @param datasetInstance dataset that was accessed
 * @param accessType type of the access
 * @param component component handed to {@code addComponent}; may be {@code null}
 * @return the encoded key bytes
 */
private byte[] getProgramKey(ProgramRunId run, DatasetId datasetInstance, AccessType accessType,
                             @Nullable NamespacedEntityId component) {
  long invertedRunStart = getInvertedStartTime(run);

  MDSKey.Builder keyBuilder = new MDSKey.Builder();
  addProgram(keyBuilder, run.getParent());
  keyBuilder.add(invertedRunStart);
  addDataset(keyBuilder, datasetInstance);
  keyBuilder.add(run.getEntityName());
  keyBuilder.add(accessType.getType());
  addComponent(keyBuilder, component);

  return keyBuilder.build().getKey();
}
/**
 * Writes one dataset READ access for one program run, then checks that the relation, the entities
 * for the run and the access time can all be read back.
 */
@Test
public void testOneRelation() throws Exception {
  LineageDataset lineageDataset = getLineageDataset("testOneRelation");
  Assert.assertNotNull(lineageDataset);
  TransactionExecutor txExecutor =
    dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) lineageDataset);

  RunId runId = RunIds.generate(10000);
  DatasetId datasetInstance = new DatasetId("default", "dataset1");
  ProgramId program = new ProgramId("default", "app1", ProgramType.SERVICE, "service1");
  ProgramRunId run = program.run(runId.getId());
  long accessTimeMillis = System.currentTimeMillis();

  // First transaction: record the access.
  txExecutor.execute(() -> {
    lineageDataset.addAccess(run, datasetInstance, AccessType.READ, accessTimeMillis);
  });

  // Second transaction: read everything back and verify.
  txExecutor.execute(() -> {
    Relation expectedRelation = new Relation(datasetInstance, program, AccessType.READ, runId);
    Set<Relation> actualRelations = lineageDataset.getRelations(datasetInstance, 0, 100000, x -> true);
    Assert.assertEquals(1, actualRelations.size());
    Assert.assertEquals(expectedRelation, actualRelations.iterator().next());

    Assert.assertEquals(toSet(program, datasetInstance), lineageDataset.getEntitiesForRun(run));
    Assert.assertEquals(ImmutableList.of(accessTimeMillis), lineageDataset.getAccessTimesForRun(run));
  });
}
/**
 * Builds the program-first row key for a stream access.
 * Parts are appended in this order: program ({@code run.getParent()}), the value of
 * {@code getInvertedStartTime(run)}, stream, {@code run.getEntityName()}, access type, component.
 *
 * @param run program run that performed the access
 * @param stream stream that was accessed
 * @param accessType type of the access
 * @param component component handed to {@code addComponent}; may be {@code null}
 * @return the encoded key bytes
 */
private byte[] getProgramKey(ProgramRunId run, StreamId stream, AccessType accessType,
                             @Nullable NamespacedEntityId component) {
  long invertedRunStart = getInvertedStartTime(run);

  MDSKey.Builder keyBuilder = new MDSKey.Builder();
  addProgram(keyBuilder, run.getParent());
  keyBuilder.add(invertedRunStart);
  addStream(keyBuilder, stream);
  keyBuilder.add(run.getEntityName());
  keyBuilder.add(accessType.getType());
  addComponent(keyBuilder, component);

  return keyBuilder.build().getKey();
}
/**
 * Gets an instance of {@link LineageDataset} identified by {@code LINEAGE_DATASET_ID}.
 * The dataset instance will be created if it does not exist yet.
 *
 * @param datasetContext the {@link DatasetContext} for getting the dataset instance.
 * @param datasetFramework the {@link DatasetFramework} for creating the dataset instance if missing
 * @return an instance of {@link LineageDataset}
 */
public static LineageDataset getLineageDataset(DatasetContext datasetContext, DatasetFramework datasetFramework) {
  LineageDataset lineageDataset = getLineageDataset(datasetContext, datasetFramework, LINEAGE_DATASET_ID);
  return lineageDataset;
}
/**
 * Appends the run-related parts of a data-first key to the given builder.
 * Parts are appended in this order: the value of {@code getInvertedStartTime(run)}, program
 * ({@code run.getParent()}), {@code run.getEntityName()}, access type, component.
 *
 * @param builder key builder to append to
 * @param run program run that performed the access
 * @param accessType type of the access
 * @param component component handed to {@code addComponent}; may be {@code null}
 */
private void addDataKey(MDSKey.Builder builder, ProgramRunId run, AccessType accessType,
                        @Nullable NamespacedEntityId component) {
  long invertedRunStart = getInvertedStartTime(run);
  builder.add(invertedRunStart);

  addProgram(builder, run.getParent());
  builder.add(run.getEntityName());

  builder.add(accessType.getType());
  addComponent(builder, component);
}
/**
 * Builds a scan boundary key made of the dataset followed by {@code invertTime(time)}.
 *
 * @param datasetInstance dataset to scan for
 * @param time time that is passed through {@code invertTime} before being appended
 * @return the encoded key bytes
 */
private byte[] getDatasetScanKey(DatasetId datasetInstance, long time) {
  long invertedTime = invertTime(time);

  MDSKey.Builder keyBuilder = new MDSKey.Builder();
  addDataset(keyBuilder, datasetInstance);
  keyBuilder.add(invertedTime);

  return keyBuilder.build().getKey();
}
/**
 * Builds the dataset-first row key for a dataset access: the dataset, followed by the parts
 * appended by {@code addDataKey}.
 *
 * @param datasetInstance dataset that was accessed
 * @param run program run that performed the access
 * @param accessType type of the access
 * @param component component forwarded to {@code addDataKey}; may be {@code null}
 * @return the encoded key bytes
 */
private byte[] getDatasetKey(DatasetId datasetInstance, ProgramRunId run, AccessType accessType,
                             @Nullable NamespacedEntityId component) {
  MDSKey.Builder keyBuilder = new MDSKey.Builder();

  addDataset(keyBuilder, datasetInstance);
  addDataKey(keyBuilder, run, accessType, component);

  return keyBuilder.build().getKey();
}
/**
 * Builds the start key for scanning the records of a run: the program ({@code run.getParent()})
 * followed by the value of {@code getInvertedStartTime(run)}.
 *
 * @param run program run to scan for
 * @return the encoded key bytes
 */
private byte[] getRunScanStartKey(ProgramRunId run) {
  MDSKey.Builder keyBuilder = new MDSKey.Builder();
  addProgram(keyBuilder, run.getParent());

  long invertedRunStart = getInvertedStartTime(run);
  keyBuilder.add(invertedRunStart);

  return keyBuilder.build().getKey();
}
/**
 * Builds a scan boundary key made of the program followed by {@code invertTime(time)}.
 *
 * @param program program to scan for
 * @param time time that is passed through {@code invertTime} before being appended
 * @return the encoded key bytes
 */
private byte[] getProgramScanKey(ProgramId program, long time) {
  long invertedTime = invertTime(time);

  MDSKey.Builder keyBuilder = new MDSKey.Builder();
  addProgram(keyBuilder, program);
  keyBuilder.add(invertedTime);

  return keyBuilder.build().getKey();
}
/**
 * Builds the stream-first row key for a stream access: the stream, followed by the parts
 * appended by {@code addDataKey}.
 *
 * @param stream stream that was accessed
 * @param run program run that performed the access
 * @param accessType type of the access
 * @param component component forwarded to {@code addDataKey}; may be {@code null}
 * @return the encoded key bytes
 */
private byte[] getStreamKey(StreamId stream, ProgramRunId run, AccessType accessType,
                            @Nullable NamespacedEntityId component) {
  MDSKey.Builder keyBuilder = new MDSKey.Builder();

  addStream(keyBuilder, stream);
  addDataKey(keyBuilder, run, accessType, component);

  return keyBuilder.build().getKey();
}
/**
 * Creates a {@link LineageDataset} for the given specification.
 * A {@code Table} is obtained from the five-argument {@code getDataset} overload using
 * {@code ACCESS_REGISTRY_TABLE}, and is wrapped in a new {@link LineageDataset} named {@code spec.getName()}.
 *
 * NOTE(review): the last closing brace on the code line below appears to end an enclosing type whose
 * header is outside this excerpt - confirm before moving this method.
 *
 * @param datasetContext context forwarded to the table lookup
 * @param spec dataset specification; supplies the name of the returned dataset
 * @param arguments arguments forwarded to the table lookup
 * @param classLoader class loader forwarded to the table lookup
 * @return a new {@link LineageDataset} backed by the looked-up table
 * @throws IOException declared by this method; presumably propagated from the table lookup - confirm
 */
@Override public LineageDataset getDataset(DatasetContext datasetContext, DatasetSpecification spec, Map<String, String> arguments, ClassLoader classLoader) throws IOException { Table table = getDataset(datasetContext, ACCESS_REGISTRY_TABLE, spec, arguments, classLoader); return new LineageDataset(spec.getName(), table); } }
/**
 * Add a program-dataset access.
 * Delegates to the five-argument {@code addAccess} overload, passing {@code null} as the last argument.
 *
 * @param run program run information
 * @param datasetInstance dataset accessed by the program
 * @param accessType access type
 * @param accessTimeMillis time of access
 */
public void addAccess(ProgramRunId run, DatasetId datasetInstance, AccessType accessType, long accessTimeMillis) { addAccess(run, datasetInstance, accessType, accessTimeMillis, null); } /**
/**
 * Builds the program-first row key for a dataset access.
 * Parts are appended in this order: program ({@code run.getParent()}), the value of
 * {@code getInvertedStartTime(run)}, dataset, {@code run.getEntityName()}, access type, component.
 *
 * @param run program run that performed the access
 * @param datasetInstance dataset that was accessed
 * @param accessType type of the access
 * @param component component handed to {@code addComponent}; may be {@code null}
 * @return the encoded key bytes
 */
private byte[] getProgramKey(ProgramRunId run, DatasetId datasetInstance, AccessType accessType,
                             @Nullable NamespacedEntityId component) {
  long invertedRunStart = getInvertedStartTime(run);

  MDSKey.Builder keyBuilder = new MDSKey.Builder();
  addProgram(keyBuilder, run.getParent());
  keyBuilder.add(invertedRunStart);
  addDataset(keyBuilder, datasetInstance);
  keyBuilder.add(run.getEntityName());
  keyBuilder.add(accessType.getType());
  addComponent(keyBuilder, component);

  return keyBuilder.build().getKey();
}
/**
 * Gets an instance of {@link LineageDataset} identified by {@code LINEAGE_DATASET_ID}.
 * The dataset instance will be created if it does not exist yet.
 *
 * @param datasetContext the {@link DatasetContext} for getting the dataset instance.
 * @param datasetFramework the {@link DatasetFramework} for creating the dataset instance if missing
 * @return an instance of {@link LineageDataset}
 */
public static LineageDataset getLineageDataset(DatasetContext datasetContext, DatasetFramework datasetFramework) {
  LineageDataset lineageDataset = getLineageDataset(datasetContext, datasetFramework, LINEAGE_DATASET_ID);
  return lineageDataset;
}
/**
 * Appends the run-related parts of a data-first key to the given builder.
 * Parts are appended in this order: the value of {@code getInvertedStartTime(run)}, program
 * ({@code run.getParent()}), {@code run.getEntityName()}, access type, component.
 *
 * @param builder key builder to append to
 * @param run program run that performed the access
 * @param accessType type of the access
 * @param component component handed to {@code addComponent}; may be {@code null}
 */
private void addDataKey(MDSKey.Builder builder, ProgramRunId run, AccessType accessType,
                        @Nullable NamespacedEntityId component) {
  long invertedRunStart = getInvertedStartTime(run);
  builder.add(invertedRunStart);

  addProgram(builder, run.getParent());
  builder.add(run.getEntityName());

  builder.add(accessType.getType());
  addComponent(builder, component);
}