/**
 * Attaches a watermark interval to {@code workUnit} when the copy entity is a
 * {@link CopyableFile} and the generator yields an interval; otherwise the
 * work unit is left untouched.
 */
private void setWorkUnitWatermark(WorkUnit workUnit, Optional<CopyableFileWatermarkGenerator> watermarkGenerator,
    CopyEntity copyEntity)
    throws IOException {
  // Only CopyableFile entities carry a watermark; bail out early for anything else.
  if (!(copyEntity instanceof CopyableFile)) {
    return;
  }
  Optional<WatermarkInterval> interval =
      CopyableFileWatermarkHelper.getCopyableFileWatermark((CopyableFile) copyEntity, watermarkGenerator);
  if (interval.isPresent()) {
    workUnit.setWatermarkInterval(interval.get());
  }
}
/**
 * Builds {@code num_parallelism} work units, each covering a contiguous,
 * non-overlapping record range: unit i spans
 * (i * numRecordsPerExtract, (i + 1) * numRecordsPerExtract].
 */
private List<WorkUnit> initialWorkUnits() {
  List<WorkUnit> units = Lists.newArrayList();
  for (int idx = 0; idx < num_parallelism; idx++) {
    WorkUnit unit = WorkUnit.create(newExtract(Extract.TableType.APPEND_ONLY, namespace, table));
    // Low watermark is exclusive of the previous unit's high watermark.
    unit.setWatermarkInterval(new WatermarkInterval(
        new LongWatermark(idx * numRecordsPerExtract + 1),
        new LongWatermark((idx + 1) * numRecordsPerExtract)));
    unit.setProp(WORK_UNIT_INDEX, idx);
    units.add(unit);
  }
  return units;
}
@Override public List<WorkUnit> pack(Map<String, List<WorkUnit>> workUnitsByTopic, int numContainers) { setWorkUnitEstSizes(workUnitsByTopic); List<WorkUnit> workUnits = Lists.newArrayList(); for (List<WorkUnit> workUnitsForTopic : workUnitsByTopic.values()) { // For each topic, merge all empty workunits into a single workunit, so that a single // empty task will be created instead of many. MultiWorkUnit zeroSizeWorkUnit = MultiWorkUnit.createEmpty(); for (WorkUnit workUnit : workUnitsForTopic) { if (DoubleMath.fuzzyEquals(getWorkUnitEstSize(workUnit), 0.0, EPS)) { addWorkUnitToMultiWorkUnit(workUnit, zeroSizeWorkUnit); } else { workUnit.setWatermarkInterval(getWatermarkIntervalFromWorkUnit(workUnit)); workUnits.add(workUnit); } } if (!zeroSizeWorkUnit.getWorkUnits().isEmpty()) { workUnits.add(squeezeMultiWorkUnit(zeroSizeWorkUnit)); } } return worstFitDecreasingBinPacking(workUnits, numContainers); } }
/**
 * Serializes this partition's state onto {@code workUnit}: the low/high watermark
 * pair as a WatermarkInterval, plus flags for a user-specified high watermark and
 * for being the last partition, when set.
 */
public void serialize(WorkUnit workUnit) {
  workUnit.setWatermarkInterval(
      new WatermarkInterval(new LongWatermark(lowWatermark), new LongWatermark(highWatermark)));
  if (hasUserSpecifiedHighWatermark) {
    workUnit.setProp(Partition.HAS_USER_SPECIFIED_HIGH_WATERMARK, true);
  }
  if (isLastPartition) {
    // NOTE(review): "PARTIITON" is a typo in the externally declared constant name;
    // it cannot be corrected here without changing its declaration and every reader.
    workUnit.setProp(Partition.IS_LAST_PARTIITON, true);
  }
}
// Tag the workunit with the dataset URN it belongs to, then bound it by the
// previously committed multi-key watermark (low) and the expected partition
// watermarks (high) for this table.
watermarkWorkunit.setProp(ConfigurationKeys.DATASET_URN_KEY, tableKey);
watermarkWorkunit.setWatermarkInterval(new WatermarkInterval(new MultiKeyValueLongWatermark(
    this.previousWatermarks.get(tableKey)), new MultiKeyValueLongWatermark(expectedPartitionWatermarks)));
/**
 * Creates a work unit for {@code dataset}. When the dataset state store is enabled,
 * the previous run's actual high watermark becomes this run's low watermark and the
 * high watermark is pinned to {@code LAST_PROCESSED_TS}; otherwise (or when no prior
 * state exists) the low watermark defaults to 0.
 *
 * @throws RuntimeException wrapping any {@link IOException} from the state store
 */
@Override
protected WorkUnit workUnitForDataset(Dataset dataset) {
  WorkUnit workUnit = new WorkUnit();
  if (isDatasetStateStoreEnabled) {
    JobState.DatasetState datasetState;
    try {
      datasetState =
          (JobState.DatasetState) this.fsDatasetStateStore.getLatestDatasetState(this.jobName, dataset.getUrn());
    } catch (IOException e) {
      // Include the URN so the failing dataset is identifiable; keep the cause.
      throw new RuntimeException("Failed to get latest dataset state for " + dataset.getUrn(), e);
    }
    LongWatermark previousWatermark = new LongWatermark(0);
    // Fix: the original called get(0) unconditionally, which throws
    // IndexOutOfBoundsException when state exists but holds no task states.
    if (datasetState != null && !datasetState.getTaskStatesAsWorkUnitStates().isEmpty()) {
      previousWatermark =
          datasetState.getTaskStatesAsWorkUnitStates().get(0).getActualHighWatermark(LongWatermark.class);
    }
    workUnit.setWatermarkInterval(new WatermarkInterval(previousWatermark, new LongWatermark(LAST_PROCESSED_TS)));
  }
  return workUnit;
}
/**
 * Creates a work unit for a single dataset partition, keyed in the state store as
 * {@code <datasetUrn>@<partitionUrn>}. When the dataset state store is enabled, the
 * previous run's actual high watermark becomes this run's low watermark and the high
 * watermark is pinned to {@code LAST_PROCESSED_TS}; the low watermark defaults to 0
 * when no prior state exists.
 *
 * @throws RuntimeException wrapping any {@link IOException} from the state store
 */
@Override
protected WorkUnit workUnitForDatasetPartition(PartitionableDataset.DatasetPartition partition) {
  WorkUnit workUnit = new WorkUnit();
  if (isDatasetStateStoreEnabled) {
    String datasetUrn = partition.getDataset().getUrn() + "@" + partition.getUrn();
    JobState.DatasetState datasetState;
    try {
      datasetState = (JobState.DatasetState) this.fsDatasetStateStore.getLatestDatasetState(this.jobName, datasetUrn);
    } catch (IOException e) {
      // Include the URN so the failing partition is identifiable; keep the cause.
      throw new RuntimeException("Failed to get latest dataset state for " + datasetUrn, e);
    }
    LongWatermark previousWatermark = new LongWatermark(0);
    // Fix: the original called get(0) unconditionally, which throws
    // IndexOutOfBoundsException when state exists but holds no task states.
    if (datasetState != null && !datasetState.getTaskStatesAsWorkUnitStates().isEmpty()) {
      previousWatermark =
          datasetState.getTaskStatesAsWorkUnitStates().get(0).getActualHighWatermark(LongWatermark.class);
    }
    workUnit.setWatermarkInterval(new WatermarkInterval(previousWatermark, new LongWatermark(LAST_PROCESSED_TS)));
  }
  return workUnit;
}
// Bound the workunit by the low/high watermark pair computed above.
workUnit.setWatermarkInterval(interval);
/**
 * Builds a test WorkUnitState whose work unit spans one day of Long watermarks
 * (20160101235959 -> 20160102235959) on an APPEND_ONLY extract.
 */
public static WorkUnitState getWorkUnitState1() {
  WorkUnit workUnit = new WorkUnit(new Extract(Extract.TableType.APPEND_ONLY, "namespace", "table"));
  LongWatermark low = new LongWatermark(20160101235959L);
  LongWatermark high = new LongWatermark(20160102235959L);
  workUnit.setWatermarkInterval(new WatermarkInterval(low, high));
  State jobState = new State();
  return new WorkUnitState(workUnit, jobState);
}
}
/**
 * Builds a mock WorkUnitState for a single Kafka partition bounded by the given
 * low/high multi-watermarks, pre-populated with topic, partition, broker and
 * schema-registry properties.
 */
private WorkUnitState getMockWorkUnitState(Long lowWaterMark, Long highWaterMark) {
  // Fix: avoid double-brace initialization (new ArrayList<Long>(){{...}}) — it creates
  // an anonymous subclass per call that pins the enclosing instance and breaks equals().
  ArrayList<Long> lowWatermarks = new ArrayList<>();
  lowWatermarks.add(lowWaterMark);
  ArrayList<Long> highWatermarks = new ArrayList<>();
  highWatermarks.add(highWaterMark);
  WorkUnit mockWorkUnit = WorkUnit.createEmpty();
  mockWorkUnit.setWatermarkInterval(new WatermarkInterval(
      new MultiLongWatermark(lowWatermarks), new MultiLongWatermark(highWatermarks)));
  WorkUnitState mockWorkUnitState = new WorkUnitState(mockWorkUnit, new State());
  mockWorkUnitState.setProp(KafkaSource.TOPIC_NAME, TEST_TOPIC_NAME);
  mockWorkUnitState.setProp(KafkaSource.PARTITION_ID, "1");
  mockWorkUnitState.setProp(ConfigurationKeys.KAFKA_BROKERS, "localhost:8080");
  mockWorkUnitState.setProp(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_URL, TEST_URL);
  return mockWorkUnitState;
}
/**
 * Sets the watermark interval on {@code workUnit} for CopyableFile entities,
 * when the watermark generator produces one; no-op for other entity types.
 */
private void setWorkUnitWatermark(WorkUnit workUnit, Optional<CopyableFileWatermarkGenerator> watermarkGenerator,
    CopyEntity copyEntity)
    throws IOException {
  if (copyEntity instanceof CopyableFile) {
    CopyableFile copyableFile = (CopyableFile) copyEntity;
    Optional<WatermarkInterval> maybeInterval =
        CopyableFileWatermarkHelper.getCopyableFileWatermark(copyableFile, watermarkGenerator);
    if (maybeInterval.isPresent()) {
      workUnit.setWatermarkInterval(maybeInterval.get());
    }
  }
}
/**
 * Creates one work unit per parallelism slot; slot i is assigned the record range
 * whose low watermark is i * numRecordsPerExtract + 1 and whose expected high
 * watermark is (i + 1) * numRecordsPerExtract.
 */
private List<WorkUnit> initialWorkUnits() {
  List<WorkUnit> workUnitList = Lists.newArrayList();
  for (int slot = 0; slot < num_parallelism; slot++) {
    WorkUnit workUnit = WorkUnit.create(newExtract(Extract.TableType.APPEND_ONLY, namespace, table));
    LongWatermark low = new LongWatermark(slot * numRecordsPerExtract + 1);
    LongWatermark high = new LongWatermark((slot + 1) * numRecordsPerExtract);
    workUnit.setWatermarkInterval(new WatermarkInterval(low, high));
    workUnit.setProp(WORK_UNIT_INDEX, slot);
    workUnitList.add(workUnit);
  }
  return workUnitList;
}
@Override public List<WorkUnit> pack(Map<String, List<WorkUnit>> workUnitsByTopic, int numContainers) { setWorkUnitEstSizes(workUnitsByTopic); List<WorkUnit> workUnits = Lists.newArrayList(); for (List<WorkUnit> workUnitsForTopic : workUnitsByTopic.values()) { // For each topic, merge all empty workunits into a single workunit, so that a single // empty task will be created instead of many. MultiWorkUnit zeroSizeWorkUnit = MultiWorkUnit.createEmpty(); for (WorkUnit workUnit : workUnitsForTopic) { if (DoubleMath.fuzzyEquals(getWorkUnitEstSize(workUnit), 0.0, EPS)) { addWorkUnitToMultiWorkUnit(workUnit, zeroSizeWorkUnit); } else { workUnit.setWatermarkInterval(getWatermarkIntervalFromWorkUnit(workUnit)); workUnits.add(workUnit); } } if (!zeroSizeWorkUnit.getWorkUnits().isEmpty()) { workUnits.add(squeezeMultiWorkUnit(zeroSizeWorkUnit)); } } return worstFitDecreasingBinPacking(workUnits, numContainers); } }
/**
 * Writes this partition's watermark range onto {@code workUnit} as a
 * WatermarkInterval, and sets the user-specified-high-watermark and
 * last-partition properties when the corresponding flags are true.
 */
public void serialize(WorkUnit workUnit) {
  workUnit.setWatermarkInterval(
      new WatermarkInterval(new LongWatermark(lowWatermark), new LongWatermark(highWatermark)));
  if (hasUserSpecifiedHighWatermark) {
    workUnit.setProp(Partition.HAS_USER_SPECIFIED_HIGH_WATERMARK, true);
  }
  if (isLastPartition) {
    // NOTE(review): the constant name "IS_LAST_PARTIITON" is misspelled at its
    // declaration site; correcting it here alone would not compile.
    workUnit.setProp(Partition.IS_LAST_PARTIITON, true);
  }
}
// Record the dataset URN on the workunit, then set its interval from the last
// committed watermarks for this table (low) to the expected per-partition
// watermarks for this run (high).
watermarkWorkunit.setProp(ConfigurationKeys.DATASET_URN_KEY, tableKey);
watermarkWorkunit.setWatermarkInterval(new WatermarkInterval(new MultiKeyValueLongWatermark(
    this.previousWatermarks.get(tableKey)), new MultiKeyValueLongWatermark(expectedPartitionWatermarks)));
// Apply the previously built watermark interval to this workunit.
workUnit.setWatermarkInterval(interval);