/**
 * {@inheritDoc}
 *
 * <p>The expected high watermark is the modified time of <code>table</code>, obtained from
 * {@link HiveUnitUpdateProvider#getUpdateTime(Table)}. The <code>tableProcessTime</code>
 * argument is not consulted by this implementation.
 *
 * @throws UpdateNotFoundException if there was an error fetching the update time using
 *         {@link HiveUnitUpdateProvider#getUpdateTime(Table)}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#getExpectedHighWatermark(org.apache.hadoop.hive.ql.metadata.Table, long)
 */
@Override
public LongWatermark getExpectedHighWatermark(Table table, long tableProcessTime) {
  // Resolve the table's update time first, then wrap it as the watermark value.
  final long tableUpdateTime = this.updateProvider.getUpdateTime(table);
  return new LongWatermark(tableUpdateTime);
}
/**
 * Decides whether a workunit should be created for <code>table</code>.
 *
 * <p>Reads the table's update time from the update provider and its create time via
 * <code>getCreateTime</code>, then delegates the decision to the three-argument
 * <code>shouldCreateWorkunit</code> overload.
 *
 * @param table the table being considered
 * @param lowWatermark the low watermark passed through to the three-argument overload
 * @return the result of the three-argument overload
 * @throws UpdateNotFoundException propagated from the calls made here; presumably raised when
 *         the update provider cannot determine an update time
 */
protected boolean shouldCreateWorkunit(Table table, LongWatermark lowWatermark)
    throws UpdateNotFoundException {
  // The update time is fetched before the create time, so a failure there surfaces first.
  final long tableUpdatedAt = this.updateProvider.getUpdateTime(table);
  final long tableCreatedAt = getCreateTime(table);

  return shouldCreateWorkunit(tableCreatedAt, tableUpdatedAt, lowWatermark);
}
/**
 * Decides whether a workunit should be created for <code>sourcePartition</code>.
 *
 * <p>Reads the partition's update time from the update provider and its create time via
 * <code>getCreateTime</code>, then delegates the decision to the three-argument
 * <code>shouldCreateWorkunit</code> overload.
 *
 * @param sourcePartition the partition being considered
 * @param lowWatermark the low watermark passed through to the three-argument overload
 * @return the result of the three-argument overload
 * @throws UpdateNotFoundException propagated from the calls made here; presumably raised when
 *         the update provider cannot determine an update time
 */
protected boolean shouldCreateWorkunit(Partition sourcePartition, LongWatermark lowWatermark)
    throws UpdateNotFoundException {
  // The update time is fetched before the create time, so a failure there surfaces first.
  final long partitionUpdatedAt = this.updateProvider.getUpdateTime(sourcePartition);
  final long partitionCreatedAt = getCreateTime(sourcePartition);

  return shouldCreateWorkunit(partitionCreatedAt, partitionUpdatedAt, lowWatermark);
}
/**
 * Checks that {@link DatePatternUpdateProvider} reports <code>EPOCH_2016_02_02</code> as the
 * update time of a partition whose location is under a <code>daily</code> directory.
 */
@Test
public void testDaily() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition dailyPartition = createMockPartitionWithLocation("/data/TestEvent/daily/2016/02/02");

  final long actualUpdateTime = provider.getUpdateTime(dailyPartition);

  Assert.assertEquals(actualUpdateTime, EPOCH_2016_02_02);
}
/**
 * Checks that {@link DatePatternUpdateProvider} reports <code>EPOCH_2016_02_02</code> as the
 * update time of a partition whose location is under a <code>daily_late</code> directory.
 */
@Test
public void testDailyLate() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition dailyLatePartition =
      createMockPartitionWithLocation("/data/TestEvent/daily_late/2016/02/02");

  final long actualUpdateTime = provider.getUpdateTime(dailyLatePartition);

  Assert.assertEquals(actualUpdateTime, EPOCH_2016_02_02);
}
/**
 * Checks that {@link DatePatternUpdateProvider} reports <code>EPOCH_2016_02_02_10</code> as the
 * update time of a partition whose location is under an <code>hourly_late</code> directory.
 */
@Test
public void testHourlyLate() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition hourlyLatePartition =
      createMockPartitionWithLocation("/data/TestEvent/hourly_late/2016/02/02/10");

  final long actualUpdateTime = provider.getUpdateTime(hourlyLatePartition);

  Assert.assertEquals(actualUpdateTime, EPOCH_2016_02_02_10);
}
/**
 * Checks that {@link DatePatternUpdateProvider} reports <code>EPOCH_2016_02_02_10</code> as the
 * update time of a partition whose location is under an <code>hourly</code> directory.
 */
@Test
public void testHourly() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition hourlyPartition =
      createMockPartitionWithLocation("/data/TestEvent/hourly/2016/02/02/10");

  final long actualUpdateTime = provider.getUpdateTime(hourlyPartition);

  Assert.assertEquals(actualUpdateTime, EPOCH_2016_02_02_10);
}
/**
 * Checks that {@link DatePatternUpdateProvider} reports <code>EPOCH_2016_02_02_10</code> as the
 * update time of a partition whose location is under an <code>hourly_deduped</code> directory.
 */
@Test
public void testHourlyDeduped() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition hourlyDedupedPartition =
      createMockPartitionWithLocation("/data/TestEvent/hourly_deduped/2016/02/02/10");

  final long actualUpdateTime = provider.getUpdateTime(hourlyDedupedPartition);

  Assert.assertEquals(actualUpdateTime, EPOCH_2016_02_02_10);
}
/**
 * Expects {@link UpdateNotFoundException} when the partition location is under an
 * <code>hourly</code> directory but contains the non-numeric segment <code>abc</code> in the
 * position where the other hourly tests have the day.
 */
@Test(expectedExceptions = UpdateNotFoundException.class)
public void testHourlyInvalid() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition malformedPartition =
      createMockPartitionWithLocation("/data/TestEvent/hourly/2016/02/abc/10");

  // The call itself is the assertion: the test passes only if it throws.
  provider.getUpdateTime(malformedPartition);
}
/**
 * Expects {@link UpdateNotFoundException} when the partition location has no
 * <code>daily</code>/<code>hourly</code>-style directory between the event name and the date
 * segments, unlike the locations used by the other tests.
 */
@Test(expectedExceptions = UpdateNotFoundException.class)
public void testNoMatchingPattern() throws Exception {
  HiveUnitUpdateProvider provider = new DatePatternUpdateProvider();
  Partition unmatchedPartition =
      createMockPartitionWithLocation("/data/TestEvent/2016/02/02/10");

  // The call itself is the assertion: the test passes only if it throws.
  provider.getUpdateTime(unmatchedPartition);
}
final long updateTime = this.updateProvider.getUpdateTime(sourcePartition); if (shouldValidate(sourcePartition)) { log.info(String.format("Validating partition: %s", sourcePartition.getCompleteName()));
try { final long updateTime = this.updateProvider.getUpdateTime(hiveDataset.getTable());
long updateTime = this.updateProvider.getUpdateTime(sourcePartition); if (shouldCreateWorkunit(sourcePartition, lowWatermark)) { log.debug(String.format("Processing partition: %s", sourcePartition));
long updateTime = this.updateProvider.getUpdateTime(hiveDataset.getTable());
/**
 * {@inheritDoc}
 *
 * <p>The expected high watermark is the modified time of <code>table</code>, obtained from
 * {@link HiveUnitUpdateProvider#getUpdateTime(Table)}. The <code>tableProcessTime</code>
 * argument is not consulted by this implementation.
 *
 * @throws UpdateNotFoundException if there was an error fetching the update time using
 *         {@link HiveUnitUpdateProvider#getUpdateTime(Table)}
 * @see org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker#getExpectedHighWatermark(org.apache.hadoop.hive.ql.metadata.Table, long)
 */
@Override
public LongWatermark getExpectedHighWatermark(Table table, long tableProcessTime) {
  // Resolve the table's update time first, then wrap it as the watermark value.
  final long tableUpdateTime = this.updateProvider.getUpdateTime(table);
  return new LongWatermark(tableUpdateTime);
}
/**
 * Decides whether a workunit should be created for <code>sourcePartition</code>.
 *
 * <p>Reads the partition's update time from the update provider and its create time via
 * <code>getCreateTime</code>, then delegates the decision to the three-argument
 * <code>shouldCreateWorkunit</code> overload.
 *
 * @param sourcePartition the partition being considered
 * @param lowWatermark the low watermark passed through to the three-argument overload
 * @return the result of the three-argument overload
 * @throws UpdateNotFoundException propagated from the calls made here; presumably raised when
 *         the update provider cannot determine an update time
 */
protected boolean shouldCreateWorkunit(Partition sourcePartition, LongWatermark lowWatermark)
    throws UpdateNotFoundException {
  // The update time is fetched before the create time, so a failure there surfaces first.
  final long partitionUpdatedAt = this.updateProvider.getUpdateTime(sourcePartition);
  final long partitionCreatedAt = getCreateTime(sourcePartition);

  return shouldCreateWorkunit(partitionCreatedAt, partitionUpdatedAt, lowWatermark);
}
/**
 * Decides whether a workunit should be created for <code>table</code>.
 *
 * <p>Reads the table's update time from the update provider and its create time via
 * <code>getCreateTime</code>, then delegates the decision to the three-argument
 * <code>shouldCreateWorkunit</code> overload.
 *
 * @param table the table being considered
 * @param lowWatermark the low watermark passed through to the three-argument overload
 * @return the result of the three-argument overload
 * @throws UpdateNotFoundException propagated from the calls made here; presumably raised when
 *         the update provider cannot determine an update time
 */
protected boolean shouldCreateWorkunit(Table table, LongWatermark lowWatermark)
    throws UpdateNotFoundException {
  // The update time is fetched before the create time, so a failure there surfaces first.
  final long tableUpdatedAt = this.updateProvider.getUpdateTime(table);
  final long tableCreatedAt = getCreateTime(table);

  return shouldCreateWorkunit(tableCreatedAt, tableUpdatedAt, lowWatermark);
}
final long updateTime = this.updateProvider.getUpdateTime(sourcePartition); if (shouldValidate(sourcePartition)) { log.info(String.format("Validating partition: %s", sourcePartition.getCompleteName()));
try { final long updateTime = this.updateProvider.getUpdateTime(hiveDataset.getTable());
long updateTime = this.updateProvider.getUpdateTime(hiveDataset.getTable());