/**
 * Records the offset-related counters into the {@link SourceState} when the source shuts down,
 * so they are available to whatever consumes the job-level state afterwards.
 */
@Override
public void shutdown(SourceState state) {
  // Each counter goes under its own configuration key; order of the writes is irrelevant.
  state.setProp(ConfigurationKeys.FAIL_TO_GET_OFFSET_COUNT, this.failToGetOffsetCount);
  state.setProp(ConfigurationKeys.OFFSET_TOO_EARLY_COUNT, this.offsetTooEarlyCount);
  state.setProp(ConfigurationKeys.OFFSET_TOO_LATE_COUNT, this.offsetTooLateCount);
}
/**
 * Sets the job watermark in the {@link SourceState}, which will be copied to all
 * {@code WorkUnitState}s. The job watermark is a complete partition name; during the next run
 * of this job, fresh work units will be created starting from this partition.
 *
 * @param state     the job-level source state to record the watermark into
 * @param watermark the complete partition name to use as the watermark
 */
protected void setJobWatermark(SourceState state, String watermark) {
  state.setProp(ComplianceConfigurationKeys.HIVE_PURGER_WATERMARK, watermark);
  // Parameterized SLF4J logging: avoids eager string concatenation when INFO is disabled.
  log.info("Setting job watermark for the job: {}", watermark);
}
/**
 * Defaults the Kafka value deserializer to {@link KafkaGsonDeserializer} when the caller has not
 * configured one, then delegates work-unit creation to the parent source.
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  boolean deserializerConfigured =
      state.contains(Kafka09ConsumerClient.GOBBLIN_CONFIG_VALUE_DESERIALIZER_CLASS_KEY);
  if (!deserializerConfigured) {
    state.setProp(Kafka09ConsumerClient.GOBBLIN_CONFIG_VALUE_DESERIALIZER_CLASS_KEY,
        KafkaGsonDeserializer.class.getName());
  }
  return super.getWorkunits(state);
}
/**
 * Pins the date partition pattern to daily granularity before running the parent
 * initialization.
 */
@Override
protected void init(SourceState state) {
  String dailyPattern = DatePartitionType.DAY.getDateTimePattern();
  state.setProp(DATE_PARTITIONED_SOURCE_PARTITION_PATTERN, dailyPattern);
  super.init(state);
}
/**
 * Builds a minimal {@link SourceState} for consuming the given topic from the embedded Kafka
 * test broker, bootstrapping from the earliest available offset.
 *
 * @param topic the topic to whitelist for this source
 * @return a configured source state ready to hand to the Kafka source
 */
private SourceState createSourceState(String topic) {
  String brokerAddress = "localhost:" + kafkaTestHelper.getKafkaServerPort();
  SourceState state = new SourceState();
  state.setProp(ConfigurationKeys.KAFKA_BROKERS, brokerAddress);
  state.setProp(KafkaSource.TOPIC_WHITELIST, topic);
  state.setProp(KafkaSource.BOOTSTRAP_WITH_OFFSET, "earliest");
  state.setProp(KafkaSource.GOBBLIN_KAFKA_CONSUMER_CLIENT_FACTORY_CLASS,
      Kafka09ConsumerClient.Factory.class.getName());
  return state;
}
/**
 * Builds a minimal {@link SourceState} for the given topic and asks a
 * {@link KafkaSimpleStreamingSource} to materialize its work units.
 *
 * @param topic the Kafka topic to whitelist (also reused as the job name)
 * @return the work units produced by the streaming source for the topic
 */
private List<WorkUnit> getWorkUnits(String topic) {
  SourceState ss = new SourceState();
  ss.setProp(ConfigurationKeys.KAFKA_BROKERS, "localhost:" + _kafkaTestHelper.getKafkaServerPort());
  ss.setProp(KafkaSimpleStreamingSource.TOPIC_WHITELIST, topic);
  ss.setProp(ConfigurationKeys.JOB_NAME_KEY, topic);
  ss.setProp(KafkaSimpleStreamingSource.TOPIC_KEY_DESERIALIZER,
      "org.apache.kafka.common.serialization.StringDeserializer");
  ss.setProp(KafkaSimpleStreamingSource.TOPIC_VALUE_DESERIALIZER,
      "org.apache.kafka.common.serialization.ByteArrayDeserializer");
  // Diamond operator: the type arguments are inferred from the declaration on the left.
  KafkaSimpleStreamingSource<String, byte[]> simpleSource = new KafkaSimpleStreamingSource<>();
  return simpleSource.getWorkunits(ss);
}
/**
 * Runs {@link CopySource} with no prioritization configured and checks that every file from
 * every file set of every dataset ends up in exactly one flattened work unit.
 */
@Test
public void testNoPrioritization() throws Exception {
  SourceState state = new SourceState();
  state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, "file:///");
  state.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, "file:///");
  state.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/target/dir");
  state.setProp(DatasetUtils.DATASET_PROFILE_CLASS_KEY, MyFinder.class.getName());

  CopySource source = new CopySource();
  List<WorkUnit> workunits = JobLauncherUtils.flattenWorkUnits(source.getWorkunits(state));

  // Without prioritization, nothing is dropped: one work unit per file across all datasets.
  int expectedCount = MyFinder.DATASETS * MyDataset.FILE_SETS * MyFileSet.FILES;
  Assert.assertEquals(workunits.size(), expectedCount);
}
/**
 * Defaults the Hadoop input format class from the configured Hive SerDe when the caller has not
 * set one explicitly, then delegates work-unit creation to the parent source.
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  boolean inputFormatConfigured = state.contains(HadoopFileInputSource.FILE_INPUT_FORMAT_CLASS_KEY);
  if (!inputFormatConfigured) {
    String inputFormatClassName = HiveSerDeWrapper.getDeserializer(state).getInputFormatClassName();
    state.setProp(HadoopFileInputSource.FILE_INPUT_FORMAT_CLASS_KEY, inputFormatClassName);
  }
  return super.getWorkunits(state);
}
/**
 * Test when work unit retry policy is on partial, but the job commit policy is "full":
 * no previous work unit states should be returned for retry.
 */
@Test
public void testGetPreviousWorkUnitStatesOnPartialRetryFullCommit() {
  SourceState sourceState = new SourceState(new State(), this.previousWorkUnitStates);
  sourceState.setProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY, "onpartial");
  sourceState.setProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "full");
  // Collections.emptyList() is the typed replacement for the raw Collections.EMPTY_LIST field.
  Assert.assertEquals(this.testSource.getPreviousWorkUnitStatesForRetry(sourceState),
      Collections.emptyList());
}
/**
 * Test when work unit retry policy is on partial, and the job commit policy is "partial":
 * the previous work unit states are expected back for retry.
 */
@Test
public void testGetPreviousWorkUnitStatesOnPartialRetryPartialCommit() {
  SourceState sourceState = new SourceState(new State(), this.previousWorkUnitStates);
  sourceState.setProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY, "onpartial");
  sourceState.setProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "partial");

  List<WorkUnitState> actualStates = this.testSource.getPreviousWorkUnitStatesForRetry(sourceState);
  Assert.assertEquals(actualStates, this.expectedPreviousWorkUnitStates);
}
/**
 * Test when work unit retry policy is on full, and the job commit policy is "full":
 * the previous work unit states are expected back for retry.
 */
@Test
public void testGetPreviousWorkUnitStatesOnFullRetryFullCommit() {
  SourceState sourceState = new SourceState(new State(), this.previousWorkUnitStates);
  sourceState.setProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY, "onfull");
  sourceState.setProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "full");

  List<WorkUnitState> actualStates = this.testSource.getPreviousWorkUnitStatesForRetry(sourceState);
  Assert.assertEquals(actualStates, this.expectedPreviousWorkUnitStates);
}
/**
 * Test when work unit retry policy is on full, but the job commit policy is "partial":
 * no previous work unit states should be returned for retry.
 */
@Test
public void testGetPreviousWorkUnitStatesOnFullRetryPartialCommit() {
  SourceState sourceState = new SourceState(new State(), this.previousWorkUnitStates);
  sourceState.setProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY, "onfull");
  sourceState.setProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, "partial");
  // Collections.emptyList() is the typed replacement for the raw Collections.EMPTY_LIST field.
  Assert.assertEquals(this.testSource.getPreviousWorkUnitStatesForRetry(sourceState),
      Collections.emptyList());
}
/**
 * Verifies that {@link HadoopFsHelper#getFileStream(String)} returns the raw contents of an
 * uncompressed file.
 */
@Test
public void testGetFileStreamSucceedsWithUncompressedFile() throws FileBasedHelperException, IOException {
  SourceState sourceState = new SourceState();
  URL rootUrl = getClass().getResource("/source/");
  String rootPath = rootUrl.toString();
  sourceState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, rootPath);

  HadoopFsHelper fsHelper = new HadoopFsHelper(sourceState);
  fsHelper.connect();

  URL url = getClass().getResource("/source/simple.tsv");
  String path = url.toString();
  // try-with-resources: the original leaked the stream returned by getFileStream().
  try (InputStream in = fsHelper.getFileStream(path)) {
    String contents = IOUtils.toString(in, "UTF-8");
    Assert.assertEquals(contents, "A\t1\nB\t2\n");
  }
}
/**
 * Verifies that {@link HadoopFsHelper#getFileStream(String)} transparently decompresses a
 * gzip-compressed file (the decoded contents match the uncompressed fixture).
 */
@Test
public void testGetFileStreamSucceedsWithGZIPFile() throws FileBasedHelperException, IOException {
  SourceState sourceState = new SourceState();
  URL rootUrl = getClass().getResource("/source/");
  String rootPath = rootUrl.toString();
  sourceState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, rootPath);

  HadoopFsHelper fsHelper = new HadoopFsHelper(sourceState);
  fsHelper.connect();

  URL url = getClass().getResource("/source/simple.tsv.gz");
  String path = url.toString();
  // try-with-resources: the original leaked the stream returned by getFileStream().
  try (InputStream in = fsHelper.getFileStream(path)) {
    String contents = IOUtils.toString(in, "UTF-8");
    Assert.assertEquals(contents, "A\t1\nB\t2\n");
  }
}
}
@Test(expectedExceptions = IllegalArgumentException.class) public void testConnectFailsWithS3URLWithoutAWSCredentials() throws FileBasedHelperException { Configuration conf = new Configuration(); // plain conf, no S3 credentials SourceState sourceState = new SourceState(); sourceState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, "s3://support.elasticmapreduce/spark/install-spark/"); HadoopFsHelper fsHelper = new HadoopFsHelper(sourceState, conf); fsHelper.connect(); }
/**
 * Test the never-retry policy: no previous work unit states should be returned for retry.
 */
@Test
public void testGetPreviousWorkUnitStatesNeverRetry() {
  SourceState sourceState = new SourceState(new State(), this.previousWorkUnitStates);
  sourceState.setProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY, "never");
  // Collections.emptyList() is the typed replacement for the raw Collections.EMPTY_LIST field.
  Assert.assertEquals(this.testSource.getPreviousWorkUnitStatesForRetry(sourceState),
      Collections.emptyList());
}
/**
 * Sanity-checks the reflexivity of {@link WorkUnitState#equals(Object)}: an instance must equal
 * itself.
 */
@Test
public void testEquals() {
  SourceState sourceState = new SourceState();
  sourceState.setProp("testKey", "true");

  WorkUnit workUnit = new WorkUnit(sourceState, null);
  WorkUnitState workUnitState = new WorkUnitState(workUnit);
  Assert.assertEquals(workUnitState, workUnitState);
}
}
/**
 * Test when work unit retry is disabled: no previous work unit states should be returned for
 * retry.
 */
@Test
public void testGetPreviousWorkUnitStatesDisabledRetry() {
  SourceState sourceState = new SourceState(new State(), this.previousWorkUnitStates);
  sourceState.setProp(ConfigurationKeys.WORK_UNIT_RETRY_ENABLED_KEY, Boolean.FALSE);
  // Collections.emptyList() is the typed replacement for the raw Collections.EMPTY_LIST field.
  Assert.assertEquals(this.testSource.getPreviousWorkUnitStatesForRetry(sourceState),
      Collections.emptyList());
}
/**
 * Test the always-retry policy, with WORK_UNIT_RETRY_ENABLED_KEY enabled: the previous work
 * unit states are expected back for retry.
 */
@Test
public void testGetPreviousWorkUnitStatesEnabledRetry() {
  SourceState sourceState = new SourceState(new State(), this.previousWorkUnitStates);
  sourceState.setProp(ConfigurationKeys.WORK_UNIT_RETRY_ENABLED_KEY, Boolean.TRUE);

  List<WorkUnitState> actualStates = this.testSource.getPreviousWorkUnitStatesForRetry(sourceState);
  Assert.assertEquals(actualStates, this.expectedPreviousWorkUnitStates);
}