public FailedWorkUnit() {
  super();
  // Bind this work unit to the factory that creates a deliberately failing task.
  TaskUtils.setTaskFactoryClass(this, FailedTaskFactory.class);
}
/**
 * @return A {@link WorkUnit} that will run a {@link NoopTask}.
 */
public static WorkUnit noopWorkunit() {
  WorkUnit workUnit = new WorkUnit();
  TaskUtils.setTaskFactoryClass(workUnit, Factory.class);
  return workUnit;
}
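// A minimal usage sketch, assuming a hypothetical Source implementation that has
// nothing real to process; the enclosing class (NoopTask) is inferred from the
// Javadoc above.
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  return Collections.singletonList(NoopTask.noopWorkunit());
}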
protected WorkUnit createWorkUnit(Dataset dataset) throws IOException {
  WorkUnit workUnit = new WorkUnit();
  TaskUtils.setTaskFactoryClass(workUnit, MRCompactionTaskFactory.class);
  suite.save(dataset, workUnit);
  return workUnit;
}
protected WorkUnit createWorkUnitForFailure(Dataset dataset) throws IOException {
  WorkUnit workUnit = new FailedTask.FailedWorkUnit();
  TaskUtils.setTaskFactoryClass(workUnit, CompactionFailedTask.CompactionFailedTaskFactory.class);
  suite.save(dataset, workUnit);
  return workUnit;
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  WorkUnit wu = new WorkUnit();
  TaskUtils.setTaskFactoryClass(wu, FailsWithExceptionTaskFactory.class);
  return Collections.singletonList(wu);
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  List<WorkUnit> workUnits = super.getWorkunits(state);
  for (WorkUnit workUnit : workUnits) {
    TaskUtils.setTaskFactoryClass(workUnit, SleepingTaskFactory.class);
  }
  return workUnits;
}
protected WorkUnit createWorkUnitForFailure(Dataset dataset, String reason) throws IOException {
  WorkUnit workUnit = new FailedTask.FailedWorkUnit();
  workUnit.setProp(CompactionVerifier.COMPACTION_VERIFICATION_FAIL_REASON, reason);
  TaskUtils.setTaskFactoryClass(workUnit, CompactionFailedTask.CompactionFailedTaskFactory.class);
  suite.save(dataset, workUnit);
  return workUnit;
}
/**
 * Create a work unit to copy a source table to a target table using a staging table in between.
 * @param dataset {@link HiveDataset} for the source table.
 * @param destinationTable {@link StageableTableMetadata} specifying staging and target tables metadata.
 * @param partitionName name of the partition to copy; ignored when null or empty.
 */
public static HiveWorkUnit tableCopyWorkUnit(HiveDataset dataset, StageableTableMetadata destinationTable,
    @Nullable String partitionName) {
  HiveWorkUnit workUnit = new HiveWorkUnit(dataset);
  workUnit.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.TABLE_COPY.name());
  workUnit.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable));
  if (!Strings.isNullOrEmpty(partitionName)) {
    workUnit.setPartitionName(partitionName);
  }
  TaskUtils.setTaskFactoryClass(workUnit, HiveMaterializerTaskFactory.class);
  return workUnit;
}
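// Hedged usage sketch for the table copy above; `dataset` and `destinationTable`
// are assumed to be built elsewhere, and the partition name is illustrative.
HiveWorkUnit copyWu = tableCopyWorkUnit(dataset, destinationTable, "datepartition=2020-01-01-00");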
/**
 * Create a work unit for each configuration defined, or a single work unit if no configurations are defined.
 * @param state see {@link org.apache.gobblin.configuration.SourceState}
 * @return list of workunits
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  List<WorkUnit> workUnits = Lists.newArrayList();
  Config config = ConfigUtils.propertiesToConfig(state.getProperties());
  Config sourceConfig = ConfigUtils.getConfigOrEmpty(config, DATASET_CLEANER_SOURCE_PREFIX);
  List<String> configurationNames = ConfigUtils.getStringList(config, DATASET_CLEANER_CONFIGURATIONS);

  // use a dummy configuration name if none is set
  if (configurationNames.isEmpty()) {
    configurationNames = ImmutableList.of("DummyConfig");
  }

  for (String configurationName : configurationNames) {
    WorkUnit workUnit = WorkUnit.createEmpty();
    // configuration prefixed by the configuration name takes precedence over the source-specific
    // configuration, and the source-specific configuration takes precedence over the general configuration
    Config wuConfig = ConfigUtils.getConfigOrEmpty(sourceConfig, configurationName).withFallback(sourceConfig)
        .withFallback(config);
    workUnit.setProps(ConfigUtils.configToProperties(wuConfig), new Properties());
    TaskUtils.setTaskFactoryClass(workUnit, DatasetCleanerTaskFactory.class);
    workUnits.add(workUnit);
  }
  return workUnits;
}
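// A minimal sketch of the precedence chain used above, with an illustrative key:
// the named configuration wins over the source configuration, which wins over
// the general configuration.
Config general = ConfigFactory.parseString("retention.days = 30");
Config source = ConfigFactory.parseString("retention.days = 14");
Config named = ConfigFactory.parseString("retention.days = 7");
Config resolved = named.withFallback(source).withFallback(general);
// resolved.getInt("retention.days") == 7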
/**
 * Create a work unit to materialize a table / view to a target table using a staging table in between.
 * @param dataset {@link HiveDataset} for the source table.
 * @param storageFormat format in which the target table should be written.
 * @param destinationTable {@link StageableTableMetadata} specifying staging and target tables metadata.
 * @param partitionName name of the partition to materialize; ignored when null or empty.
 */
public static HiveWorkUnit viewMaterializationWorkUnit(HiveDataset dataset,
    HiveConverterUtils.StorageFormat storageFormat, StageableTableMetadata destinationTable,
    @Nullable String partitionName) {
  HiveWorkUnit workUnit = new HiveWorkUnit(dataset);
  workUnit.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.TABLE_MATERIALIZATION.name());
  workUnit.setProp(STORAGE_FORMAT_KEY, storageFormat.name());
  workUnit.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable));
  if (!Strings.isNullOrEmpty(partitionName)) {
    workUnit.setPartitionName(partitionName);
  }
  TaskUtils.setTaskFactoryClass(workUnit, HiveMaterializerTaskFactory.class);
  return workUnit;
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  List<String> dirs = Splitter.on(",").splitToList(state.getProp(INPUT_DIRECTORIES_KEY));
  String outputBase = state.getProp(OUTPUT_LOCATION);
  List<WorkUnit> workUnits = Lists.newArrayList();

  for (String dir : dirs) {
    try {
      Path input = new Path(dir);
      Path output = new Path(outputBase, input.getName());

      WorkUnit workUnit = new WorkUnit();
      TaskUtils.setTaskFactoryClass(workUnit, MRTaskFactory.class);

      // Configure a word-count MR job for this input directory and serialize it
      // into the work unit so the task created by MRTaskFactory can launch it later.
      Configuration conf = new Configuration();
      Job job = Job.getInstance(conf, "WordCount_" + input.getName());
      job.setJarByClass(MRTaskFactoryTest.class);
      job.setMapperClass(TokenizerMapper.class);
      job.setCombinerClass(IntSumReducer.class);
      job.setReducerClass(IntSumReducer.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(IntWritable.class);
      job.setNumReduceTasks(1);
      FileInputFormat.addInputPath(job, input);
      FileOutputFormat.setOutputPath(job, output);
      MRTask.serializeJobToState(workUnit, job);
      workUnits.add(workUnit);
    } catch (IOException ioe) {
      log.error("Failed to create MR job for " + dir, ioe);
    }
  }
  return workUnits;
}
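// Illustrative driver properties for the source above; the paths are hypothetical,
// and the constants resolve to the actual configuration key names.
Properties props = new Properties();
props.setProperty(INPUT_DIRECTORIES_KEY, "/data/input/books,/data/input/articles");
props.setProperty(OUTPUT_LOCATION, "/data/output/wordcount");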
/**
 * Create a work unit to materialize a query to a target table using a staging table in between.
 * @param query the query to materialize.
 * @param storageFormat format in which the target table should be written.
 * @param destinationTable {@link StageableTableMetadata} specifying staging and target tables metadata.
 */
public static WorkUnit queryResultMaterializationWorkUnit(String query,
    HiveConverterUtils.StorageFormat storageFormat, StageableTableMetadata destinationTable) {
  WorkUnit workUnit = new WorkUnit();
  workUnit.setProp(MATERIALIZER_MODE_KEY, MaterializerMode.QUERY_RESULT_MATERIALIZATION.name());
  workUnit.setProp(STORAGE_FORMAT_KEY, storageFormat.name());
  workUnit.setProp(QUERY_RESULT_TO_MATERIALIZE_KEY, query);
  workUnit.setProp(STAGEABLE_TABLE_METADATA_KEY, HiveSource.GENERICS_AWARE_GSON.toJson(destinationTable));
  TaskUtils.setTaskFactoryClass(workUnit, HiveMaterializerTaskFactory.class);
  HiveTask.disableHiveWatermarker(workUnit);
  return workUnit;
}
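// Hedged usage sketch: materialize a query's result into the target table described
// by `destinationTable` (assumed built elsewhere); the query string is illustrative,
// and `storageFormat` is any supported {@link HiveConverterUtils.StorageFormat}.
WorkUnit queryWu = queryResultMaterializationWorkUnit(
    "SELECT id, name FROM src_db.src_table", storageFormat, destinationTable);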
protected WorkUnit createWorkUnit(int wuNumber, String eventBusId) {
  WorkUnit workUnit = new WorkUnit();
  TaskUtils.setTaskFactoryClass(workUnit, EventBusPublishingTaskFactory.class);
  workUnit.setProp(EVENTBUS_ID_KEY, eventBusId);
  workUnit.setProp(TASK_ID_KEY, wuNumber);
  return workUnit;
}
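// Illustrative fan-out (the bus id is hypothetical): one work unit per task number,
// all publishing to the same EventBus.
List<WorkUnit> workUnits = Lists.newArrayList();
for (int i = 0; i < 3; i++) {
  workUnits.add(createWorkUnit(i, "test-event-bus"));
}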