/**
 * Initialize this TaskWriter with the specified context object.
 */
@SuppressWarnings("unchecked")
public void initialize(TaskWriterContext context) {
    this.context = context;
    this.parameter = (T) context.getWriterParameter();
    this.handlers = CacheBuilder.newBuilder().build(new CacheLoader<Class<? extends Record>, Handler>() {
        @Override
        public Handler load(Class<? extends Record> clazz) throws Exception {
            Handler handler = getHandler(clazz);
            if (handler == null) {
                throw new RecordNotSupportException(clazz);
            }
            handler.initialize(context);
            return handler;
        }
    });
}
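// For context: the CacheBuilder/CacheLoader pair above gives a lazily populated, memoizing
// per-class handler cache. A minimal standalone sketch of that Guava pattern (class and
// value names here are illustrative, not from this project):
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

public class HandlerCacheSketch {
    public static void main(String[] args) throws Exception {
        // Each key is computed once by load(...) and then served from the cache.
        LoadingCache<Class<?>, String> handlers = CacheBuilder.newBuilder()
                .build(new CacheLoader<Class<?>, String>() {
                    @Override
                    public String load(Class<?> clazz) {
                        System.out.println("building handler for " + clazz.getSimpleName());
                        return "handler-for-" + clazz.getSimpleName();
                    }
                });

        handlers.get(String.class);  // triggers load(...) and one-time initialization
        handlers.get(String.class);  // cache hit, load(...) is not called again
    }
}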
/**
 * Perform the final load on the grouped data.
 */
protected void load(List<RecordChunk<T>> recordChunkList, final TaskWriterContext context) {
    //statistic before
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    long startTime = System.currentTimeMillis();

    //do load
    submitAndWait(recordChunkList, context);

    //statistic after
    writerStatistic.setTimeForLoad(System.currentTimeMillis() - startTime);
}
public void prePush() {
    context.beginSession();
    context.taskWriterSession().setData(WriterStatistic.KEY, new WriterStatistic(context.taskId(), parameter));
}
protected RecordChunk<T> mapping(RecordChunk<T> recordChunk, TaskWriterContext context) {
    //statistic before
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    writerStatistic.setRecordsCountBeforeMapping(recordChunk.getRecords().size());
    Long startTime = System.currentTimeMillis();

    //do mapping
    Long taskId = Long.valueOf(context.taskId());
    RecordChunk<T> newChunk = recordChunk.copyWithoutRecords();
    for (T record : recordChunk.getRecords()) {
        // look up the media mappings whose source matches this record's namespace and name
        List<MediaMappingInfo> list = context.getService(MediaService.class)
                .getMediaMappingsByMediaAndTarget(taskId, record.RSI().getNamespace(), record.RSI().getName(),
                        context.getWriterParameter().getSupportedSourceTypes(), true);
        // per-record handling of the mappings is omitted in this excerpt
    }
    // statistic-after and the return of newChunk are omitted in this excerpt
}
/**
 * Optionally merge the records before loading.
 */
protected RecordChunk<T> merge(RecordChunk<T> recordChunk, TaskWriterContext context) {
    //statistic before
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    writerStatistic.setRecordsCountBeforeMerge(recordChunk.getRecords().size());
    Long startTime = System.currentTimeMillis();

    //do merge
    if (context.getWriterParameter().isMerging()) {
        Merger<T> merger = MergerFactory.getMerger(recordChunk.getRecords().get(0).getClass());
        RecordChunk<T> newChunk = merger.merge(recordChunk);

        //statistic after
        writerStatistic.setTimeForMerge(System.currentTimeMillis() - startTime);
        writerStatistic.setRecordsCountAfterMerge(newChunk.getRecords().size());
        return newChunk;
    } else {
        //statistic after
        writerStatistic.setTimeForMerge(System.currentTimeMillis() - startTime);
        writerStatistic.setRecordsCountAfterMerge(recordChunk.getRecords().size());
        return recordChunk;
    }
}
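// The project's Merger/MergerFactory implementations are not shown in this excerpt. As a
// rough, hypothetical illustration of what merging typically achieves (collapsing several
// change events for the same key into one), not this project's actual implementation:
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class MergeSketch {
    // A toy change event: the last event per key wins (hypothetical structure).
    record Event(long key, String payload) {}

    static List<Event> mergeByKey(List<Event> events) {
        Map<Long, Event> latest = new LinkedHashMap<>();
        for (Event e : events) {
            latest.put(e.key(), e); // later events overwrite earlier ones for the same key
        }
        return new ArrayList<>(latest.values());
    }

    public static void main(String[] args) {
        List<Event> merged = mergeByKey(List.of(
                new Event(1, "insert v1"),
                new Event(1, "update v2"),
                new Event(2, "insert v1")));
        System.out.println(merged.size()); // 2 -- the two events for key 1 collapsed into one
    }
}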
@Override
public void initialize(TaskWriterContext context) {
    PluginWriterParameter parameter = context.getWriterParameter();
    List<MediaMappingInfo> taskMappings = context.getService(MediaService.class)
            .getMediaMappingsByTask(Long.valueOf(context.taskId()), false)
            .stream()
            .filter(i -> parameter.getSupportedSourceTypes().contains(i.getTargetMediaSource().getType()))
            .collect(Collectors.toList());

    // Two thread pools are constructed next; the leading ThreadPoolExecutor arguments
    // (pool sizes, keep-alive, field assignments) are missing from this excerpt, only the
    // trailing constructor arguments survive.
    // Chunk pool: unbuffered hand-off with caller-runs back-pressure.
            new SynchronousQueue<>(),
            new NamedThreadFactory(MessageFormat.format("Task-{0}-Writer-{1}-chunk", context.taskId(), parameter.getPluginName())),
            new ThreadPoolExecutor.CallerRunsPolicy());
    // Load pool: buffered queue of pending load work.
            new LinkedBlockingQueue<>(),
            new NamedThreadFactory(MessageFormat.format("Task-{0}-Writer-{1}-load", context.taskId(), parameter.getPluginName())));
}
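// The two executors above differ mainly in their work queue. A standalone sketch of that
// distinction, with pool sizes assumed and a default thread factory in place of the
// project's NamedThreadFactory:
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class WriterPoolsSketch {
    public static void main(String[] args) {
        // Chunk pool: SynchronousQueue buffers nothing, so when all workers are busy the
        // CallerRunsPolicy makes the submitting thread run the task itself (back-pressure).
        ExecutorService chunkPool = new ThreadPoolExecutor(2, 2, 0L, TimeUnit.MILLISECONDS,
                new SynchronousQueue<>(), Executors.defaultThreadFactory(),
                new ThreadPoolExecutor.CallerRunsPolicy());

        // Load pool: LinkedBlockingQueue queues submitted tasks until a worker frees up.
        ExecutorService loadPool = new ThreadPoolExecutor(2, 2, 0L, TimeUnit.MILLISECONDS,
                new LinkedBlockingQueue<>(), Executors.defaultThreadFactory());

        chunkPool.shutdown();
        loadPool.shutdown();
    }
}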
public BaseRecordHandler(HdfsTaskWriter hdfsTaskWriter, TaskWriterContext taskWriterContext, HdfsWriterParameter hdfsWriterParameter) {
    this.hdfsTaskWriter = hdfsTaskWriter;
    this.hdfsWriterParameter = hdfsWriterParameter;
    this.hdfsFilePathGenerator = new HdfsFilePathGenerator(taskWriterContext, hdfsWriterParameter);
    this.fileStreamHolder = new FileStreamHolder(taskWriterContext.taskId(), hdfsWriterParameter);
}
if (records.size() > 0) {
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    long key = RecordMeta.mediaMapping(records.get(0)).getTargetMediaSource().getId();
    long tableGroupStartTime = System.currentTimeMillis();

    MediaMappingInfo mappingInfo = RecordMeta.mediaMapping(mr.getValue().get(0));
    List<List<T>> list = BatchSplitter.splitForBatch(mr.getValue(), context.getWriterParameter().getBatchSize());
    for (List<T> item : list) {
        Map<String, List<String>> datas = buildData(item, context);
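// BatchSplitter.splitForBatch is not shown in this excerpt; splitting a record list into
// fixed-size batches behaves like Guava's Lists.partition (illustration only, not the
// project's implementation):
import com.google.common.collect.Lists;
import java.util.List;

public class BatchSplitSketch {
    public static void main(String[] args) {
        List<Integer> records = List.of(1, 2, 3, 4, 5, 6, 7);
        // batchSize = 3 -> [[1, 2, 3], [4, 5, 6], [7]]
        List<List<Integer>> batches = Lists.partition(records, 3);
        System.out.println(batches);
    }
}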
public RecordChunk<T> transform(RecordChunk<T> recordChunk, TaskWriterContext context) {
    Long taskId = Long.valueOf(context.taskId());
    RecordChunk<T> newChunk = recordChunk.copyWithoutRecords();
    for (T record : recordChunk.getRecords()) {
        T result = transformOne(record, RecordMeta.mediaMapping(record), context);
        if (result != null) {
            newChunk.merge(result);
        }
    }
    return newChunk;
}
private void loadInGlobalMode(List<RdbEventRecord> records, TaskWriterContext context) {
    boolean originalUseBatch = context.getWriterParameter().isUseBatch();
    try {
        // Disable batch mode: grouping by table does not aggregate records that share the same SQL.
        context.getWriterParameter().setUseBatch(false);
        List<List<RdbEventRecord>> list = new ArrayList<>();
        list.add(records);
        doHandle(context, list);
    } finally {
        // Restore the previous useBatch value so other callers are not affected.
        context.getWriterParameter().setUseBatch(originalUseBatch);
    }
}
public void postPush() {
    BaseWriterStatistic statistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    if (parameter.isPerfStatistic()) {
        logger.info(statistic.toJsonString());
    }
}
private String getHbaseFilePath(String schemaName, String tableName, MediaMappingInfo mappingInfo) {
    FileSplitMode splitMode = getFileSplitMode(mappingInfo);
    String dateStr = getDateStr();
    String taskFileName = tableName + "-" + taskWriterContext.taskId();
    if ("default".equals(schemaName)) {
        if (splitMode.equals(FileSplitMode.DAY)) {
            return MessageFormat.format(FILE_PATH_HBASE_PATTERN_DAY,
                    hdfsWriterParameter.getHbasePath(), tableName, dateStr, taskFileName);
        } else {
            return MessageFormat.format(FILE_PATH_HBASE_PATTERN_DAY_SPLIT,
                    hdfsWriterParameter.getHbasePath(), tableName, dateStr, buildSplitRange(splitMode), taskFileName);
        }
    } else {
        if (splitMode.equals(FileSplitMode.DAY)) {
            return MessageFormat.format(FILE_PATH_HBASE_NAMESPACE_PATTERN_DAY,
                    hdfsWriterParameter.getHbasePath(), schemaName, tableName, dateStr, taskFileName);
        } else {
            return MessageFormat.format(FILE_PATH_HBASE_NAMESPACE_PATTERN_DAY_SPLIT,
                    hdfsWriterParameter.getHbasePath(), schemaName, tableName, dateStr, buildSplitRange(splitMode), taskFileName);
        }
    }
}
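// The FILE_PATH_HBASE_* pattern constants are defined elsewhere. As an illustration of how
// MessageFormat fills such a date-partitioned path (the pattern string below is assumed,
// not the project's actual constant):
import java.text.MessageFormat;

public class HdfsPathSketch {
    public static void main(String[] args) {
        // Hypothetical day pattern: basePath / table / date / file name
        String pattern = "{0}/{1}/{2}/{3}";
        String path = MessageFormat.format(pattern, "/hbase/export", "t_order", "2024-01-15", "t_order-101");
        System.out.println(path); // /hbase/export/t_order/2024-01-15/t_order-101
    }
}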
private void loadDeletedRecords(Function<List<List<RdbEventRecord>>, Void> function) {
    List<List<RdbEventRecord>> batchRecords = new ArrayList<>();
    for (RecordGroupHolder.TableLoadData tableData : this.tables) {
        if (context.getWriterParameter().isUseBatch()) {
            batchRecords.addAll(split(tableData.getDeleteDatas(), context.getWriterParameter().getBatchSize()));
        } else {
            for (RdbEventRecord data : tableData.getDeleteDatas()) {
                batchRecords.add(Arrays.asList(data));
            }
        }
    }
    function.apply(batchRecords);
}
private void doGroup(List<RdbEventRecord> records, TaskWriterContext context) {
    //statistic before
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    long key = RecordMeta.mediaMapping(records.get(0)).getTargetMediaSource().getId();
    writerStatistic.getGroupLoadStatistics().get(key).getExtendStatistic()
            .put(StatisticKey.TABLE_GROUP_RECORDS_COUNT, records.size());
    long startTime = System.currentTimeMillis();

    //do group
    records.stream().forEach(r -> groupForOneRecord(r));
    logger.debug("Table Size in this Batch is " + tables.size());

    //statistic after
    long timeThrough = System.currentTimeMillis() - startTime;
    writerStatistic.getGroupLoadStatistics().get(key).getExtendStatistic()
            .put(StatisticKey.TABLE_GROUP_TIME_THROUGH, timeThrough);
    writerStatistic.getGroupLoadStatistics().get(key).getExtendStatistic()
            .put(StatisticKey.TABLE_GROUP_TABLE_COUNT, tables.size());
    writerStatistic.getGroupLoadStatistics().get(key).getExtendStatistic()
            .put(StatisticKey.TABLE_GROUP_TIME_PER_RECORD,
                    new BigDecimal(((double) timeThrough) / records.size()).setScale(2, RoundingMode.UP).doubleValue());
}
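// The per-record time above is rounded up to two decimal places; for example, 157 ms of
// grouping spread over 1000 records (example values):
import java.math.BigDecimal;
import java.math.RoundingMode;

public class PerRecordTimeSketch {
    public static void main(String[] args) {
        long timeThrough = 157;   // total grouping time in ms
        int recordCount = 1000;
        double perRecord = new BigDecimal(((double) timeThrough) / recordCount)
                .setScale(2, RoundingMode.UP)
                .doubleValue();
        System.out.println(perRecord); // 0.16 -- 0.157 rounded up to two decimals
    }
}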
public void loadByGroup(Function<List<List<RdbEventRecord>>, Void> function) {
    if (context.getWriterParameter().isMerging()) {
        // Execute delete statements first: unique-key updates are typically handled as
        // delete + insert, and deleting first avoids concurrent-update conflicts.
        loadDeletedRecords(function);
        loadInsertAndUpdatedRecords(function);
    } else {
        boolean originalUseBatch = context.getWriterParameter().isUseBatch();
        try {
            // Disable batch mode: grouping by table does not aggregate records that share the same SQL.
            context.getWriterParameter().setUseBatch(false);
            loadRecordsByTable(function);
        } finally {
            // Restore the previous useBatch value so other callers are not affected.
            context.getWriterParameter().setUseBatch(originalUseBatch);
        }
    }
}
/**
 * Rebuild the data according to the MediaMapping configuration, cleaning and transforming it along the way.
 */
protected RecordChunk<T> transform(RecordChunk<T> recordChunk, TaskWriterContext context) {
    //statistic before
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    writerStatistic.setRecordsCountBeforeTransform(recordChunk.getRecords().size());
    long startTime = System.currentTimeMillis();

    //do transform
    Transformer<T> transformer = TransformerFactory.getTransformer(recordChunk.getRecords().get(0).getClass());
    RecordChunk<T> newChunk = transformer.transform(recordChunk, context);

    //statistic after
    writerStatistic.setRecordsCountAfterTransform(newChunk.getRecords().size());
    writerStatistic.setTimeForTransform(System.currentTimeMillis() - startTime);
    return newChunk;
}
private int loadOne(TaskWriterContext context, T record, DbDialect dbDialect, LobCreator lobCreator, JdbcTemplate template) {
    return template.update(getSql(record), ps -> {
        fillPreparedStatement(ps, lobCreator, record, dbDialect, context.getWriterParameter().isSyncAutoAddColumn());
    });
}
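// loadOne relies on Spring's JdbcTemplate.update(String, PreparedStatementSetter). A minimal
// standalone illustration of that call shape (the table name and DataSource are assumed,
// insertRow is a hypothetical helper):
import javax.sql.DataSource;
import org.springframework.jdbc.core.JdbcTemplate;

public class LoadOneSketch {
    // Binds the SQL placeholders through a PreparedStatementSetter lambda, like loadOne above.
    static int insertRow(DataSource dataSource, long id, String name) {
        JdbcTemplate template = new JdbcTemplate(dataSource);
        return template.update("INSERT INTO t_demo (id, name) VALUES (?, ?)", ps -> {
            ps.setLong(1, id);      // first placeholder
            ps.setString(2, name);  // second placeholder
        });
    }
}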
private void buildSql(List<RdbEventRecord> records, TaskWriterContext context) {
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    long key = RecordMeta.mediaMapping(records.get(0)).getTargetMediaSource().getId();
    writerStatistic.getGroupLoadStatistics().get(key).getExtendStatistic()
            .put(StatisticKey.SQL_BUILD_RECORDS_COUNT, records.size());
private void loadInsertAndUpdatedRecords(Function<List<List<RdbEventRecord>>, Void> function) {
    List<List<RdbEventRecord>> batchRecords = new ArrayList<>();
    for (RecordGroupHolder.TableLoadData tableData : this.tables) {
        if (context.getWriterParameter().isUseBatch()) {
            batchRecords.addAll(split(tableData.getInsertDatas(), context.getWriterParameter().getBatchSize()));
            batchRecords.addAll(split(tableData.getUpdateDatas(), context.getWriterParameter().getBatchSize()));
        } else {
            for (RdbEventRecord data : tableData.getInsertDatas()) {
                batchRecords.add(Arrays.asList(data));
            }
            for (RdbEventRecord data : tableData.getUpdateDatas()) {
                batchRecords.add(Arrays.asList(data));
            }
        }
    }
    function.apply(batchRecords);
}
WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
writerStatistic.setRecordsCountBeforeGroup(recordChunk.getRecords().size());
long startTime = System.currentTimeMillis();