/**
 * Initialize this TaskWriter with the specified context object.
 */
@SuppressWarnings("unchecked")
public void initialize(TaskWriterContext context) {
    this.context = context;
    this.parameter = (T) context.getWriterParameter();
    this.handlers = CacheBuilder.newBuilder().build(new CacheLoader<Class<? extends Record>, Handler>() {
        @Override
        public Handler load(Class<? extends Record> clazz) throws Exception {
            Handler handler = getHandler(clazz);
            if (handler == null) {
                throw new RecordNotSupportException(clazz);
            }
            handler.initialize(context);
            return handler;
        }
    });
}
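// For context: the CacheBuilder/CacheLoader pair above gives a lazily populated, memoizing
// per-class handler cache. A minimal standalone sketch of that Guava pattern (class and
// value names here are illustrative, not from this project):
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

public class HandlerCacheSketch {
    public static void main(String[] args) throws Exception {
        // Each key is computed once by load(...) and then served from the cache.
        LoadingCache<Class<?>, String> handlers = CacheBuilder.newBuilder()
                .build(new CacheLoader<Class<?>, String>() {
                    @Override
                    public String load(Class<?> clazz) {
                        System.out.println("building handler for " + clazz.getSimpleName());
                        return "handler-for-" + clazz.getSimpleName();
                    }
                });

        handlers.get(String.class);  // triggers load(...) and one-time initialization
        handlers.get(String.class);  // cache hit, load(...) is not called again
    }
}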
/**
 * Perform the final load on the grouped data.
 */
protected void load(List<RecordChunk<T>> recordChunkList, final TaskWriterContext context) {
    //statistic before
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    long startTime = System.currentTimeMillis();

    //do load
    submitAndWait(recordChunkList, context);

    //statistic after
    writerStatistic.setTimeForLoad(System.currentTimeMillis() - startTime);
}
public void prePush() {
    context.beginSession();
    context.taskWriterSession().setData(WriterStatistic.KEY, new WriterStatistic(context.taskId(), parameter));
}
protected RecordChunk<T> mapping(RecordChunk<T> recordChunk, TaskWriterContext context) {
    //statistic before
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    writerStatistic.setRecordsCountBeforeMapping(recordChunk.getRecords().size());
    Long startTime = System.currentTimeMillis();

    //do mapping
    Long taskId = Long.valueOf(context.taskId());
    RecordChunk<T> newChunk = recordChunk.copyWithoutRecords();
    for (T record : recordChunk.getRecords()) {
        // look up the media mappings whose source matches this record's namespace and name
        List<MediaMappingInfo> list = context.getService(MediaService.class)
                .getMediaMappingsByMediaAndTarget(taskId, record.RSI().getNamespace(), record.RSI().getName(),
                        context.getWriterParameter().getSupportedSourceTypes(), true);
        // per-record handling of the mappings is omitted in this excerpt
    }
    // statistic-after and the return of newChunk are omitted in this excerpt
}
/**
 * Optionally merge the records before loading.
 */
protected RecordChunk<T> merge(RecordChunk<T> recordChunk, TaskWriterContext context) {
    //statistic before
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    writerStatistic.setRecordsCountBeforeMerge(recordChunk.getRecords().size());
    Long startTime = System.currentTimeMillis();

    //do merge
    if (context.getWriterParameter().isMerging()) {
        Merger<T> merger = MergerFactory.getMerger(recordChunk.getRecords().get(0).getClass());
        RecordChunk<T> newChunk = merger.merge(recordChunk);

        //statistic after
        writerStatistic.setTimeForMerge(System.currentTimeMillis() - startTime);
        writerStatistic.setRecordsCountAfterMerge(newChunk.getRecords().size());
        return newChunk;
    } else {
        //statistic after
        writerStatistic.setTimeForMerge(System.currentTimeMillis() - startTime);
        writerStatistic.setRecordsCountAfterMerge(recordChunk.getRecords().size());
        return recordChunk;
    }
}
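// The project's Merger/MergerFactory implementations are not shown in this excerpt. As a
// rough, hypothetical illustration of what merging typically achieves (collapsing several
// change events for the same key into one), not this project's actual implementation:
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class MergeSketch {
    // A toy change event: the last event per key wins (hypothetical structure).
    record Event(long key, String payload) {}

    static List<Event> mergeByKey(List<Event> events) {
        Map<Long, Event> latest = new LinkedHashMap<>();
        for (Event e : events) {
            latest.put(e.key(), e); // later events overwrite earlier ones for the same key
        }
        return new ArrayList<>(latest.values());
    }

    public static void main(String[] args) {
        List<Event> merged = mergeByKey(List.of(
                new Event(1, "insert v1"),
                new Event(1, "update v2"),
                new Event(2, "insert v1")));
        System.out.println(merged.size()); // 2 -- the two events for key 1 collapsed into one
    }
}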
@Override
public void initialize(TaskWriterContext context) {
    PluginWriterParameter parameter = context.getWriterParameter();
    List<MediaMappingInfo> taskMappings = context.getService(MediaService.class)
            .getMediaMappingsByTask(Long.valueOf(context.taskId()), false)
            .stream()
            .filter(i -> parameter.getSupportedSourceTypes().contains(i.getTargetMediaSource().getType()))
            .collect(Collectors.toList());

    // Two thread pools are constructed next; the leading ThreadPoolExecutor arguments
    // (pool sizes, keep-alive, field assignments) are missing from this excerpt, only the
    // trailing constructor arguments survive.
    // Chunk pool: unbuffered hand-off with caller-runs back-pressure.
            new SynchronousQueue<>(),
            new NamedThreadFactory(MessageFormat.format("Task-{0}-Writer-{1}-chunk", context.taskId(), parameter.getPluginName())),
            new ThreadPoolExecutor.CallerRunsPolicy());
    // Load pool: buffered queue of pending load work.
            new LinkedBlockingQueue<>(),
            new NamedThreadFactory(MessageFormat.format("Task-{0}-Writer-{1}-load", context.taskId(), parameter.getPluginName())));
}
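// The two executors above differ mainly in their work queue. A standalone sketch of that
// distinction, with pool sizes assumed and a default thread factory in place of the
// project's NamedThreadFactory:
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class WriterPoolsSketch {
    public static void main(String[] args) {
        // Chunk pool: SynchronousQueue buffers nothing, so when all workers are busy the
        // CallerRunsPolicy makes the submitting thread run the task itself (back-pressure).
        ExecutorService chunkPool = new ThreadPoolExecutor(2, 2, 0L, TimeUnit.MILLISECONDS,
                new SynchronousQueue<>(), Executors.defaultThreadFactory(),
                new ThreadPoolExecutor.CallerRunsPolicy());

        // Load pool: LinkedBlockingQueue queues submitted tasks until a worker frees up.
        ExecutorService loadPool = new ThreadPoolExecutor(2, 2, 0L, TimeUnit.MILLISECONDS,
                new LinkedBlockingQueue<>(), Executors.defaultThreadFactory());

        chunkPool.shutdown();
        loadPool.shutdown();
    }
}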
public BaseRecordHandler(HdfsTaskWriter hdfsTaskWriter, TaskWriterContext taskWriterContext, HdfsWriterParameter hdfsWriterParameter) {
    this.hdfsTaskWriter = hdfsTaskWriter;
    this.hdfsWriterParameter = hdfsWriterParameter;
    this.hdfsFilePathGenerator = new HdfsFilePathGenerator(taskWriterContext, hdfsWriterParameter);
    this.fileStreamHolder = new FileStreamHolder(taskWriterContext.taskId(), hdfsWriterParameter);
}
if (records.size() > 0) {
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    long key = RecordMeta.mediaMapping(records.get(0)).getTargetMediaSource().getId();
    long tableGroupStartTime = System.currentTimeMillis();

    MediaMappingInfo mappingInfo = RecordMeta.mediaMapping(mr.getValue().get(0));
    List<List<T>> list = BatchSplitter.splitForBatch(mr.getValue(), context.getWriterParameter().getBatchSize());
    for (List<T> item : list) {
        Map<String, List<String>> datas = buildData(item, context);
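// BatchSplitter.splitForBatch is not shown in this excerpt; splitting a record list into
// fixed-size batches behaves like Guava's Lists.partition (illustration only, not the
// project's implementation):
import com.google.common.collect.Lists;
import java.util.List;

public class BatchSplitSketch {
    public static void main(String[] args) {
        List<Integer> records = List.of(1, 2, 3, 4, 5, 6, 7);
        // batchSize = 3 -> [[1, 2, 3], [4, 5, 6], [7]]
        List<List<Integer>> batches = Lists.partition(records, 3);
        System.out.println(batches);
    }
}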
public RecordChunk<T> transform(RecordChunk<T> recordChunk, TaskWriterContext context) {
    Long taskId = Long.valueOf(context.taskId());
    RecordChunk<T> newChunk = recordChunk.copyWithoutRecords();
    for (T record : recordChunk.getRecords()) {
        T result = transformOne(record, RecordMeta.mediaMapping(record), context);
        if (result != null) {
            newChunk.merge(result);
        }
    }
    return newChunk;
}
private void loadInGlobalMode(List<RdbEventRecord> records, TaskWriterContext context) {
    boolean originalUseBatch = context.getWriterParameter().isUseBatch();
    try {
        // Disable batch mode: grouping by table does not aggregate records that share the same SQL.
        context.getWriterParameter().setUseBatch(false);
        List<List<RdbEventRecord>> list = new ArrayList<>();
        list.add(records);
        doHandle(context, list);
    } finally {
        // Restore the previous useBatch value so other callers are not affected.
        context.getWriterParameter().setUseBatch(originalUseBatch);
    }
}
public void postPush() {
    BaseWriterStatistic statistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    if (parameter.isPerfStatistic()) {
        logger.info(statistic.toJsonString());
    }
}
private String getHbaseFilePath(String schemaName, String tableName, MediaMappingInfo mappingInfo) {
    FileSplitMode splitMode = getFileSplitMode(mappingInfo);
    String dateStr = getDateStr();
    String taskFileName = tableName + "-" + taskWriterContext.taskId();
    if ("default".equals(schemaName)) {
        if (splitMode.equals(FileSplitMode.DAY)) {
            return MessageFormat.format(FILE_PATH_HBASE_PATTERN_DAY,
                    hdfsWriterParameter.getHbasePath(), tableName, dateStr, taskFileName);
        } else {
            return MessageFormat.format(FILE_PATH_HBASE_PATTERN_DAY_SPLIT,
                    hdfsWriterParameter.getHbasePath(), tableName, dateStr, buildSplitRange(splitMode), taskFileName);
        }
    } else {
        if (splitMode.equals(FileSplitMode.DAY)) {
            return MessageFormat.format(FILE_PATH_HBASE_NAMESPACE_PATTERN_DAY,
                    hdfsWriterParameter.getHbasePath(), schemaName, tableName, dateStr, taskFileName);
        } else {
            return MessageFormat.format(FILE_PATH_HBASE_NAMESPACE_PATTERN_DAY_SPLIT,
                    hdfsWriterParameter.getHbasePath(), schemaName, tableName, dateStr, buildSplitRange(splitMode), taskFileName);
        }
    }
}
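// The FILE_PATH_HBASE_* pattern constants are defined elsewhere. As an illustration of how
// MessageFormat fills such a date-partitioned path (the pattern string below is assumed,
// not the project's actual constant):
import java.text.MessageFormat;

public class HdfsPathSketch {
    public static void main(String[] args) {
        // Hypothetical day pattern: basePath / table / date / file name
        String pattern = "{0}/{1}/{2}/{3}";
        String path = MessageFormat.format(pattern, "/hbase/export", "t_order", "2024-01-15", "t_order-101");
        System.out.println(path); // /hbase/export/t_order/2024-01-15/t_order-101
    }
}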
private void loadDeletedRecords(Function<List<List<RdbEventRecord>>, Void> function) {
    List<List<RdbEventRecord>> batchRecords = new ArrayList<>();
    for (RecordGroupHolder.TableLoadData tableData : this.tables) {
        if (context.getWriterParameter().isUseBatch()) {
            batchRecords.addAll(split(tableData.getDeleteDatas(), context.getWriterParameter().getBatchSize()));
        } else {
            for (RdbEventRecord data : tableData.getDeleteDatas()) {
                batchRecords.add(Arrays.asList(data));
            }
        }
    }
    function.apply(batchRecords);
}
private void doGroup(List<RdbEventRecord> records, TaskWriterContext context) {
    //statistic before
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    long key = RecordMeta.mediaMapping(records.get(0)).getTargetMediaSource().getId();
    writerStatistic.getGroupLoadStatistics().get(key).getExtendStatistic()
            .put(StatisticKey.TABLE_GROUP_RECORDS_COUNT, records.size());
    long startTime = System.currentTimeMillis();

    //do group
    records.stream().forEach(r -> groupForOneRecord(r));
    logger.debug("Table Size in this Batch is " + tables.size());

    //statistic after
    long timeThrough = System.currentTimeMillis() - startTime;
    writerStatistic.getGroupLoadStatistics().get(key).getExtendStatistic()
            .put(StatisticKey.TABLE_GROUP_TIME_THROUGH, timeThrough);
    writerStatistic.getGroupLoadStatistics().get(key).getExtendStatistic()
            .put(StatisticKey.TABLE_GROUP_TABLE_COUNT, tables.size());
    writerStatistic.getGroupLoadStatistics().get(key).getExtendStatistic()
            .put(StatisticKey.TABLE_GROUP_TIME_PER_RECORD,
                    new BigDecimal(((double) timeThrough) / records.size()).setScale(2, RoundingMode.UP).doubleValue());
}
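// The per-record time above is rounded up to two decimal places; for example, 157 ms of
// grouping spread over 1000 records (example values):
import java.math.BigDecimal;
import java.math.RoundingMode;

public class PerRecordTimeSketch {
    public static void main(String[] args) {
        long timeThrough = 157;   // total grouping time in ms
        int recordCount = 1000;
        double perRecord = new BigDecimal(((double) timeThrough) / recordCount)
                .setScale(2, RoundingMode.UP)
                .doubleValue();
        System.out.println(perRecord); // 0.16 -- 0.157 rounded up to two decimals
    }
}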
public void loadByGroup(Function<List<List<RdbEventRecord>>, Void> function) {
    if (context.getWriterParameter().isMerging()) {
        // Execute delete statements first: unique-key updates are typically handled as
        // delete + insert, and deleting first avoids concurrent-update conflicts.
        loadDeletedRecords(function);
        loadInsertAndUpdatedRecords(function);
    } else {
        boolean originalUseBatch = context.getWriterParameter().isUseBatch();
        try {
            // Disable batch mode: grouping by table does not aggregate records that share the same SQL.
            context.getWriterParameter().setUseBatch(false);
            loadRecordsByTable(function);
        } finally {
            // Restore the previous useBatch value so other callers are not affected.
            context.getWriterParameter().setUseBatch(originalUseBatch);
        }
    }
}
/**
 * Rebuild the data according to the MediaMapping configuration, cleaning and transforming it along the way.
 */
protected RecordChunk<T> transform(RecordChunk<T> recordChunk, TaskWriterContext context) {
    //statistic before
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    writerStatistic.setRecordsCountBeforeTransform(recordChunk.getRecords().size());
    long startTime = System.currentTimeMillis();

    //do transform
    Transformer<T> transformer = TransformerFactory.getTransformer(recordChunk.getRecords().get(0).getClass());
    RecordChunk<T> newChunk = transformer.transform(recordChunk, context);

    //statistic after
    writerStatistic.setRecordsCountAfterTransform(newChunk.getRecords().size());
    writerStatistic.setTimeForTransform(System.currentTimeMillis() - startTime);
    return newChunk;
}
private int loadOne(TaskWriterContext context, T record, DbDialect dbDialect, LobCreator lobCreator, JdbcTemplate template) {
    return template.update(getSql(record), ps -> {
        fillPreparedStatement(ps, lobCreator, record, dbDialect, context.getWriterParameter().isSyncAutoAddColumn());
    });
}
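// loadOne relies on Spring's JdbcTemplate.update(String, PreparedStatementSetter). A minimal
// standalone illustration of that call shape (the table name and DataSource are assumed,
// insertRow is a hypothetical helper):
import javax.sql.DataSource;
import org.springframework.jdbc.core.JdbcTemplate;

public class LoadOneSketch {
    // Binds the SQL placeholders through a PreparedStatementSetter lambda, like loadOne above.
    static int insertRow(DataSource dataSource, long id, String name) {
        JdbcTemplate template = new JdbcTemplate(dataSource);
        return template.update("INSERT INTO t_demo (id, name) VALUES (?, ?)", ps -> {
            ps.setLong(1, id);      // first placeholder
            ps.setString(2, name);  // second placeholder
        });
    }
}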
private void buildSql(List<RdbEventRecord> records, TaskWriterContext context) {
    WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
    long key = RecordMeta.mediaMapping(records.get(0)).getTargetMediaSource().getId();
    writerStatistic.getGroupLoadStatistics().get(key).getExtendStatistic()
            .put(StatisticKey.SQL_BUILD_RECORDS_COUNT, records.size());
private void loadInsertAndUpdatedRecords(Function<List<List<RdbEventRecord>>, Void> function) {
    List<List<RdbEventRecord>> batchRecords = new ArrayList<>();
    for (RecordGroupHolder.TableLoadData tableData : this.tables) {
        if (context.getWriterParameter().isUseBatch()) {
            batchRecords.addAll(split(tableData.getInsertDatas(), context.getWriterParameter().getBatchSize()));
            batchRecords.addAll(split(tableData.getUpdateDatas(), context.getWriterParameter().getBatchSize()));
        } else {
            for (RdbEventRecord data : tableData.getInsertDatas()) {
                batchRecords.add(Arrays.asList(data));
            }
            for (RdbEventRecord data : tableData.getUpdateDatas()) {
                batchRecords.add(Arrays.asList(data));
            }
        }
    }
    function.apply(batchRecords);
}
WriterStatistic writerStatistic = context.taskWriterSession().getData(WriterStatistic.KEY);
writerStatistic.setRecordsCountBeforeGroup(recordChunk.getRecords().size());
long startTime = System.currentTimeMillis();