/**
 * Handles common parameter initialization that a subclass might want to leverage.
 * @param context the mapper context
 * @param conf the job configuration
 */
protected void doSetup(Context context, Configuration conf) {
  // If a custom separator has been used, decode it back from Base64 encoding.
  separator = conf.get(ImportTsv.SEPARATOR_CONF_KEY);
  if (separator == null) {
    separator = ImportTsv.DEFAULT_SEPARATOR;
  } else {
    separator = Bytes.toString(Base64.getDecoder().decode(separator));
  }
  // Should never get 0, as we set this to a valid value in the job configuration.
  ts = conf.getLong(ImportTsv.TIMESTAMP_CONF_KEY, 0);
  skipBadLines = context.getConfiguration().getBoolean(ImportTsv.SKIP_LINES_CONF_KEY, true);
  badLineCount = context.getCounter("ImportTsv", "Bad Lines");
}
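/*
 * For context, a sketch of the producer side: the job driver Base64-encodes a
 * custom separator before storing it, so non-printable bytes survive the
 * string-typed Configuration round trip. This is illustrative, not the actual
 * ImportTsv driver code; the key constant here mirrors ImportTsv.SEPARATOR_CONF_KEY.
 */
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import org.apache.hadoop.conf.Configuration;

public class SeparatorConfigSketch {
  static final String SEPARATOR_CONF_KEY = "importtsv.separator"; // illustrative key name

  public static Configuration configure(String separator) {
    Configuration conf = new Configuration();
    // Encode so that separators like '\001' survive as plain configuration strings.
    conf.set(SEPARATOR_CONF_KEY,
        Base64.getEncoder().encodeToString(separator.getBytes(StandardCharsets.UTF_8)));
    return conf;
  }
}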
@Override
protected void doCleanup(Context context) throws IOException, InterruptedException {
  long grandTotal = 0;
  for (HLLCounter hll : cuboidHLLMap.values()) {
    grandTotal += hll.getCountEstimate();
  }
  // Guard against division by zero when no rows were sampled.
  double mapperOverlapRatio = grandTotal == 0 ? 0 : (double) totalRowsBeforeMerge / grandTotal;

  CubeStatsWriter.writePartialCuboidStatistics(context.getConfiguration(), new Path(output), //
      cuboidHLLMap, samplingPercentage, baseCuboidRowCountInMappers.size(), mapperOverlapRatio, taskId);
}
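/*
 * Worked example with illustrative numbers: if this task saw totalRowsBeforeMerge
 * = 3,000,000 raw rows while the summed per-cuboid HLL estimates come to
 * grandTotal = 2,400,000, then mapperOverlapRatio = 3,000,000 / 2,400,000 = 1.25.
 */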
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  this.outKey = new AvroKey<>();
  this.deltaComparatorOptional = Optional.absent();
  Configuration conf = context.getConfiguration();
  String deltaSchemaProviderClassName = conf.get(DELTA_SCHEMA_PROVIDER);
  if (deltaSchemaProviderClassName != null) {
    this.deltaFieldNamesProvider = GobblinConstructorUtils.invokeConstructor(
        AvroDeltaFieldNameProvider.class, deltaSchemaProviderClassName, conf);
    this.deltaComparatorOptional = Optional.of(new AvroValueDeltaSchemaComparator(deltaFieldNamesProvider));
  }
}
@Override
protected void doSetup(Context context) throws IOException {
  super.bindCurrentConfiguration(context.getConfiguration());
  cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase(Locale.ROOT);
  // Only used in the Build job, not in the Merge job.
  cuboidLevel = context.getConfiguration().getInt(BatchConstants.CFG_CUBE_CUBOID_LEVEL, 0);
  KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
  cubeDesc = CubeManager.getInstance(config).getCube(cubeName).getDescriptor();
  measuresDescs = cubeDesc.getMeasures();
  codec = new BufferedMeasureCodec(measuresDescs);
  aggs = new MeasureAggregators(measuresDescs);
  input = new Object[measuresDescs.size()];
  result = new Object[measuresDescs.size()];

  // At the base cuboid level (level 0) every measure is aggregated; at higher
  // levels, skip measures whose type only aggregates in the base cuboid.
  List<Integer> needAggMeasuresList = Lists.newArrayList();
  for (int i = 0; i < measuresDescs.size(); i++) {
    if (cuboidLevel == 0
        || !measuresDescs.get(i).getFunction().getMeasureType().onlyAggrInBaseCuboid()) {
      needAggMeasuresList.add(i);
    }
  }
  needAggrMeasures = new int[needAggMeasuresList.size()];
  for (int i = 0; i < needAggMeasuresList.size(); i++) {
    needAggrMeasures[i] = needAggMeasuresList.get(i);
  }
}
@Override
protected void doSetup(Context context) throws IOException {
  super.bindCurrentConfiguration(context.getConfiguration());
  Configuration conf = context.getConfiguration();
  mos = new MultipleOutputs(context);

  KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
  String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
  CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
  CubeDesc cubeDesc = cube.getDescriptor();
  List<TblColRef> uhcColumns = cubeDesc.getAllUHCColumns();

  // Each task handles exactly one ultra-high-cardinality column, selected by its task id.
  int taskId = context.getTaskAttemptID().getTaskID().getId();
  col = uhcColumns.get(taskId);
  logger.info("column name: " + col.getIdentity());

  if (cube.getDescriptor().getShardByColumns().contains(col)) {
    // For shard-by columns
    builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
    builder.init(null, 0, null);
  } else {
    // For global dictionary columns
    String hdfsDir = conf.get(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR);
    DictionaryInfo dictionaryInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype());
    String builderClass = cubeDesc.getDictionaryBuilderClass(col);
    builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass);
    builder.init(dictionaryInfo, 0, hdfsDir);
  }
}
/**
 * Handles initializing this class with objects specific to it (i.e., the parser).
 * Common initialization that might be leveraged by a subclass is done in
 * <code>doSetup</code>. Hence a subclass may choose to override this method
 * and call <code>doSetup</code> as well before handling its own custom params.
 *
 * @param context the mapper context
 */
@Override
protected void setup(Context context) {
  Configuration conf = context.getConfiguration();
  doSetup(context, conf);

  parser = new ImportTsv.TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
  if (parser.getRowKeyColumnIndex() == -1) {
    throw new RuntimeException("No row key column specified");
  }
  this.kvCreator = new CellCreator(conf);
}
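/*
 * A hypothetical subclass illustrating the pattern the Javadoc above describes:
 * let the base class run its common and parser initialization, then read
 * subclass-specific parameters. The class name and the "custom.param" key are
 * invented for this sketch.
 */
public class CustomTsvImporterMapper extends TsvImporterMapper {
  private String customParam;

  @Override
  protected void setup(Context context) {
    super.setup(context); // runs doSetup plus the parser initialization above
    customParam = context.getConfiguration().get("custom.param", "default"); // subclass-specific setting
  }
}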
@Override
protected void doSetup(Context context) throws IOException {
  super.bindCurrentConfiguration(context.getConfiguration());
  Configuration conf = context.getConfiguration();

  KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
  String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
  CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
  cubeConfig = cube.getConfig();
  baseCuboidId = cube.getCuboidScheduler().getBaseCuboidId();
  baseCuboidRowCountInMappers = Lists.newLinkedList();

  output = conf.get(BatchConstants.CFG_OUTPUT_PATH);
  samplingPercentage = Integer.parseInt(conf.get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
  taskId = context.getTaskAttemptID().getTaskID().getId();
  cuboidHLLMap = Maps.newHashMap();
}
@Override
protected void doSetup(Context context) throws IOException {
  super.bindCurrentConfiguration(context.getConfiguration());
  KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

  String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase(Locale.ROOT);
  CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
  CubeDesc cubeDesc = cube.getDescriptor();
  List<MeasureDesc> measuresDescs = cubeDesc.getMeasures();

  codec = new BufferedMeasureCodec(measuresDescs);
  aggs = new MeasureAggregators(measuresDescs);
  input = new Object[measuresDescs.size()];
  result = new Object[measuresDescs.size()];
  outputKey = new Text();
  outputValue = new Text();
}
@Override
protected void doSetup(Context context) throws IOException {
  super.bindCurrentConfiguration(context.getConfiguration());
}
@Override
protected void doSetup(Context context) throws IOException {
  super.bindCurrentConfiguration(context.getConfiguration());
}
protected void cleanup(Context context) throws IOException, InterruptedException {
  Configuration c = context.getConfiguration();
  String startRow = c.get(KEY_STARTROW);
  String lastRow = c.get(KEY_LASTROW);
  LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
  LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
  if (startRow != null && startRow.length() > 0) {
    assertEquals(startRow, first);
  }
  if (lastRow != null && lastRow.length() > 0) {
    assertEquals(lastRow, last);
  }
}
/** {@inheritDoc} */
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
  X.println("___ Reducer: " + ctx.getTaskAttemptID());

  String taskId = ctx.getTaskAttemptID().toString();
  String workDir = FileSystem.getLocal(ctx.getConfiguration()).getWorkingDirectory().toString();

  assertNull(taskWorkDirs.put(workDir, taskId));
}
private static void logError(String msg, Context context) throws IOException {
  TableName table = getTableName(context.getConfiguration());
  LOG.error("Failure in chain verification: " + msg);
  try (Connection connection = ConnectionFactory.createConnection(context.getConfiguration());
      Admin admin = connection.getAdmin()) {
    LOG.error("cluster status:\n" + admin.getClusterStatus());
    LOG.error("table regions:\n" + Joiner.on("\n").join(admin.getTableRegions(table)));
  }
}
@Override
protected void setup(Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue>.Context context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.kvCreator = new CellCreator(conf);
}
@Override
protected void setup(Reducer<BytesWritable, BytesWritable, BytesWritable, BytesWritable>.Context context)
    throws IOException, InterruptedException {
  super.setup(context);
  this.connection = ConnectionFactory.createConnection(context.getConfiguration());
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  Configuration c = context.getConfiguration();
  cleanup(c);
}
@Override
protected void setup(Context context) {
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    metricNames.add(aggregators[i].getName());
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec());
}
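/*
 * What getCombiningFactory() buys us, using LongSumAggregatorFactory as a
 * concrete case. This is a sketch; the aggregator names here are invented, and
 * package names vary across Druid versions (older releases use
 * io.druid.query.aggregation). The combining factory re-aggregates partial
 * results by the aggregator's output name rather than the original input column.
 */
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;

public class CombiningFactorySketch {
  public static void main(String[] args) {
    // Map side: sums the raw input column "rows" into an output named "rows_sum".
    AggregatorFactory mapSide = new LongSumAggregatorFactory("rows_sum", "rows");
    // Reduce side: sums the already-aggregated "rows_sum" values emitted by mappers.
    AggregatorFactory reduceSide = mapSide.getCombiningFactory();
    System.out.println(reduceSide.getName()); // prints "rows_sum"
  }
}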
@Override
protected void setup(Context context) {
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  aggregators = config.getSchema().getDataSchema().getAggregators();
  combiningAggs = new AggregatorFactory[aggregators.length];
  for (int i = 0; i < aggregators.length; ++i) {
    combiningAggs[i] = aggregators[i].getCombiningFactory();
  }
  typeHelperMap = InputRowSerde.getTypeHelperMap(
      config.getSchema().getDataSchema().getParser().getParseSpec().getDimensionsSpec());
}
@Override
protected void setup(Context context) {
  config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
  determineIntervals = !config.getSegmentGranularIntervals().isPresent();
}
@Override
protected void setup(Context context) {
  // Lazily initialize the shared static config once per JVM, guarded by
  // double-checked locking on the reducer class.
  if (config == null) {
    synchronized (DeterminePartitionsDimSelectionBaseReducer.class) {
      if (config == null) {
        config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
      }
    }
  }
}
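/*
 * The snippet above uses double-checked locking to initialize a shared static
 * config once. For that idiom to be safe under the Java memory model, the
 * checked field must be declared volatile. A minimal generic sketch of the
 * pattern (class and method names are illustrative):
 */
public final class LazyConfigHolder {
  // volatile is what makes the first, unsynchronized null check safe.
  private static volatile HadoopDruidIndexerConfig config;

  static HadoopDruidIndexerConfig get(org.apache.hadoop.conf.Configuration conf) {
    if (config == null) {                 // fast path, no lock
      synchronized (LazyConfigHolder.class) {
        if (config == null) {             // re-check under the lock
          config = HadoopDruidIndexerConfig.fromConfiguration(conf);
        }
      }
    }
    return config;
  }
}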