@Override
protected void doSetup(Context context) throws IOException {
    tmpBuf = ByteBuffer.allocate(4096);

    Configuration conf = context.getConfiguration();
    bindCurrentConfiguration(conf);
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    CubeInstance cube = CubeManager.getInstance(config).getCube(conf.get(BatchConstants.CFG_CUBE_NAME));
    List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();

    // the UHC column this mapper handles is named by the parent directory of its input split
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    String colName = fileSplit.getPath().getParent().getName();

    for (int i = 0; i < uhcColumns.size(); i++) {
        if (uhcColumns.get(i).getIdentity().equalsIgnoreCase(colName)) {
            index = i;
            break;
        }
    }
    type = uhcColumns.get(index).getType();

    // for debug
    logger.info("column name: " + colName);
    logger.info("index: " + index);
    logger.info("type: " + type);
}
@Override
protected void setup(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));
    sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null);
    targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY, TableOutputFormat.OUTPUT_CONF_PREFIX);
    sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);
    targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);
    dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
    doDeletes = conf.getBoolean(DO_DELETES_CONF_KEY, true);
    doPuts = conf.getBoolean(DO_PUTS_CONF_KEY, true);

    sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);
    LOG.info("Read source hash manifest: " + sourceTableHash);
    LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");

    TableSplit split = (TableSplit) context.getInputSplit();
    ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());

    sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);
    findNextKeyHashPair();

    // create a hasher, but don't start it right away
    // instead, find the first hash batch at or after the start row
    // and skip any rows that come before. they will be caught by the previous task
    targetHasher = new HashTable.ResultHasher();
}
@Override
protected void doSetup(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    bindCurrentConfiguration(conf);
    config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    cubeSeg = cube.getSegmentById(conf.get(BatchConstants.CFG_CUBE_SEGMENT_ID));
    flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
    intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSeg), cubeDesc);

    // map each global-dictionary column to its index on the flat table and
    // prepare a value set per column
    globalColumns = cubeDesc.getAllGlobalDictColumns();
    globalColumnIndex = new int[globalColumns.size()];
    globalColumnValues = Lists.newArrayListWithExpectedSize(globalColumns.size());
    for (int i = 0; i < globalColumns.size(); i++) {
        TblColRef colRef = globalColumns.get(i);
        int columnIndexOnFlatTbl = intermediateTableDesc.getColumnIndex(colRef);
        globalColumnIndex[i] = columnIndexOnFlatTbl;
        globalColumnValues.add(Sets.<String> newHashSet());
    }

    splitKey = DictionaryGetterUtil.getInputSplitSignature(cubeSeg, context.getInputSplit());
}
@SuppressWarnings("unchecked") protected void setup(Context context) throws IOException, InterruptedException { // Find the Mapper from the TaggedInputSplit. TaggedInputSplit inputSplit = (TaggedInputSplit) context.getInputSplit(); mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit .getMapperClass(), context.getConfiguration()); }
@SuppressWarnings("unchecked") protected void setup(Context context) throws IOException, InterruptedException { // Find the Mapper from the TaggedInputSplit. TaggedInputSplit inputSplit = (TaggedInputSplit) context.getInputSplit(); mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit .getMapperClass(), context.getConfiguration()); }
@SuppressWarnings("unchecked") protected void setup(Context context) throws IOException, InterruptedException { // Find the Mapper from the TaggedInputSplit. TaggedInputSplit inputSplit = (TaggedInputSplit) context.getInputSplit(); mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit .getMapperClass(), context.getConfiguration()); }
@SuppressWarnings("unchecked") protected void setup(Context context) throws IOException, InterruptedException { // Find the Mapper from the TaggedInputSplit. TaggedInputSplit inputSplit = (TaggedInputSplit) context.getInputSplit(); mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit .getMapperClass(), context.getConfiguration()); }
@Override
protected void setup(Context context) {
    String filename = ((FileSplit) context.getInputSplit()).getPath().getName();
    documentId = new Text(filename);
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    FileSplit split = (FileSplit) context.getInputSplit();
    flag = split.getPath().getName(); // determine which dataset this record comes from by its file name
}
@SuppressWarnings("unchecked") protected void setup(Context context) throws IOException, InterruptedException { // Find the Mapper from the TaggedInputSplit. TaggedInputSplit inputSplit = (TaggedInputSplit) context.getInputSplit(); mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit .getMapperClass(), context.getConfiguration()); }
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    // assert that the user gets a FileSplit (as opposed to the MultiInputTaggedSplit) from the context
    Preconditions.checkArgument(context.getInputSplit() instanceof FileSplit);
    try {
        // assert that the user gets the TextInputFormat, as opposed to the MultiInputFormat, from the context
        Preconditions.checkArgument(context.getInputFormatClass() == TextInputFormat.class);
    } catch (ClassNotFoundException e) {
        Throwables.propagate(e);
    }
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.publishConfiguration(context.getConfiguration());

    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
    segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME).toUpperCase();

    config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());

    cubeManager = CubeManager.getInstance(config);
    cube = cubeManager.getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    mergedCubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

    // int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    newKeyBuf = new byte[256]; // size will auto-grow

    // decide which source segment
    InputSplit inputSplit = context.getInputSplit();
    String filePath = ((FileSplit) inputSplit).getPath().toString();
    System.out.println("filePath:" + filePath);
    String jobID = extractJobIDFromPath(filePath);
    System.out.println("jobID:" + jobID);
    sourceCubeSegment = findSegmentWithUuid(jobID, cube);
    System.out.println(sourceCubeSegment);

    this.rowKeySplitter = new RowKeySplitter(sourceCubeSegment, 65, 255);
}
sourceTable = sourceConnection.getTable(tableName);
final InputSplit tableSplit = context.getInputSplit();
flatDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
dictionaryMap = DictionaryGetterUtil.getDictionaryMap(cubeSegment, context.getInputSplit(), conf);
@Override
protected void map(String key, String value, final Context context) throws IOException, InterruptedException {
    final InputSplit split = context.getInputSplit();
    if (!(split instanceof DatasourceInputSplit)) {
        throw new IAE(
private void finishRemainingHashRanges(Context context) throws IOException, InterruptedException {
    TableSplit split = (TableSplit) context.getInputSplit();
    byte[] splitEndRow = split.getEndRow();
    boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);
@Override protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context) throws IOException, InterruptedException { if (context.getNumReduceTasks() == 0) { context.write(key, NullWritable.get()); } else { populateComparableKeyRecord(key.datum(), this.outKey.datum()); this.outValue.datum(key.datum()); try { context.write(this.outKey, this.outValue); } catch (AvroRuntimeException e) { final Path[] paths = ((CombineFileSplit) context.getInputSplit()).getPaths(); throw new IOException("Unable to process paths " + StringUtils.join(paths, ','), e); } } context.getCounter(EVENT_COUNTER.RECORD_COUNT).increment(1); }
@Override
protected void doSetup(Context context) throws IOException, InterruptedException {
    super.bindCurrentConfiguration(context.getConfiguration());

    String cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
    CubeManager cubeManager = CubeManager.getInstance(config);
    CubeInstance cube = cubeManager.getCube(cubeName);
    CubeDesc cubeDesc = cube.getDescriptor();
    CubeSegment mergedCubeSegment = cube.getSegmentById(segmentID);

    // decide which source segment
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    IMROutput2.IMRMergeOutputFormat outputFormat = MRUtil.getBatchMergeOutputSide2(mergedCubeSegment).getOuputFormat();
    CubeSegment sourceCubeSegment = outputFormat.findSourceSegment(fileSplit, cube);
    reEncoder = new SegmentReEncoder(cubeDesc, sourceCubeSegment, mergedCubeSegment, config);
}
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    final KylinConfig kylinConfig = AbstractHadoopJob.loadKylinPropsAndMetadata();
    cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    cubeSegment = cube.getSegmentById(segmentID);

    CubeJoinedFlatTableEnrich intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
    Map<TblColRef, Dictionary<String>> dictionaryMap = DictionaryGetterUtil.getDictionaryMap(cubeSegment, context.getInputSplit(), context.getConfiguration());

    baseCuboidBuilder = new BaseCuboidBuilder(kylinConfig, cubeDesc, cubeSegment, intermediateTableDesc, dictionaryMap);
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    targetBatchSize = context.getConfiguration().getLong(HASH_BATCH_SIZE_CONF_KEY, DEFAULT_BATCH_SIZE);
    hasher = new ResultHasher();

    TableSplit split = (TableSplit) context.getInputSplit();
    hasher.startBatch(new ImmutableBytesWritable(split.getStartRow()));
}