/**
 * Wires together everything needed to build base-cuboid records from flat-table rows:
 * a rowkey encoder targeting the base cuboid, measure ingesters/codec, and the
 * key-value builder that projects flat rows onto rowkey columns.
 */
public BaseCuboidBuilder(KylinConfig kylinConfig, CubeDesc cubeDesc, CubeSegment cubeSegment,
        CubeJoinedFlatTableEnrich intermediateTableDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
    this.kylinConfig = kylinConfig;
    this.cubeDesc = cubeDesc;
    this.cubeSegment = cubeSegment;
    this.intermediateTableDesc = intermediateTableDesc;
    this.dictionaryMap = dictionaryMap;

    // Encoder is always created for the base (all-dimensions) cuboid.
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, Cuboid.getBaseCuboid(cubeDesc));

    measureDescList = cubeDesc.getMeasures();
    aggrIngesters = MeasureIngester.create(measureDescList);
    measureCodec = new BufferedMeasureCodec(measureDescList);

    kvBuilder = new KeyValueBuilder(intermediateTableDesc);
}
/** Encodes one flat-table row into its base-cuboid rowkey bytes. */
public byte[] buildKey(String[] flatRow) {
    // Project the flat row down to the rowkey columns, then encode.
    return rowKeyEncoder.encode(kvBuilder.buildKey(flatRow));
}
/**
 * Re-targets the encoder and buffers at the given cuboid. The dimension count
 * is the number of set bits in the cuboid id; measure columns occupy the
 * positions immediately after the dimensions in the record layout.
 */
private void initVariables(Long cuboidId) {
    Cuboid cuboid = Cuboid.findForMandatory(cubeDesc, cuboidId);
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, cuboid);
    keyBuf = rowKeyEncoder.createBuf();

    dimensions = Long.bitCount(cuboidId);
    measureColumns = new ImmutableBitSet(dimensions, dimensions + measureCount);
}
}
@Test public void testEncodeWithSlr() throws Exception { CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITH_SLR_READY"); // CubeSegment seg = cube.getTheOnlySegment(); CubeDesc cubeDesc = cube.getDescriptor(); // String data = // "1234567892013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular"; byte[][] data = new byte[9][]; data[0] = Bytes.toBytes("123456789"); data[1] = Bytes.toBytes("2012-12-15"); data[2] = Bytes.toBytes("11848"); data[3] = Bytes.toBytes("Health & Beauty"); data[4] = Bytes.toBytes("Fragrances"); data[5] = Bytes.toBytes("Women"); data[6] = Bytes.toBytes("FP-GTC"); data[7] = Bytes.toBytes("0"); data[8] = Bytes.toBytes("15"); long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc); Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId); AbstractRowKeyEncoder rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cube.getFirstSegment(), baseCuboid); byte[] encodedKey = rowKeyEncoder.encode(data); assertEquals(48, encodedKey.length); byte[] sellerId = Arrays.copyOfRange(encodedKey, 8, 26); byte[] cuboidId = Arrays.copyOfRange(encodedKey, 0, 8); byte[] rest = Arrays.copyOfRange(encodedKey, 26, encodedKey.length); assertTrue(Bytes.toString(sellerId).startsWith("123456789")); assertEquals(511, Bytes.toLong(cuboidId)); assertArrayEquals(new byte[] { 11, 55, -13, 13, 22, 34, 121, 70, 80, 45, 71, 84, 67, 9, 9, 9, 9, 9, 9, 0, 10, 5 }, rest); }
private void init(Collection<ColumnValueRange> andDimensionRanges) { int size = andDimensionRanges.size(); Map<TblColRef, String> startValues = Maps.newHashMapWithExpectedSize(size); Map<TblColRef, String> stopValues = Maps.newHashMapWithExpectedSize(size); Map<TblColRef, Set<String>> fuzzyValues = Maps.newHashMapWithExpectedSize(size); for (ColumnValueRange dimRange : andDimensionRanges) { TblColRef column = dimRange.getColumn(); startValues.put(column, dimRange.getBeginValue()); stopValues.put(column, dimRange.getEndValue()); fuzzyValues.put(column, dimRange.getEqualValues()); TblColRef partitionDateColumnRef = cubeSeg.getCubeDesc().getModel().getPartitionDesc().getPartitionDateColumnRef(); if (column.equals(partitionDateColumnRef)) { initPartitionRange(dimRange); } } AbstractRowKeyEncoder encoder = AbstractRowKeyEncoder.createInstance(cubeSeg, cuboid); encoder.setBlankByte(RowConstants.ROWKEY_LOWER_BYTE); this.startKey = encoder.encode(startValues); encoder.setBlankByte(RowConstants.ROWKEY_UPPER_BYTE); // In order to make stopRow inclusive add a trailing 0 byte. #See // Scan.setStopRow(byte [] stopRow) this.stopKey = Bytes.add(encoder.encode(stopValues), ZERO_TAIL_BYTES); // restore encoder defaults for later reuse (note // AbstractRowKeyEncoder.createInstance() caches instances) encoder.setBlankByte(AbstractRowKeyEncoder.DEFAULT_BLANK_BYTE); // always fuzzy match cuboid ID to lock on the selected cuboid this.fuzzyKeys = buildFuzzyKeys(fuzzyValues); }
@Test public void testEncodeWithoutSlr() throws Exception { CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITHOUT_SLR_READY"); // CubeSegment seg = cube.getTheOnlySegment(); CubeDesc cubeDesc = cube.getDescriptor(); // String data = // "2013-08-18Abbigliamento e accessoriDonna: AccessoriSciarpFoulard e ScialliAuctionItalyRegular"; byte[][] data = new byte[8][]; data[0] = Bytes.toBytes("2012-12-15"); data[1] = Bytes.toBytes("11848"); data[2] = Bytes.toBytes("Health & Beauty"); data[3] = Bytes.toBytes("Fragrances"); data[4] = Bytes.toBytes("Women"); data[5] = Bytes.toBytes("FP-GTC"); data[6] = Bytes.toBytes("0"); data[7] = Bytes.toBytes("15"); long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc); Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId); AbstractRowKeyEncoder rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cube.getFirstSegment(), baseCuboid); byte[] encodedKey = rowKeyEncoder.encode(data); assertEquals(30, encodedKey.length); byte[] cuboidId = Arrays.copyOfRange(encodedKey, 0, 8); byte[] rest = Arrays.copyOfRange(encodedKey, 8, encodedKey.length); assertEquals(255, Bytes.toLong(cuboidId)); assertArrayEquals(new byte[] { 11, 55, -13, 13, 22, 34, 121, 70, 80, 45, 71, 84, 67, 9, 9, 9, 9, 9, 9, 0, 10, 5 }, rest); }
@Test public void testEncodeAndDecodeWithUtf8() throws IOException { CubeInstance cube = CubeManager.getInstance(getTestConfig()).getCube("TEST_KYLIN_CUBE_WITHOUT_SLR_READY"); CubeDesc cubeDesc = cube.getDescriptor(); byte[][] data = new byte[8][]; data[0] = Bytes.toBytes("2012-12-15"); data[1] = Bytes.toBytes("11848"); data[2] = Bytes.toBytes("Health & Beauty"); data[3] = Bytes.toBytes("Fragrances"); data[4] = Bytes.toBytes("Women"); data[5] = Bytes.toBytes("刊登格式测试");// UTF-8 data[6] = Bytes.toBytes("0"); data[7] = Bytes.toBytes("15"); long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc); Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId); AbstractRowKeyEncoder rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cube.getFirstSegment(), baseCuboid); byte[] encodedKey = rowKeyEncoder.encode(data); assertEquals(30, encodedKey.length); RowKeyDecoder rowKeyDecoder = new RowKeyDecoder(cube.getFirstSegment()); rowKeyDecoder.decode(encodedKey); List<String> names = rowKeyDecoder.getNames(null); List<String> values = rowKeyDecoder.getValues(); assertEquals("[CAL_DT, LEAF_CATEG_ID, META_CATEG_NAME, CATEG_LVL2_NAME, CATEG_LVL3_NAME, LSTG_FORMAT_NAME, LSTG_SITE_ID, SLR_SEGMENT_CD]", names.toString()); assertEquals("[2012-12-15, 11848, Health & Beauty, Fragrances, Women, 刊登格式, 0, 15]", values.toString()); } }
// Maps one flat-table row to (encoded rowkey, measure value objects).
// One-time lazy initialization of the shared builder uses double-checked
// locking on the class object.
// NOTE(review): assumes 'initialized' is declared volatile — confirm its
// declaration; otherwise the unsynchronized first read is unsafe under the JMM.
@Override
public Tuple2<ByteArray, Object[]> call(String[] rowArray) throws Exception {
    if (initialized == false) {
        synchronized (SparkCubingByLayer.class) {
            if (initialized == false) {
                // Load cube metadata from HDFS; the thread-local config is
                // scoped to this try block only.
                KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(kConfig)) {
                    CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
                    CubeDesc cubeDesc = cubeInstance.getDescriptor();
                    CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
                    CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                            EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
                    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
                    Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
                    baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                            AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                            MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
                    initialized = true;
                }
            }
        }
    }
    // Clear per-row aggregation state before building this row.
    baseCuboidBuilder.resetAggrs();
    byte[] rowKey = baseCuboidBuilder.buildKey(rowArray);
    Object[] result = baseCuboidBuilder.buildValueObjects(rowArray);
    return new Tuple2<>(new ByteArray(rowKey), result);
}
}
// Writes one record of the given cuboid. On a cuboid boundary it logs the
// previous cuboid's row count and re-initializes the per-cuboid encoder state,
// then emits (encoded rowkey, serialized measure columns) as a key/value pair.
@Override
public void write(long cuboidId, GTRecord record) throws IOException {

    if (lastCuboidId == null || !lastCuboidId.equals(cuboidId)) {
        if (lastCuboidId != null) {
            logger.info("Cuboid " + lastCuboidId + " has " + cuboidRowCount + " rows");
            cuboidRowCount = 0;
        }
        // output another cuboid
        initVariables(cuboidId);
        lastCuboidId = cuboidId;
    }

    cuboidRowCount++;

    // Encode dimension values (the record's primary key) into keyBuf.
    rowKeyEncoder.encode(record, record.getInfo().getPrimaryKey(), keyBuf);

    //output measures
    valueBuf.clear();
    try {
        record.exportColumns(measureColumns, valueBuf);
    } catch (BufferOverflowException boe) {
        // Value buffer too small: grow to 1.5x the measured size and retry once.
        valueBuf = ByteBuffer.allocate((int) (record.sizeOf(measureColumns) * 1.5));
        record.exportColumns(measureColumns, valueBuf);
    }

    outputKey.set(keyBuf, 0, keyBuf.length);
    outputValue.set(valueBuf.array(), 0, valueBuf.position());
    writeAsKeyValue(outputKey, outputValue);
}
// Re-targets the rowkey encoder and key buffer at the given cuboid.
// The dimension count equals the number of set bits in the cuboid id;
// measure columns occupy the positions immediately after the dimensions.
private void initVariables(Long cuboidId) {
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, Cuboid.findForMandatory(cubeDesc, cuboidId));
    keyBuf = rowKeyEncoder.createBuf();

    dimensions = Long.bitCount(cuboidId);
    measureColumns = new ImmutableBitSet(dimensions, dimensions + measureCount);
}
}
AbstractRowKeyEncoder rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cube.getFirstSegment(), baseCuboid); byte[] encodedKey = rowKeyEncoder.encode(data); assertEquals(48, encodedKey.length); byte[] sellerId = Arrays.copyOfRange(encodedKey, 8, 26);
/**
 * Per-task Hadoop setup: parses job configuration, validates the
 * intermediate-table row delimiter, resolves cube/segment metadata and
 * prepares the encoder/codec/buffer state reused across map() calls.
 *
 * Fix: the delimiter string was encoded via Bytes.toBytes() three separate
 * times; encode it once and reuse the result.
 */
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
    segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);
    intermediateTableRowDelimiter = context.getConfiguration().get(
            BatchConstants.CFG_CUBE_INTERMEDIATE_TABLE_ROW_DELIMITER,
            Character.toString(BatchConstants.INTERMEDIATE_TABLE_ROW_DELIMITER));

    // The delimiter must encode to exactly one byte.
    byte[] delimiterBytes = Bytes.toBytes(intermediateTableRowDelimiter);
    if (delimiterBytes.length > 1) {
        throw new RuntimeException("Expected delimiter byte length is 1, but got " + delimiterBytes.length);
    }
    byteRowDelimiter = delimiterBytes[0];

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());

    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    // NEW segments only: this mapper runs as part of an in-progress build.
    cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);

    intermediateTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), cubeSegment);

    bytesSplitter = new BytesSplitter(200, 4096);
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);

    measureCodec = new MeasureCodec(cubeDesc.getMeasures());
    measures = new Object[cubeDesc.getMeasures().size()];

    int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    keyBytesBuf = new byte[colCount][];

    initNullBytes();
}
/**
 * Assembles the base-cuboid rowkey from the split flat-table columns:
 * copies each rowkey column's bytes, maps the null marker to Java null,
 * then hands the column array to the rowkey encoder.
 */
private byte[] buildKey(SplittedBytes[] splitBuffers) {
    int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
    int dimCount = baseCuboid.getColumns().size();
    for (int i = 0; i < dimCount; i++) {
        SplittedBytes split = splitBuffers[rowKeyColumnIndexes[i]];
        byte[] value = Arrays.copyOf(split.value, split.length);
        keyBytesBuf[i] = isNull(value) ? null : value;
    }
    return rowKeyEncoder.encode(keyBytesBuf);
}
// Per-task Hadoop setup: resolves cube/segment/fact-table metadata from the
// job configuration and prepares encoders, codecs and buffers reused across
// map() calls, then prepares lookup joins and metric columns.
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
    segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());

    metadataManager = MetadataManager.getInstance(config);
    cube = CubeManager.getInstance(config).getCube(cubeName);
    // NEW segments only: this mapper runs as part of an in-progress build.
    cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
    cubeDesc = cube.getDescriptor();
    factTableDesc = metadataManager.getTableDesc(cubeDesc.getFactTable());

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);

    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);
    measureCodec = new MeasureCodec(cubeDesc.getMeasures());
    measures = new Object[cubeDesc.getMeasures().size()];

    int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    keyBytesBuf = new byte[colCount][];

    bytesSplitter = new BytesSplitter(factTableDesc.getColumns().length, 4096);

    // As in Hive, a null value is represented by the literal \N.
    nullValue = new byte[] { (byte) '\\', (byte) 'N' };

    prepareJoins();
    prepareMetrics();
}
return rowKeyEncoder.encode(keyBytesBuf);
/**
 * Wires together everything needed to build base-cuboid records from
 * flat-table rows: a rowkey encoder for the base cuboid, measure
 * ingesters/codec, and the key-value builder that projects flat rows
 * onto rowkey columns.
 */
public BaseCuboidBuilder(KylinConfig kylinConfig, CubeDesc cubeDesc, CubeSegment cubeSegment,
        CubeJoinedFlatTableEnrich intermediateTableDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) {
    this.kylinConfig = kylinConfig;
    this.cubeDesc = cubeDesc;
    this.cubeSegment = cubeSegment;
    this.intermediateTableDesc = intermediateTableDesc;
    this.dictionaryMap = dictionaryMap;

    // Encoder always targets the base (all-dimensions) cuboid.
    Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);

    measureDescList = cubeDesc.getMeasures();
    aggrIngesters = MeasureIngester.create(measureDescList);
    measureCodec = new BufferedMeasureCodec(measureDescList);

    kvBuilder = new KeyValueBuilder(intermediateTableDesc);
}
/** Encodes one flat-table row into its base-cuboid rowkey bytes. */
public byte[] buildKey(String[] flatRow) {
    // Project the flat row down to the rowkey columns, then encode.
    String[] colKeys = kvBuilder.buildKey(flatRow);
    return rowKeyEncoder.encode(colKeys);
}
/**
 * Per-task Hadoop setup: parses job configuration, validates the
 * intermediate-table row delimiter, resolves cube/segment metadata and
 * prepares encoder/codec/ingester/dictionary state reused across map() calls.
 *
 * Fix: the delimiter string was encoded via Bytes.toBytes() three separate
 * times; encode it once and reuse the result.
 */
@Override
protected void setup(Context context) throws IOException {
    super.publishConfiguration(context.getConfiguration());

    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME).toUpperCase();
    segmentName = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_NAME);
    intermediateTableRowDelimiter = context.getConfiguration().get(
            BatchConstants.CFG_CUBE_INTERMEDIATE_TABLE_ROW_DELIMITER,
            Character.toString(BatchConstants.INTERMEDIATE_TABLE_ROW_DELIMITER));

    // The delimiter must encode to exactly one byte.
    byte[] delimiterBytes = Bytes.toBytes(intermediateTableRowDelimiter);
    if (delimiterBytes.length > 1) {
        throw new RuntimeException("Expected delimiter byte length is 1, but got " + delimiterBytes.length);
    }
    byteRowDelimiter = delimiterBytes[0];

    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(context.getConfiguration());

    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    // NEW segments only: this mapper runs as part of an in-progress build.
    cubeSegment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);

    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId);

    intermediateTableDesc = new CubeJoinedFlatTableDesc(cube.getDescriptor(), cubeSegment);

    bytesSplitter = new BytesSplitter(200, 16384);
    rowKeyEncoder = AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid);

    measureCodec = new MeasureCodec(cubeDesc.getMeasures());
    measures = new Object[cubeDesc.getMeasures().size()];

    int colCount = cubeDesc.getRowkey().getRowKeyColumns().length;
    keyBytesBuf = new byte[colCount][];

    aggrIngesters = MeasureIngester.create(cubeDesc.getMeasures());
    dictionaryMap = cubeSegment.buildDictionaryMap();

    initNullBytes();
}
// Assembles the base-cuboid rowkey from the split flat-table columns:
// copies each rowkey column's bytes, maps the null marker to Java null,
// then hands the column array to the rowkey encoder.
private byte[] buildKey(SplittedBytes[] splitBuffers) {
    int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
    for (int i = 0; i < baseCuboid.getColumns().size(); i++) {
        int index = rowKeyColumnIndexes[i];
        // Copy exactly 'length' bytes; the split buffer may be larger.
        keyBytesBuf[i] = Arrays.copyOf(splitBuffers[index].value, splitBuffers[index].length);
        if (isNull(keyBytesBuf[i])) {
            keyBytesBuf[i] = null;
        }
    }
    return rowKeyEncoder.encode(keyBytesBuf);
}
// Maps one flat-table row to (encoded rowkey, measure value objects).
// One-time lazy initialization of the shared builder uses double-checked
// locking on the class object.
// NOTE(review): assumes 'initialized' is declared volatile — confirm its
// declaration; otherwise the unsynchronized first read is unsafe under the JMM.
@Override
public Tuple2<ByteArray, Object[]> call(String[] rowArray) throws Exception {
    if (initialized == false) {
        synchronized (SparkCubingByLayer.class) {
            if (initialized == false) {
                // Load cube metadata from HDFS; the thread-local config is
                // scoped to this try block only.
                KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(kConfig)) {
                    CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
                    CubeDesc cubeDesc = cubeInstance.getDescriptor();
                    CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
                    CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                            EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
                    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
                    Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
                    baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                            AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                            MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
                    initialized = true;
                }
            }
        }
    }
    // Clear per-row aggregation state before building this row.
    baseCuboidBuilder.resetAggrs();
    byte[] rowKey = baseCuboidBuilder.buildKey(rowArray);
    Object[] result = baseCuboidBuilder.buildValueObjects(rowArray);
    return new Tuple2<>(new ByteArray(rowKey), result);
}
}