/**
 * Builds a KeyValue for the given row key, encoding only the measures this
 * column (family) references, in column order.
 */
public KeyValue create(byte[] keyBytes, int keyOffset, int keyLength, Object[] measureValues) {
    // Project the referenced measures into this column's value slots.
    int columnCount = colValues.length;
    for (int col = 0; col < columnCount; col++) {
        colValues[col] = measureValues[refIndex[col]];
    }
    // Only the first position() bytes of the codec's backing array are valid.
    ByteBuffer encoded = codec.encode(colValues);
    return create(keyBytes, keyOffset, keyLength, encoded.array(), 0, encoded.position());
}
/**
 * Converts one flat-table row into its encoded measure buffer.
 */
public ByteBuffer buildValue(String[] flatRow) {
    Object[] measureObjs = buildValueObjects(flatRow);
    return measureCodec.encode(measureObjs);
}
/**
 * Aggregates all values of one key and writes the merged, re-encoded result.
 */
@Override
public void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    aggs.reset();
    for (Text rawValue : values) {
        // Periodic progress log, throttled by the batch constant.
        if (vcounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) {
            logger.info("Handling value with ordinal (This is not KV number!): " + vcounter);
        }
        ByteBuffer valueBuffer = ByteBuffer.wrap(rawValue.getBytes(), 0, rawValue.getLength());
        codec.decode(valueBuffer, input);
        aggs.aggregate(input, needAggrMeasures);
    }
    aggs.collectStates(result);

    // Re-encode aggregated states; only the first position() bytes are valid.
    ByteBuffer encoded = codec.encode(result);
    outputValue.set(encoded.array(), 0, encoded.position());
    context.write(key, outputValue);
}
}
/**
 * Spills the in-memory aggregation cache into {@code spillBuffer} as a
 * length-prefixed (key, encoded-measures) sequence, then releases the map.
 *
 * Fix: the original logged {@code buffMap.size()} BEFORE the
 * {@code buffMap != null} guard, throwing NPE when the map had already been
 * released; logging now happens inside the guard.
 */
public void flush() throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream(MAX_BUFFER_SIZE);
    if (buffMap != null) {
        logger.info("AggregationCache(size={} est_mem_size={} threshold={}) will spill to {}",
                buffMap.size(), estMemSize, spillThreshold, dumpedFile.getAbsolutePath());
        DataOutputStream bos = new DataOutputStream(baos);
        try {
            bos.writeInt(buffMap.size());
            for (Entry<byte[], MeasureAggregator[]> entry : buffMap.entrySet()) {
                MeasureAggregators aggs = new MeasureAggregators(entry.getValue());
                Object[] aggrResult = new Object[metrics.trueBitCount()];
                aggs.collectStates(aggrResult);
                ByteBuffer metricsBuf = measureCodec.encode(aggrResult);
                // Record format: [keyLen][key][valueLen][encoded measures].
                bos.writeInt(entry.getKey().length);
                bos.write(entry.getKey());
                bos.writeInt(metricsBuf.position());
                bos.write(metricsBuf.array(), 0, metricsBuf.position());
            }
        } finally {
            buffMap = null; // release aggregators for GC even if encoding failed
            IOUtils.closeQuietly(bos);
        }
    }
    spillBuffer = baos.toByteArray();
    IOUtils.closeQuietly(baos);
    logger.info("Accurately spill data size = {}", spillBuffer.length);
}
private Text newValueText(BufferedMeasureCodec codec, String sum, String min, String max, int count, int item_count) { Object[] values = new Object[] { sum == null ? null : new BigDecimal(sum), // new BigDecimal(min), new BigDecimal(max), new Long(count), new Long(item_count) }; ByteBuffer buf = codec.encode(values); Text t = new Text(); t.set(buf.array(), 0, buf.position()); return t; }
/** * Re-encode with both dimension and measure in encoded (Text) format. * @param key * @param value * @return * @throws IOException */ public Pair<Text, Text> reEncode(Text key, Text value) throws IOException { if (initialized == false) { throw new IllegalStateException("Not initialized"); } Object[] measureObjs = new Object[measureDescs.size()]; // re-encode measures if dictionary is used if (dictMeasures.size() > 0) { codec.decode(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()), measureObjs); for (Pair<Integer, MeasureIngester> pair : dictMeasures) { int i = pair.getFirst(); MeasureIngester ingester = pair.getSecond(); measureObjs[i] = ingester.reEncodeDictionary(measureObjs[i], measureDescs.get(i), oldDicts, newDicts); } ByteBuffer valueBuf = codec.encode(measureObjs); textValue.set(valueBuf.array(), 0, valueBuf.position()); return Pair.newPair(processKey(key), textValue); } else { return Pair.newPair(processKey(key), value); } }
@Override public void doReduce(ByteArrayWritable key, Iterable<ByteArrayWritable> values, Context context) throws IOException, InterruptedException { aggs.reset(); for (ByteArrayWritable value : values) { if (vcounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) { logger.info("Handling value with ordinal (This is not KV number!): " + vcounter); } codec.decode(value.asBuffer(), input); aggs.aggregate(input); } aggs.collectStates(result); // output key outputKey.set(key.array(), key.offset(), key.length()); // output value ByteBuffer valueBuf = codec.encode(result); outputValue.set(valueBuf.array(), 0, valueBuf.position()); context.write(outputKey, outputValue); }
/**
 * Encodes the aggregated measure objects of one record into a Text value.
 *
 * Fix: the original nested a second, identical synchronized/check pair
 * inside the first (triple-checked locking — a copy-paste error); this is
 * the standard double-checked form with identical behavior.
 */
@Override
public Tuple2<org.apache.hadoop.io.Text, org.apache.hadoop.io.Text> call(Tuple2<Text, Object[]> tuple2)
        throws Exception {
    if (initialized == false) {
        synchronized (SparkCubingMerge.class) {
            if (initialized == false) {
                // Lazy executor-side init: load config from HDFS and build the codec.
                KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(kylinConfig)) {
                    CubeDesc desc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cubeName);
                    codec = new BufferedMeasureCodec(desc.getMeasures());
                    initialized = true;
                }
            }
        }
    }
    ByteBuffer valueBuf = codec.encode(tuple2._2());
    // Copy only the valid bytes; the codec's backing array may be larger.
    byte[] encodedBytes = new byte[valueBuf.position()];
    System.arraycopy(valueBuf.array(), 0, encodedBytes, 0, valueBuf.position());
    return new Tuple2<>(tuple2._1(), new org.apache.hadoop.io.Text(encodedBytes));
}
};
// Encodes one (rowkey, measures) record into a pair of Text for HBase output.
@Override
public Tuple2<org.apache.hadoop.io.Text, org.apache.hadoop.io.Text> call(
        Tuple2<ByteArray, Object[]> tuple2) throws Exception {
    // Lazy one-time init via double-checked locking on the enclosing class;
    // Kylin config is loaded from HDFS on the executor side.
    if (initialized == false) {
        synchronized (SparkCubingByLayer.class) {
            if (initialized == false) {
                KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(kylinConfig)) {
                    CubeDesc desc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cubeName);
                    codec = new BufferedMeasureCodec(desc.getMeasures());
                    initialized = true;
                }
            }
        }
    }
    // Encode measures; only the first position() bytes of the backing array are valid.
    ByteBuffer valueBuf = codec.encode(tuple2._2());
    org.apache.hadoop.io.Text textResult = new org.apache.hadoop.io.Text();
    textResult.set(valueBuf.array(), 0, valueBuf.position());
    return new Tuple2<>(new org.apache.hadoop.io.Text(tuple2._1().array()), textResult);
}
}).saveAsNewAPIHadoopDataset(job.getConfiguration());
@Override protected GTRecord finalizeResult(GTRecord record, Object[] aggStates) { // 1. load dimensions for (int c : dimensions) { returnRecord.cols[c] = record.cols[c]; } // 2. serialize metrics byte[] bytes = measureCodec.encode(aggStates).array(); int[] sizes = measureCodec.getMeasureSizes(); // 3. load metrics int offset = 0; for (int i = 0; i < metrics.trueBitCount(); i++) { int c = metrics.trueBitAt(i); returnRecord.cols[c].reset(bytes, offset, sizes[i]); offset += sizes[i]; } return returnRecord; } }
/**
 * Populates the reusable record: dimension columns are sliced out of the raw
 * key, metric columns are rebuilt from the aggregators' serialized states.
 */
void load(byte[] key, MeasureAggregator[] value) {
    // Dimensions: fixed-width slices of the key, per the code system.
    int cursor = 0;
    int dimCount = dimensions.trueBitCount();
    for (int d = 0; d < dimCount; d++) {
        int col = dimensions.trueBitAt(d);
        int width = info.codeSystem.maxCodeLength(col);
        record.cols[col].reset(key, cursor, width);
        cursor += width;
    }

    // Metrics: collect each aggregator's state, then serialize them together.
    for (int m = 0; m < value.length; m++) {
        tmpValues[m] = value[m].getState();
    }
    byte[] serialized = measureCodec.encode(tmpValues).array();
    int[] measureSizes = measureCodec.getMeasureSizes();
    cursor = 0;
    for (int m = 0; m < value.length; m++) {
        int col = metrics.trueBitAt(m);
        record.cols[col].reset(serialized, cursor, measureSizes[m]);
        cursor += measureSizes[m];
    }
}
}
// Encoding a SUM that exceeds the measure's declared decimal precision must fail.
@Test(expected = IllegalArgumentException.class)
public void testError() throws Exception {
    CubeDesc cubeDesc = CubeManager.getInstance(getTestConfig()).getCube("test_kylin_cube_with_slr_ready").getDescriptor();
    HBaseColumnDesc hbaseCol = cubeDesc.getHbaseMapping().getColumnFamily()[0].getColumns()[0];
    BufferedMeasureCodec codec = new BufferedMeasureCodec(hbaseCol.getMeasures());

    Object[] row = new Object[] {
            new BigDecimal("11111111111111111111333.1234567"), // sum: overflows precision
            new BigDecimal("333.1111111"),                     // min
            new BigDecimal("333.1999999"),                     // max
            new LongWritable(2),                               // count
            new Long(100) };                                   // item_count
    codec.encode(row);
}
}
@Test public void testDecode() throws Exception { CubeDesc cubeDesc = CubeManager.getInstance(getTestConfig()).getCube("test_kylin_cube_with_slr_ready").getDescriptor(); HBaseColumnDesc hbaseCol = cubeDesc.getHbaseMapping().getColumnFamily()[0].getColumns()[0]; BufferedMeasureCodec codec = new BufferedMeasureCodec(hbaseCol.getMeasures()); BigDecimal sum = new BigDecimal("333.1234567"); BigDecimal min = new BigDecimal("333.1111111"); BigDecimal max = new BigDecimal("333.1999999"); Long count = new Long(2); Long item_count = new Long(100); ByteBuffer buf = codec.encode(new Object[] { sum, min, max, count, item_count }); buf.flip(); byte[] valueBytes = new byte[buf.limit()]; System.arraycopy(buf.array(), 0, valueBytes, 0, buf.limit()); RowValueDecoder rowValueDecoder = new RowValueDecoder(hbaseCol); for (MeasureDesc measure : cubeDesc.getMeasures()) { FunctionDesc aggrFunc = measure.getFunction(); int index = hbaseCol.findMeasure(aggrFunc); rowValueDecoder.setProjectIndex(index); } rowValueDecoder.decodeAndConvertJavaObj(valueBytes); Object[] measureValues = rowValueDecoder.getValues(); //BigDecimal.ROUND_HALF_EVEN in BigDecimalSerializer assertEquals("[333.1235, 333.1111, 333.2000, 2, 100]", Arrays.toString(measureValues)); }
/**
 * Round-trips one value of each measure type (double, long, decimal, HLLC,
 * bitmap) through BufferedMeasureCodec and checks equality after decode.
 *
 * Fix: replaced the deprecated {@code new Double(1.0)} / {@code new Long(2)}
 * boxing constructors (deprecated since Java 9) with {@code valueOf}.
 */
@Test
public void basicTest() {
    MeasureDesc[] descs = new MeasureDesc[] { measure("double"), measure("long"), measure("decimal"),
            measure("HLLC16"), measure("bitmap") };
    BufferedMeasureCodec codec = new BufferedMeasureCodec(descs);

    Double d = Double.valueOf(1.0);
    Long l = Long.valueOf(2);
    BigDecimal b = new BigDecimal("333.1234");
    HLLCounter hllc = new HLLCounter(16);
    hllc.add("1234567");
    hllc.add("abcdefg");
    BitmapCounter bitmap = RoaringBitmapCounterFactory.INSTANCE.newBitmap();
    bitmap.add(123);
    bitmap.add(45678);
    bitmap.add(Integer.MAX_VALUE - 10);
    Object[] values = new Object[] { d, l, b, hllc, bitmap };

    ByteBuffer buf = codec.encode(values);
    buf.flip();
    System.out.println("size: " + buf.limit());

    Object[] copy = new Object[values.length];
    codec.decode(buf, copy);

    // Decoded objects must equal the originals, element by element.
    for (int i = 0; i < values.length; i++) {
        Object x = values[i];
        Object y = copy[i];
        assertEquals(x, y);
    }
}
/**
 * Creates a KeyValue whose value holds only this column's referenced
 * measures, encoded in column order.
 */
public KeyValue create(byte[] keyBytes, int keyOffset, int keyLength, Object[] measureValues) {
    // Gather the measures this column references.
    for (int slot = 0; slot < colValues.length; slot++) {
        colValues[slot] = measureValues[refIndex[slot]];
    }
    ByteBuffer valueBuffer = codec.encode(colValues);
    // Valid bytes span [0, position()) of the codec's backing array.
    return create(keyBytes, keyOffset, keyLength, valueBuffer.array(), 0, valueBuffer.position());
}
/** Encodes the measure objects built from one flat-table row. */
public ByteBuffer buildValue(String[] flatRow) {
    return measureCodec.encode(
            buildValueObjects(flatRow));
}
/**
 * Reduces all values of one key into a single aggregated, re-encoded value.
 */
@Override
public void doReduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    aggs.reset();
    for (Text current : values) {
        vcounter++;
        // Log every NORMAL_RECORD_LOG_THRESHOLD-th value (vcounter % thr == 1 here
        // is equivalent to the pre-increment check vcounter++ % thr == 0).
        if ((vcounter - 1) % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) {
            logger.info("Handling value with ordinal (This is not KV number!): " + vcounter);
        }
        codec.decode(ByteBuffer.wrap(current.getBytes(), 0, current.getLength()), input);
        aggs.aggregate(input, needAggrMeasures);
    }
    aggs.collectStates(result);

    ByteBuffer merged = codec.encode(result);
    outputValue.set(merged.array(), 0, merged.position());
    context.write(key, outputValue);
}
}
@Override public void doReduce(ByteArrayWritable key, Iterable<ByteArrayWritable> values, Context context) throws IOException, InterruptedException { aggs.reset(); for (ByteArrayWritable value : values) { if (vcounter++ % BatchConstants.NORMAL_RECORD_LOG_THRESHOLD == 0) { logger.info("Handling value with ordinal (This is not KV number!): " + vcounter); } codec.decode(value.asBuffer(), input); aggs.aggregate(input); } aggs.collectStates(result); // output key outputKey.set(key.array(), key.offset(), key.length()); // output value ByteBuffer valueBuf = codec.encode(result); outputValue.set(valueBuf.array(), 0, valueBuf.position()); context.write(outputKey, outputValue); }
@Override protected GTRecord finalizeResult(GTRecord record, Object[] aggStates) { // 1. load dimensions for (int c : dimensions) { returnRecord.cols[c] = record.cols[c]; } // 2. serialize metrics byte[] bytes = measureCodec.encode(aggStates).array(); int[] sizes = measureCodec.getMeasureSizes(); // 3. load metrics int offset = 0; for (int i = 0; i < metrics.trueBitCount(); i++) { int c = metrics.trueBitAt(i); returnRecord.cols[c].reset(bytes, offset, sizes[i]); offset += sizes[i]; } return returnRecord; } }
/**
 * Loads one cache entry into the reusable record: the key is split into
 * fixed-width dimension codes; the aggregator states are serialized and
 * sliced into the metric columns.
 */
void load(byte[] key, MeasureAggregator[] value) {
    // Dimension columns: consecutive fixed-width slices of the key.
    int pos = 0;
    for (int i = 0, n = dimensions.trueBitCount(); i < n; i++) {
        int col = dimensions.trueBitAt(i);
        int width = info.codeSystem.maxCodeLength(col);
        record.cols[col].reset(key, pos, width);
        pos += width;
    }

    // Snapshot each aggregator's state, then encode them all at once.
    for (int i = 0; i < value.length; i++) {
        tmpValues[i] = value[i].getState();
    }
    byte[] stateBytes = measureCodec.encode(tmpValues).array();
    int[] stateSizes = measureCodec.getMeasureSizes();

    // Metric columns: consecutive slices of the encoded state array.
    pos = 0;
    for (int i = 0; i < value.length; i++) {
        int col = metrics.trueBitAt(i);
        record.cols[col].reset(stateBytes, pos, stateSizes[i]);
        pos += stateSizes[i];
    }
}
}