// NOTE(review): fragment — the enclosing method's signature and several
// intervening statements were elided from this view; statements preserved as-is.
columns = new HBaseColumn[] { getSliceColumn(min, min) };
} else {
int max = Integer.parseInt(m.group("max"));
List<Pair<Integer, Integer>> sliceRanges;
if (_doPyramidding) {
// Pyramidding: compose the requested range from the minimal set of
// power-of-two slice-range columns.
sliceRanges = decomposeRange(min, max);
} else {
sliceRanges = new ArrayList<>();
// NOTE(review): sliceRanges is iterated immediately after being created empty —
// the code that populates it (and allocates `columns`) appears elided between
// these lines; confirm against the full file.
for (int i = 0; i < sliceRanges.size(); ++i) {
Pair<Integer, Integer> sliceRange = sliceRanges.get(i);
columns[i] = getSliceColumn(sliceRange.getFirst(), sliceRange.getSecond());
// Raw List cast — presumably recomposing per-column raw results; confirm.
realResults.add(compose((List) rawResults, i, columns.length, numReal));
/**
 * Constructs a sliced pyramid IO on top of the base HBase pyramid IO.
 * Pyramidding (writing power-of-two slice-range columns in addition to
 * single-slice columns) is enabled by default; see setPyramidding.
 *
 * @param zookeeperQuorum zookeeper quorum host(s)
 * @param zookeeperPort   zookeeper client port
 * @param hbaseMaster     HBase master address
 * @throws IOException if the underlying HBase connection cannot be established
 */
public HBaseSlicedPyramidIO (String zookeeperQuorum, String zookeeperPort, String hbaseMaster) throws IOException {
    super(zookeeperQuorum, zookeeperPort, hbaseMaster);
    setPyramidding(true);
}
// NOTE(review): fragment of decomposeRange — the method signature and the
// definition of `msb` (presumably the most significant bit of the range span)
// are outside this view.
// Normalize argument order so start <= end.
if (start > end) return decomposeRange(end, start);
// Smallest multiple of msb strictly greater than start; splits [start, end]
// into a left part below the aligned midpoint and a right part above it.
int midPoint = start - (start % msb) + msb;
int[] startRanges = numberToBits(midPoint - start);; // stray ';' — empty statement, harmless
int[] endRanges = numberToBits(end + 1 - midPoint);
// NOTE(review): fragment of a round-trip test — the method signature and the
// declarations of `table`, `tile`, and `indices` are outside this view.
// Live-cluster coordinates; this test requires a reachable HBase instance.
HBaseSlicedPyramidIO io = new HBaseSlicedPyramidIO("hadoop-s1", "2181", "hadoop-s1:60000");
TileSerializer<List<Integer>> serializer = new KryoSerializer<>(new TypeDescriptor(List.class, new TypeDescriptor(Integer.class)));
io.initializeForWrite(table);
io.writeTiles(table, serializer, Arrays.asList(tile));
// Read back the whole tile, each single slice, and two slice pairs;
// the "[n]" / "[m-n]" table-name suffix selects slice columns.
TileData<List<Integer>> full = io.readTiles(table, serializer, indices).get(0);
TileData<List<Integer>> s0 = io.readTiles(table+"[0]", serializer, indices).get(0);
TileData<List<Integer>> s1 = io.readTiles(table+"[1]", serializer, indices).get(0);
TileData<List<Integer>> s2 = io.readTiles(table+"[2]", serializer, indices).get(0);
TileData<List<Integer>> s3 = io.readTiles(table+"[3]", serializer, indices).get(0);
List<TileData<List<Integer>>> slices = Arrays.asList(s0, s1, s2, s3);
TileData<List<Integer>> s01 = io.readTiles(table+"[0-1]", serializer, indices).get(0);
TileData<List<Integer>> s23 = io.readTiles(table+"[2-3]", serializer, indices).get(0);
List<TileData<List<Integer>>> slicePairs = Arrays.asList(s01, s23);
/**
 * Timing comparison of a pyramidded slice-range read vs. an individual
 * per-slice read against a live cluster.
 * NOTE(review): fragment — the declarations of `ks` and `indices`, the timing
 * start points, and the rest of the method are elided from this view.
 */
@Test public void testSliceDecompositionVsSingleSlices () throws Exception {
String table = "nycTaxiHeatmap_sw2015_weekly";
HBaseSlicedPyramidIO io = new HBaseSlicedPyramidIO("hadoop-s1", "2181", "hadoop-s1:60000");
// NOTE(review): `serializer` is declared here but the reads below use `ks` —
// the declaration of `ks` (and apparently `fullStart`) is elided from this view.
TileSerializer<List<Integer>> serializer = new KryoSerializer<>(new TypeDescriptor(List.class, new TypeDescriptor(Integer.class)));
TileData<List<Integer>> full = io.readTiles(table, ks, indices).get(0);
long fullEnd = System.nanoTime();
// Read slices 7-27 with pyramidding on (composed from power-of-two columns)...
TileData<List<Integer>> pyramided = io.readTiles(table+"[7-27]", ks, indices).get(0);
long pyramidedEnd = System.nanoTime();
// ...then the same range with pyramidding off (one column per slice).
io.setPyramidding(false);
long individualStart = System.nanoTime();
TileData<List<Integer>> individual = io.readTiles(table+"[7-27]", ks, indices).get(0);
long individualEnd = System.nanoTime();
// NOTE(review): fragment of a benchmark/consistency test — the method
// signature, the declarations of `full`, `indices`, `singleSliceTable`,
// `dSerializer`, the loop bodies, and many closing braces are elided from
// this view (the three i/s loop groups below redeclare their loop variables,
// so they must be sibling blocks in the full file, not nested as shown).
int iterations = 1;
int slices = 53;
HBaseSlicedPyramidIO io = new HBaseSlicedPyramidIO("hadoop-s1", "2181", "hadoop-s1:60000");
TileSerializer<List<Integer>> ks = new KryoSerializer<List<Integer>>(new TypeDescriptor(List.class, new TypeDescriptor(Integer.class)));
String table = "nycTaxiHeatmap_sw2015_weekly";
// Compare every bin of the full tile against each single-slice read.
for (int i=0; i<iterations; ++i) {
for (int s=0; s<slices; ++s) {
full = io.readTiles(table, ks, indices).get(0);
TileData<List<Integer>> slice = io.readTiles(table + "["+s+"]", ks, indices).get(0);
for (int x=0; x<full.getDefinition().getXBins(); ++x) {
for (int y=0; y<full.getDefinition().getYBins(); ++y) {
// Timing loop: repeated single-slice reads from the sliced table.
for (int i=0; i<iterations; ++i) {
for (int s=0; s<slices; ++s) {
io.readTiles(table + "["+s+"]", ks, indices);
// Timing loop: repeated reads from a dedicated single-slice table.
for (int i=0; i<iterations; ++i) {
for (int s=0; s<slices; ++s) {
io.readTiles(singleSliceTable, dSerializer, indices);
/**
 * Asserts that decomposeRange(start, end) yields exactly the given range
 * elements, in the given order.
 */
private void testRange (int start, int end, Pair<Integer, Integer>... expectedRangeElements) {
    List<Pair<Integer, Integer>> actual = HBaseSlicedPyramidIO.decomposeRange(start, end);
    Assert.assertEquals(expectedRangeElements.length, actual.size());
    int position = 0;
    for (Pair<Integer, Integer> expected: expectedRangeElements) {
        Assert.assertEquals(expected, actual.get(position));
        ++position;
    }
}
/**
 * Creates the HBaseSlicedPyramidIO instance this factory configures, reading
 * connection settings from the factory's properties.
 *
 * @return a newly constructed sliced pyramid IO
 * @throws ConfigurationException if the underlying HBase IO cannot be created
 */
@Override protected PyramidIO createInstance () throws ConfigurationException {
    try {
        String quorum = getPropertyValue(HBasePyramidIOFactory.HBASE_ZOOKEEPER_QUORUM);
        // NOTE(review): "ZOKEEPER" is misspelled; the constant is presumably declared
        // with this spelling in HBasePyramidIOFactory — fix there first if renaming.
        String port = getPropertyValue(HBasePyramidIOFactory.HBASE_ZOKEEPER_PORT);
        String master = getPropertyValue(HBasePyramidIOFactory.HBASE_MASTER);
        return new HBaseSlicedPyramidIO(quorum, port, master);
    } catch (IOException e) {
        // Wrap with the cause preserved so connection failures stay diagnosable.
        throw new ConfigurationException("Error creating HBase sliced pyramid IO", e);
    }
}
} // closes the enclosing factory class — its header is outside this view
private <T> Put addSlices (Put existingPut, TileSerializer<List<T>> serializer, TileData<List<T>> tile) throws IOException { // Figure out into how many slices to divide the data int slices = numSlices(tile); // Store the whole thing pyramidded. int slicesPerWrite = 1; while (slicesPerWrite < slices) { // Divide the tile into slices, storing each of them individually in their own column for (int startSlice = 0; startSlice < slices; startSlice = startSlice + slicesPerWrite) { int endSlice = startSlice + slicesPerWrite - 1; TileData<List<T>> slice = new DenseTileMultiSliceView<T>(tile, startSlice, endSlice).harden(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); serializer.serialize(slice, baos); existingPut = addToPut(existingPut, rowIdFromTileIndex(tile.getDefinition()), getSliceColumn(startSlice, endSlice), baos.toByteArray()); } // If not pyramidding, bail out after our first time through. if (_doPyramidding) slicesPerWrite = slicesPerWrite * 2; else slicesPerWrite = slices; } return existingPut; }
/**
 * Round-trip test: writes an 8-slice tile, then verifies single-slice reads
 * and a whole-tile read.
 * NOTE(review): fragment — the declarations of `data` and `index`, the loop
 * over slice index `i`, and several closing braces are elided from this view
 * (`slice`/`tile` are declared twice below, which only compiles with the
 * elided scoping braces restored).
 */
@Test public void testRoundRoundTripWhole () throws Exception {
String table = "hbsioTest";
HBaseSlicedPyramidIO io = new HBaseSlicedPyramidIO("hadoop-s1", "2181", "hadoop-s1:60000");
TileSerializer<List<Integer>> serializer = new PrimitiveArrayAvroSerializer<>(Integer.class, CodecFactory.nullCodec());
try {
data.setBin(0, 0, Arrays.asList(-0, -1, -2, -3, -4, -5, -6, -7));
io.initializeForWrite(table);
io.writeTiles(table, serializer, Arrays.asList(data));
// Per-slice read: table name suffixed with "[i]" selects one slice column.
List<TileData<List<Integer>>> slice = io.readTiles(table + "[" + i + "]", serializer, Arrays.asList(index));
Assert.assertEquals(1, slice.size());
TileData<List<Integer>> tile = slice.get(0);
// Whole-tile read: un-suffixed table name returns the full data column.
List<TileData<List<Integer>>> slice = io.readTiles(table, serializer, Arrays.asList(index));
Assert.assertEquals(1, slice.size());
TileData<List<Integer>> tile = slice.get(0);
io.dropTable(table);
// NOTE(review): fragment of a decomposition property check — the enclosing
// method signature (declaring `start` and `end`) and the checks that use
// `duplicateSizes` are elided from this view.
List<Pair<Integer, Integer>> rangeElts = HBaseSlicedPyramidIO.decomposeRange(start, end);
// Any valid range must decompose into at least one element.
Assert.assertTrue(rangeElts.size() > 0);
// Check 5, part of check 2
int duplicateSizes = 0;
/**
 * Live-cluster test against a disposable sliced table.
 * NOTE(review): fragment — the method body continues beyond this view.
 */
@Test public void testWeirdTable () throws Exception {
String table = "nycHeatmap_sw2015_sliced_DELETEME";
HBaseSlicedPyramidIO io = new HBaseSlicedPyramidIO("hadoop-s1", "2181", "hadoop-s1:60000");
TileSerializer<List<Integer>> serializer = new KryoSerializer<>(new TypeDescriptor(List.class, new TypeDescriptor(Integer.class)));