private GTScanRange mergeKeyRange(List<GTScanRange> ranges) { GTScanRange first = ranges.get(0); if (ranges.size() == 1) return first; GTRecord start = first.pkStart; GTRecord end = first.pkEnd; Set<GTRecord> newFuzzyKeys = Sets.newLinkedHashSet(); boolean hasNonFuzzyRange = false; for (GTScanRange range : ranges) { hasNonFuzzyRange = hasNonFuzzyRange || range.fuzzyKeys.isEmpty(); newFuzzyKeys.addAll(range.fuzzyKeys); end = rangeEndComparator.max(end, range.pkEnd); } // if any range is non-fuzzy, then all fuzzy keys must be cleared // too many fuzzy keys will slow down HBase scan if (hasNonFuzzyRange || newFuzzyKeys.size() > maxFuzzyKeys) { if (newFuzzyKeys.size() > maxFuzzyKeys) { logger.debug("too many FuzzyKeys, clean it!"); } newFuzzyKeys.clear(); } return new GTScanRange(start, end, Lists.newArrayList(newFuzzyKeys)); }
@Test public void verifySegmentSkipping2() { { LogicalTupleFilter filter = and(timeComp0, ageComp1); CubeScanRangePlanner planner = new CubeScanRangePlanner(info, info.colRef(0), filter); List<GTScanRange> r = planner.planScanRanges(); assertEquals(1, r.size());//scan range are [close,close] assertEquals("[null, 10]-[1421193600000, 10]", r.get(0).toString()); assertEquals(1, r.get(0).fuzzyKeys.size()); assertEquals("[[null, 10, null, null, null]]", r.get(0).fuzzyKeys.toString()); } { LogicalTupleFilter filter = and(timeComp5, ageComp1); CubeScanRangePlanner planner = new CubeScanRangePlanner(info, info.colRef(0), filter); List<GTScanRange> r = planner.planScanRanges(); assertEquals(1, r.size());//scan range are [close,close] } }
List<GTScanRange> r = planner.planScanRanges(); assertEquals(1, r.size()); assertEquals("[1421193600000, 10]-[null, 20]", r.get(0).toString()); assertEquals("[[null, 10, null, null, null], [null, 20, null, null, null]]", r.get(0).fuzzyKeys.toString()); List<GTScanRange> r = planner.planScanRanges(); assertEquals(3, r.size()); assertEquals("[1421280000000, 10]-[1421280000000, 10]", r.get(0).toString()); assertEquals("[1421280000000, 20]-[1421280000000, 20]", r.get(1).toString()); assertEquals("[1421280000000, 30]-[1421280000000, 30]", r.get(2).toString()); planner.setMaxScanRanges(2); List<GTScanRange> r2 = planner.planScanRanges();
private List<GTScanRange> splitFuzzyKeys(List<GTScanRange> mergedRanges) { List<GTScanRange> result = Lists.newArrayList(); for (GTScanRange range : mergedRanges) { // if the fuzzy key is huge but still within in split range, then we split fuzzy keys to multiple ones. if (range.fuzzyKeys.size() > maxFuzzyKeysPerSplit && range.fuzzyKeys.size() <= maxFuzzyKeys) { List<GTRecord> fuzzyKeys = range.fuzzyKeys; Collections.sort(fuzzyKeys); int nSplit = (fuzzyKeys.size() - 1) / maxFuzzyKeysPerSplit + 1; int nFuzzyKeysPerSplit = fuzzyKeys.size() / nSplit; int startIndex = 0; for (int i = 1; i <= nSplit; i++) { int endIndex = i == nSplit ? fuzzyKeys.size() : i * nFuzzyKeysPerSplit; List<GTRecord> subFuzzyKeys = fuzzyKeys.subList(startIndex, endIndex); result.add(new GTScanRange(range.pkStart, range.pkEnd, subFuzzyKeys)); startIndex = endIndex; } logger.debug(String.format(Locale.ROOT, "large FuzzyKeys split size : %d", result.size())); } else { result.add(range); } } return result; }
List<GTScanRange> r = planner.planScanRanges(); assertEquals(1, r.size());//scan range are [close,close] assertEquals("[null, 10]-[1421193600000, 10]", r.get(0).toString()); assertEquals(1, r.get(0).fuzzyKeys.size()); assertEquals("[[null, 10, null, null, null]]", r.get(0).fuzzyKeys.toString()); List<GTScanRange> r = planner.planScanRanges(); assertEquals(2, r.size()); assertEquals("[1421193600000, 10]-[null, 10]", r.get(1).toString()); assertEquals("[[null, 10, null, null, null], [1421193600000, 10, null, null, null]]", r.get(1).fuzzyKeys.toString()); List<GTScanRange> r = planner.planScanRanges(); assertEquals(2, r.size()); assertEquals("[1421193600000, null]-[null, null]", r.get(1).toString()); assertEquals(0, r.get(1).fuzzyKeys.size()); List<GTScanRange> r = planner.planScanRanges(); assertEquals(1, r.size()); assertEquals("[null, null]-[null, null]", r.get(0).toString()); List<GTScanRange> r = planner.planScanRanges(); assertEquals(1, r.size()); assertEquals("[null, null]-[null, null]", r.get(0).toString());
public GTScanRange replaceGTInfo(final GTInfo gtInfo) { List<GTRecord> newFuzzyKeys = Lists.newArrayList(); for (GTRecord input : fuzzyKeys) { newFuzzyKeys.add(new GTRecord(gtInfo, input.cols)); } return new GTScanRange(new GTRecord(gtInfo, pkStart.cols), // new GTRecord(gtInfo, pkEnd.cols), // newFuzzyKeys); }
/**
 * Builds one scan range from a set of per-column ranges.
 *
 * <p>For every column range whose column index is set in
 * {@code gtInfo.getPrimaryKey()}, the range's {@code begin} and {@code end}
 * are written into the start and end records at that index. Column ranges
 * with a non-empty {@code valueSet} also contribute their values, keyed by
 * column index, to the input of {@code buildFuzzyKeys}. Columns outside the
 * primary key are ignored.
 *
 * @param andDimRanges the column ranges to translate
 * @return a new scan range with the populated start/end records and fuzzy keys
 */
protected GTScanRange newScanRange(Collection<ColumnRange> andDimRanges) {
    final GTRecord lowerBound = new GTRecord(gtInfo);
    final GTRecord upperBound = new GTRecord(gtInfo);
    final Map<Integer, Set<ByteArray>> valuesByColumn = Maps.newHashMap();

    for (ColumnRange dimRange : andDimRanges) {
        final int columnIndex = dimRange.column.getColumnDesc().getZeroBasedIndex();
        final boolean partOfPrimaryKey = gtInfo.getPrimaryKey().get(columnIndex);
        if (partOfPrimaryKey) {
            lowerBound.set(columnIndex, dimRange.begin);
            upperBound.set(columnIndex, dimRange.end);

            final boolean hasValues = !(dimRange.valueSet == null || dimRange.valueSet.isEmpty());
            if (hasValues) {
                valuesByColumn.put(columnIndex, dimRange.valueSet);
            }
        }
    }

    return new GTScanRange(lowerBound, upperBound, buildFuzzyKeys(valuesByColumn));
}
this.info = info; if (ranges == null) { this.ranges = Lists.newArrayList(new GTScanRange(new GTRecord(info), new GTRecord(info))); } else { this.ranges = ranges;
sFuzzyKeys.add(deserializeGTRecord(in, sInfo)); GTScanRange sRange = new GTScanRange(sPkStart, sPkEnd, sFuzzyKeys); sRanges.add(sRange);
private List<GTScanRange> splitFuzzyKeys(List<GTScanRange> mergedRanges) { List<GTScanRange> result = Lists.newArrayList(); for (GTScanRange range : mergedRanges) { // if the fuzzy key is huge but still within in split range, then we split fuzzy keys to multiple ones. if (range.fuzzyKeys.size() > maxFuzzyKeysPerSplit && range.fuzzyKeys.size() <= maxFuzzyKeys) { List<GTRecord> fuzzyKeys = range.fuzzyKeys; Collections.sort(fuzzyKeys); int nSplit = (fuzzyKeys.size() - 1) / maxFuzzyKeysPerSplit + 1; int nFuzzyKeysPerSplit = fuzzyKeys.size() / nSplit; int startIndex = 0; for (int i = 1; i <= nSplit; i++) { int endIndex = i == nSplit ? fuzzyKeys.size() : i * nFuzzyKeysPerSplit; List<GTRecord> subFuzzyKeys = fuzzyKeys.subList(startIndex, endIndex); result.add(new GTScanRange(range.pkStart, range.pkEnd, subFuzzyKeys)); startIndex = endIndex; } logger.debug(String.format(Locale.ROOT, "large FuzzyKeys split size : %d", result.size())); } else { result.add(range); } } return result; }
private GTScanRange mergeKeyRange(List<GTScanRange> ranges) { GTScanRange first = ranges.get(0); if (ranges.size() == 1) return first; GTRecord start = first.pkStart; GTRecord end = first.pkEnd; Set<GTRecord> newFuzzyKeys = Sets.newLinkedHashSet(); boolean hasNonFuzzyRange = false; for (GTScanRange range : ranges) { hasNonFuzzyRange = hasNonFuzzyRange || range.fuzzyKeys.isEmpty(); newFuzzyKeys.addAll(range.fuzzyKeys); end = rangeEndComparator.max(end, range.pkEnd); } // if any range is non-fuzzy, then all fuzzy keys must be cleared // too many fuzzy keys will slow down HBase scan if (hasNonFuzzyRange || newFuzzyKeys.size() > maxFuzzyKeys) { if (newFuzzyKeys.size() > maxFuzzyKeys) { logger.debug("too many FuzzyKeys, clean it!"); } newFuzzyKeys.clear(); } return new GTScanRange(start, end, Lists.newArrayList(newFuzzyKeys)); }
public GTScanRange replaceGTInfo(final GTInfo gtInfo) { List<GTRecord> newFuzzyKeys = Lists.newArrayList(); for (GTRecord input : fuzzyKeys) { newFuzzyKeys.add(new GTRecord(gtInfo, input.cols)); } return new GTScanRange(new GTRecord(gtInfo, pkStart.cols), // new GTRecord(gtInfo, pkEnd.cols), // newFuzzyKeys); }
/**
 * Builds one scan range from a set of per-column ranges.
 *
 * <p>For every column range whose column index is set in
 * {@code gtInfo.getPrimaryKey()}, the range's {@code begin} and {@code end}
 * are written into the start and end records at that index. Column ranges
 * with a non-empty {@code valueSet} also contribute their values, keyed by
 * column index, to the input of {@code buildFuzzyKeys}. Columns outside the
 * primary key are ignored.
 *
 * @param andDimRanges the column ranges to translate
 * @return a new scan range with the populated start/end records and fuzzy keys
 */
protected GTScanRange newScanRange(Collection<ColumnRange> andDimRanges) {
    final GTRecord lowerBound = new GTRecord(gtInfo);
    final GTRecord upperBound = new GTRecord(gtInfo);
    final Map<Integer, Set<ByteArray>> valuesByColumn = Maps.newHashMap();

    for (ColumnRange dimRange : andDimRanges) {
        final int columnIndex = dimRange.column.getColumnDesc().getZeroBasedIndex();
        final boolean partOfPrimaryKey = gtInfo.getPrimaryKey().get(columnIndex);
        if (partOfPrimaryKey) {
            lowerBound.set(columnIndex, dimRange.begin);
            upperBound.set(columnIndex, dimRange.end);

            final boolean hasValues = !(dimRange.valueSet == null || dimRange.valueSet.isEmpty());
            if (hasValues) {
                valuesByColumn.put(columnIndex, dimRange.valueSet);
            }
        }
    }

    return new GTScanRange(lowerBound, upperBound, buildFuzzyKeys(valuesByColumn));
}
this.info = info; if (ranges == null) { this.ranges = Lists.newArrayList(new GTScanRange(new GTRecord(info), new GTRecord(info))); } else { this.ranges = ranges;
sFuzzyKeys.add(deserializeGTRecord(in, sInfo)); GTScanRange sRange = new GTScanRange(sPkStart, sPkEnd, sFuzzyKeys); sRanges.add(sRange);