/**
 * Builds the {@link Aggregation.Type#COUNT_DISTINCT} Aggregation for a COUNT DISTINCT function call. The
 * arguments of the call become the fields of the aggregation. Attributes carrying the new name are only set
 * when the aliases of this extractor contain the call.
 *
 * @param node The non-null COUNT_DISTINCT {@link FunctionCall} expression.
 * @return A {@link Aggregation.Type#COUNT_DISTINCT} Aggregation.
 * @throws NullPointerException when node is null.
 */
public Aggregation extractCountDistinct(FunctionCall node) throws NullPointerException {
    requireNonNull(node);

    Aggregation aggregation = new Aggregation();
    aggregation.setType(COUNT_DISTINCT);
    aggregation.setFields(getFields(node.getArguments()));

    // Without an alias there is nothing to rename, so no attributes are attached.
    if (!aliases.containsKey(node)) {
        return aggregation;
    }

    Map<String, Object> renaming = new HashMap<>();
    renaming.put(NEW_NAME_FIELD, getAlias(node));
    aggregation.setAttributes(renaming);
    return aggregation;
}
/** * Constructor that takes in an {@link Aggregation} and a {@link BulletConfig}. The size of the aggregation is used * as a LIMIT operation. * * @param aggregation The aggregation that specifies how and what this will compute. * @param config The config that has relevant configs for this strategy. */ @SuppressWarnings("unchecked") public Raw(Aggregation aggregation, BulletConfig config) { int maximumSize = config.getAs(BulletConfig.RAW_AGGREGATION_MAX_SIZE, Integer.class); size = Math.min(aggregation.getSize(), maximumSize); }
/**
 * Extract a {@link Aggregation.Type#RAW} Aggregation.
 * <p>
 * The LIMIT arrives as a long while the aggregation size is an int. The value is saturated to the int range
 * instead of being narrowed with {@code intValue()}, which silently wraps out-of-range limits to unrelated
 * (possibly negative) sizes.
 *
 * @param size The non-null size from BQL LIMIT clause.
 * @return A {@link Aggregation.Type#RAW} Aggregation.
 * @throws NullPointerException when size is null.
 */
public Aggregation extractRaw(Optional<Long> size) throws NullPointerException {
    requireNonNull(size);

    Aggregation raw = new Aggregation();
    raw.setType(Aggregation.Type.RAW);
    // The size is left untouched when no LIMIT was given.
    size.ifPresent(limit -> raw.setSize(
            (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, limit.longValue()))));
    return raw;
}
@Override @SuppressWarnings("unchecked") public void configure(BulletConfig config) { if (filters != null) { filters = rewriteClauses(filters); filters.forEach(f -> f.configure(config)); } if (projection != null) { projection.configure(config); } // Must have an aggregation if (aggregation == null) { aggregation = new Aggregation(); } aggregation.configure(config); boolean disableWindowing = config.getAs(BulletConfig.WINDOW_DISABLE, Boolean.class); if (disableWindowing) { window = null; } else if (window != null) { window.configure(config); } long durationDefault = config.getAs(BulletConfig.QUERY_DEFAULT_DURATION, Long.class); long durationMax = config.getAs(BulletConfig.QUERY_MAX_DURATION, Long.class); // Null or negative, then default, else min of duration and max. duration = (duration == null || duration <= 0) ? durationDefault : Math.min(duration, durationMax); if (postAggregations != null) { postAggregations.forEach(p -> p.configure(config)); } }
/** * Constructor that requires an {@link Aggregation} and a {@link BulletConfig} configuration. * * @param aggregation An {@link Aggregation} with valid fields and attributes for this aggregation type. * @param config The config that has relevant configs for this strategy. */ @SuppressWarnings("unchecked") public TopK(Aggregation aggregation, BulletConfig config) { super(aggregation, config); String errorConfiguration = config.getAs(BulletConfig.TOP_K_AGGREGATION_SKETCH_ERROR_TYPE, String.class); ErrorType errorType = getErrorType(errorConfiguration); Map<String, Object> attributes = aggregation.getAttributes(); newName = attributes == null ? DEFAULT_NEW_NAME : attributes.getOrDefault(NEW_NAME_FIELD, DEFAULT_NEW_NAME).toString(); int maxMapSize = config.getAs(BulletConfig.TOP_K_AGGREGATION_SKETCH_ENTRIES, Integer.class); Number threshold = getThreshold(attributes); int size = aggregation.getSize(); BulletRecordProvider provider = config.getBulletRecordProvider(); sketch = threshold != null ? new FrequentItemsSketch(errorType, maxMapSize, threshold.longValue(), size, provider) : new FrequentItemsSketch(errorType, maxMapSize, size, provider); }
/**
 * Default constructor, as recommended for GSON. Starts the query off with a default {@link Aggregation}.
 */
public Query() {
    // An aggregation must always be present; this one is used when none is provided.
    this.aggregation = new Aggregation();
}
aggregation.initialize().ifPresent(errors::addAll); Aggregation.Type type = aggregation.getType(); Window.Classification kind = window.getType(); if (type != Aggregation.Type.RAW && kind == Window.Classification.RECORD_RECORD) {
/** * Returns a new {@link Strategy} instance that can handle this aggregation. * * @param aggregation The non-null, initialized {@link Aggregation} instance. * @param config The {@link BulletConfig} containing configuration for the strategy. * * @return The created instance of a strategy that can implement the Aggregation. */ public static Strategy findStrategy(Aggregation aggregation, BulletConfig config) { // Guaranteed to be present. switch (aggregation.getType()) { case COUNT_DISTINCT: return new CountDistinct(aggregation, config); case DISTRIBUTION: return new Distribution(aggregation, config); case RAW: return new Raw(aggregation, config); case TOP_K: return new TopK(aggregation, config); } // If we have any fields -> GroupBy return Utilities.isEmpty(aggregation.getFields()) ? new GroupAll(aggregation, config) : new GroupBy(aggregation, config); } }
@Override public Optional<List<BulletError>> initialize() { if (Utilities.isEmpty(fields) || fields.size() != 1) { return Optional.of(singletonList(REQUIRES_ONE_FIELD_ERROR)); } Map<String, Object> attributes = aggregation.getAttributes(); if (Utilities.isEmpty(attributes)) { return Optional.of(singletonList(REQUIRES_TYPE_ERROR)); } String typeString = Utilities.getCasted(attributes, TYPE, String.class); Type type = SUPPORTED_DISTRIBUTION_TYPES.get(typeString); if (type == null) { return Optional.of(singletonList(REQUIRES_TYPE_ERROR)); } // Try to initialize sketch now sketch = getSketch(entries, maxPoints, rounding, type, attributes, provider); if (sketch == null) { return Optional.of(type == Type.QUANTILE ? asList(REQUIRES_POINTS_ERROR, REQUIRES_POINTS_PROPER_RANGE) : singletonList(REQUIRES_POINTS_ERROR)); } // Initialize field since we have exactly 1 field = fields.get(0); return Optional.empty(); }
/**
 * Records, as transient fields, the fields required by the given post strategy that are not among the values
 * of the projection fields. Nothing is done unless the query is a {@link Aggregation.Type#RAW} aggregation
 * with a projection that has fields.
 *
 * @param postStrategy The {@link PostStrategy} whose required fields are checked.
 */
private void addTransientFieldsFor(PostStrategy postStrategy) {
    Projection projection = runningQuery.getQuery().getProjection();
    Aggregation aggregation = runningQuery.getQuery().getAggregation();
    if (aggregation.getType() != Aggregation.Type.RAW || projection == null) {
        return;
    }

    Map<String, String> projected = projection.getFields();
    if (projected == null) {
        return;
    }

    postStrategy.getRequiredFields()
                .stream()
                .filter(required -> !projected.containsValue(required))
                .forEach(missing -> transientFields.put(missing, missing));
}
}
/**
 * Creates a Sketch based strategy. Reads the metadata settings and the composite field separator from the
 * config and takes the fields from the aggregation.
 *
 * @param aggregation An {@link Aggregation} with valid fields and attributes for this aggregation type.
 * @param config The config that has relevant configs for this sketch based strategy.
 */
@SuppressWarnings("unchecked")
public SketchingStrategy(Aggregation aggregation, BulletConfig config) {
    this.metadataKeys = (Map<String, String>) config.getAs(BulletConfig.RESULT_METADATA_METRICS, Map.class);
    this.separator = config.getAs(BulletConfig.AGGREGATION_COMPOSITE_FIELD_SEPARATOR, String.class);
    this.shouldMeta = config.getAs(BulletConfig.RESULT_METADATA_ENABLE, Boolean.class);

    this.fieldsToNames = aggregation.getFields();
    // The field list is the key set of the field mapping, or empty when there is no mapping.
    if (Utilities.isEmpty(fieldsToNames)) {
        this.fields = Collections.emptyList();
    } else {
        this.fields = new ArrayList<>(fieldsToNames.keySet());
    }
}
/**
 * Creates this strategy from an {@link Aggregation} and a {@link BulletConfig}. The group operations come
 * from the aggregation attributes and the size of the aggregation is capped by the
 * {@link BulletConfig#GROUP_AGGREGATION_MAX_SIZE} setting.
 *
 * @param aggregation An {@link Aggregation} with valid fields and attributes for this aggregation type.
 * @param config The config that has relevant configs for this strategy.
 */
@SuppressWarnings("unchecked")
public GroupBy(Aggregation aggregation, BulletConfig config) {
    super(aggregation, config);

    operations = GroupOperation.getOperations(aggregation.getAttributes());
    Map<GroupOperation, Number> initialMetrics = GroupData.makeInitialMetrics(operations);
    container = new CachingGroupData(null, initialMetrics);

    ResizeFactor resizeFactor = getResizeFactor(config, BulletConfig.GROUP_AGGREGATION_SKETCH_RESIZE_FACTOR);
    float samplingProbability = config.getAs(BulletConfig.GROUP_AGGREGATION_SKETCH_SAMPLING, Float.class);

    // Default at 512 gives a 13.27% error rate at 99.73% confidence (3 SD). Irrelevant since we are using this to
    // mostly cap the number of groups. You can use the Sketch theta to extrapolate the aggregation for all the data.
    int nominalEntries = config.getAs(BulletConfig.GROUP_AGGREGATION_SKETCH_ENTRIES, Integer.class);

    int sizeCeiling = config.getAs(BulletConfig.GROUP_AGGREGATION_MAX_SIZE, Integer.class);
    int cappedSize = Math.min(aggregation.getSize(), sizeCeiling);

    sketch = new TupleSketch(resizeFactor, samplingProbability, nominalEntries, cappedSize,
                             config.getBulletRecordProvider());
}
/**
 * Creates this strategy from an {@link Aggregation}. Only the group operations in the aggregation attributes
 * are used.
 *
 * @param aggregation The {@link Aggregation} that specifies how and what this will compute.
 * @param config The BulletConfig.
 */
public GroupAll(Aggregation aggregation, BulletConfig config) {
    // Group All produces a single result, so the size etc. are meaningless; only the operations matter.
    Map<String, Object> attributes = aggregation.getAttributes();
    this.operations = GroupOperation.getOperations(attributes);
    this.data = new GroupData(this.operations);
    this.provider = config.getBulletRecordProvider();
}
/**
 * Extract a {@link Aggregation.Type#GROUP} Aggregation.
 * <p>
 * Fields are only set when there are GROUP BY fields, and attributes are only set when the group attributes
 * are not empty. The size is a long in BQL but an int in the aggregation, so it is saturated to the int range
 * instead of being narrowed with {@code intValue()}, which silently wraps out-of-range sizes.
 *
 * @param selectFields The non-null Set of {@link Expression} in the BQL SELECT clause.
 * @param groupByFields The non-null Set of {@link Expression} in the BQL GROUP BY clause.
 * @param size The non-null Optional of size of this group aggregation.
 * @return A {@link Aggregation.Type#GROUP} Aggregation.
 * @throws NullPointerException when any of selectFields, groupByFields and size is null.
 * @throws ParsingException when any of the select fields is neither a grouping function nor in the GROUP BY
 *                          clause.
 */
public Aggregation extractGroup(Set<Expression> selectFields, Set<Expression> groupByFields, Optional<Long> size)
        throws NullPointerException, ParsingException {
    requireNonNull(selectFields);
    requireNonNull(groupByFields);
    requireNonNull(size);

    Aggregation group = new Aggregation();
    group.setType(GROUP);
    // The size is left untouched when none was given.
    size.ifPresent(requested -> group.setSize(
            (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, requested.longValue()))));

    if (!groupByFields.isEmpty()) {
        group.setFields(getFields(new ArrayList<>(groupByFields)));
    }

    Map<String, Object> attributes = getGroupAttributes(selectFields, groupByFields);
    if (!attributes.isEmpty()) {
        group.setAttributes(attributes);
    }
    return group;
}
/**
 * Creates this strategy from an {@link Aggregation} and a {@link BulletConfig}. The sketch settings are read
 * from the config, and the new name is read from the aggregation attributes when present.
 *
 * @param aggregation An {@link Aggregation} with valid fields and attributes for this aggregation type.
 * @param config The config that has relevant configs for this strategy.
 */
@SuppressWarnings("unchecked")
public CountDistinct(Aggregation aggregation, BulletConfig config) {
    super(aggregation, config);
    Map<String, Object> attributes = aggregation.getAttributes();

    ResizeFactor resizeFactor = getResizeFactor(config, BulletConfig.COUNT_DISTINCT_AGGREGATION_SKETCH_RESIZE_FACTOR);
    float samplingProbability = config.getAs(BulletConfig.COUNT_DISTINCT_AGGREGATION_SKETCH_SAMPLING, Float.class);
    String familyName = config.getAs(BulletConfig.COUNT_DISTINCT_AGGREGATION_SKETCH_FAMILY, String.class);
    Family family = getFamily(familyName);
    int nominalEntries = config.getAs(BulletConfig.COUNT_DISTINCT_AGGREGATION_SKETCH_ENTRIES, Integer.class);

    // Use the default name unless the attributes provide one.
    if (attributes == null) {
        newName = DEFAULT_NEW_NAME;
    } else {
        newName = attributes.getOrDefault(NEW_NAME_FIELD, DEFAULT_NEW_NAME).toString();
    }

    sketch = new ThetaSketch(resizeFactor, family, samplingProbability, nominalEntries,
                             config.getBulletRecordProvider());
}
/**
 * Creates this strategy from an {@link Aggregation} and a {@link BulletConfig}. The maximum number of points
 * is the smaller of the aggregation size and the {@link BulletConfig#DISTRIBUTION_AGGREGATION_MAX_POINTS}
 * setting. The sketch itself is not created here.
 *
 * @param aggregation An {@link Aggregation} with valid fields and attributes for this aggregation type.
 * @param config The config that has relevant configs for this strategy.
 */
@SuppressWarnings("unchecked")
public Distribution(Aggregation aggregation, BulletConfig config) {
    super(aggregation, config);

    this.entries = config.getAs(BulletConfig.DISTRIBUTION_AGGREGATION_SKETCH_ENTRIES, Integer.class);
    this.rounding = config.getAs(BulletConfig.DISTRIBUTION_AGGREGATION_GENERATED_POINTS_ROUNDING, Integer.class);

    int configuredPointLimit = config.getAs(BulletConfig.DISTRIBUTION_AGGREGATION_MAX_POINTS, Integer.class);
    this.maxPoints = Math.min(configuredPointLimit, aggregation.getSize());

    this.aggregation = aggregation;
    this.provider = config.getBulletRecordProvider();
    // Creating the sketch is left to initialize.
}
/**
 * Extract a {@link Aggregation.Type#TOP_K} Aggregation.
 * <p>
 * The attributes hold the threshold, when present, and the alias of any aliased {@link FunctionCall} as the
 * new name. They are only set on the aggregation if not empty. The size is a long in BQL but an int in the
 * aggregation, so it is saturated to the int range instead of being narrowed with {@code intValue()}, which
 * silently wraps out-of-range sizes.
 *
 * @param groupByFields The non-null Set of {@link Expression} in the BQL GROUP BY clause.
 * @param threshold The non-null Optional of threshold of this TopK aggregation.
 * @param size The non-null Optional of size of this TopK aggregation.
 * @return A {@link Aggregation.Type#TOP_K} Aggregation.
 * @throws NullPointerException when any of groupByFields, threshold and size is null.
 */
public Aggregation extractTopK(Set<Expression> groupByFields, Optional<Long> threshold, Optional<Long> size)
        throws NullPointerException {
    requireNonNull(groupByFields);
    requireNonNull(threshold);
    requireNonNull(size);

    Aggregation topK = new Aggregation();
    topK.setType(TOP_K);
    // The size is left untouched when none was given.
    size.ifPresent(requested -> topK.setSize(
            (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, requested.longValue()))));
    topK.setFields(getFields(new ArrayList<>(groupByFields)));

    Map<String, Object> attributes = new HashMap<>();
    threshold.ifPresent(min -> attributes.put(THRESHOLD_FIELD, min));
    // Any aliased function call provides the new name; if there are several, the last one iterated wins.
    for (Node node : aliases.keySet()) {
        if (node instanceof FunctionCall) {
            attributes.put(NEW_NAME_FIELD, getAlias((Expression) node));
        }
    }
    if (!attributes.isEmpty()) {
        topK.setAttributes(attributes);
    }
    return topK;
}
/**
 * Extract a {@link Aggregation.Type#DISTRIBUTION} Aggregation.
 * <p>
 * The attributes of the node are used as the attributes of the aggregation. When the node is aliased, the
 * alias is added as the new name to a copy of those attributes, so the map handed out by the node is never
 * modified. The size is a long in BQL but an int in the aggregation, so it is saturated to the int range
 * instead of being narrowed with {@code intValue()}, which silently wraps out-of-range sizes.
 *
 * @param node The non-null {@link Distribution} expression.
 * @param size The non-null Optional of size of this distribution aggregation.
 * @return A {@link Aggregation.Type#DISTRIBUTION} Aggregation.
 * @throws NullPointerException when any of node and size is null.
 */
public Aggregation extractDistribution(Distribution node, Optional<Long> size) throws NullPointerException {
    requireNonNull(node);
    requireNonNull(size);

    Aggregation distribution = new Aggregation();
    distribution.setType(DISTRIBUTION);
    // The size is left untouched when none was given.
    size.ifPresent(requested -> distribution.setSize(
            (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, requested.longValue()))));
    distribution.setFields(getFields(node.getColumns()));

    Map<String, Object> attributes = node.getAttributes();
    if (aliases.containsKey(node)) {
        // Copy before adding the new name so that the node's own attributes stay as they were.
        attributes = new HashMap<>(attributes);
        attributes.put(NEW_NAME_FIELD, getAlias(node));
    }
    distribution.setAttributes(attributes);
    return distribution;
}
/**
 * Extract a {@link Aggregation.Type#TOP_K} Aggregation.
 * <p>
 * The attributes hold the threshold of the node, when present, and the alias of the node as the new name,
 * when it is aliased. They are only set on the aggregation if not empty. The size of the node is saturated to
 * the int range instead of being narrowed with {@code intValue()}, which silently wraps out-of-range sizes.
 *
 * @param node The non-null {@link TopK} expression.
 * @return A {@link Aggregation.Type#TOP_K} Aggregation.
 * @throws NullPointerException when node is null.
 */
public Aggregation extractTopKFunction(TopK node) throws NullPointerException {
    requireNonNull(node);

    Aggregation topK = new Aggregation();
    topK.setType(TOP_K);
    long requestedSize = node.getSize().longValue();
    topK.setSize((int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, requestedSize)));
    topK.setFields(getFields(node.getColumns()));

    Map<String, Object> attributes = new HashMap<>();
    node.getThreshold().ifPresent(threshold -> attributes.put(THRESHOLD_FIELD, threshold));
    if (aliases.containsKey(node)) {
        attributes.put(NEW_NAME_FIELD, getAlias(node));
    }
    if (!attributes.isEmpty()) {
        topK.setAttributes(attributes);
    }
    return topK;
}