/**
 * Builds the aggregation descriptor for a parameterless {@code count} evaluated in
 * {@code PARTIAL1} mode.
 *
 * @param ctx vectorization context; not read by this method
 * @return a new descriptor named "count" with an empty parameter list and a fresh
 *         {@code GenericUDAFCountEvaluator}
 */
private static AggregationDesc buildAggregationDescCountStar(
    VectorizationContext ctx) {
  final AggregationDesc countStar = new AggregationDesc();
  countStar.setGenericUDAFName("count");
  countStar.setMode(GenericUDAFEvaluator.Mode.PARTIAL1);

  // No arguments: an empty (non-null) parameter list.
  final ArrayList<ExprNodeDesc> noParameters = new ArrayList<ExprNodeDesc>();
  countStar.setParameters(noParameters);

  countStar.setGenericUDAFEvaluator(new GenericUDAFCount.GenericUDAFCountEvaluator());
  return countStar;
}
private static ImmutablePair<VectorAggregationDesc,String> getVectorAggregationDesc( AggregationDesc aggrDesc, VectorizationContext vContext) throws HiveException { String aggregateName = aggrDesc.getGenericUDAFName(); List<ExprNodeDesc> parameterList = aggrDesc.getParameters(); final int parameterCount = parameterList.size(); final GenericUDAFEvaluator.Mode udafEvaluatorMode = aggrDesc.getMode(); /* * Look at evaluator to get output type info. */ GenericUDAFEvaluator evaluator = aggrDesc.getGenericUDAFEvaluator(); ObjectInspector[] parameterObjectInspectors = new ObjectInspector[parameterCount]; for (int i = 0; i < parameterCount; i++) { TypeInfo typeInfo = parameterList.get(i).getTypeInfo(); parameterObjectInspectors[i] = TypeInfoUtils .getStandardWritableObjectInspectorFromTypeInfo(typeInfo); } // The only way to get the return object inspector (and its return type) is to // initialize it... ObjectInspector returnOI = evaluator.init( aggrDesc.getMode(), parameterObjectInspectors); final TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(returnOI.getTypeName()); return getVectorAggregationDesc( aggregateName, parameterList, evaluator, outputTypeInfo, udafEvaluatorMode, vContext); }
private boolean validateAggregationDesc(AggregationDesc aggDesc, GroupByDesc.Mode groupByMode, boolean hasKeys) { String udfName = aggDesc.getGenericUDAFName().toLowerCase(); if (!supportedAggregationUdfs.contains(udfName)) { setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported"); return false; } // The planner seems to pull this one out. if (aggDesc.getDistinct()) { setExpressionIssue("Aggregation Function", "DISTINCT not supported"); return false; } ArrayList<ExprNodeDesc> parameters = aggDesc.getParameters(); if (parameters != null && !validateExprNodeDesc(parameters, "Aggregation Function UDF " + udfName + " parameter")) { return false; } return true; }
/**
 * Decides whether an aggregator is acceptable: its parameters must pass
 * {@code checkExpressions} and its UDAF name must resolve in the function registry to a
 * function that reports itself as native.
 *
 * @param agg the aggregator to inspect
 * @return {@code true} only if all of the above hold
 * @throws SemanticException propagated from the expression check or the registry lookup
 */
private boolean checkAggregator(AggregationDesc agg) throws SemanticException {
  if (LOG.isDebugEnabled()) {
    LOG.debug(String.format("Checking '%s'", agg.getExprString()));
  }

  final boolean parametersAccepted = checkExpressions(agg.getParameters());

  // The registry lookup runs regardless of the outcome of the parameter check.
  final FunctionInfo functionInfo = FunctionRegistry.getFunctionInfo(agg.getGenericUDAFName());

  if (parametersAccepted && functionInfo != null && functionInfo.isNative()) {
    return true;
  }

  LOG.info("Aggregator is not native: " + agg.getExprString());
  return false;
}
for (int i = 0; i < aggrs.size(); i++) { AggregationDesc aggr = aggrs.get(i); ArrayList<ExprNodeDesc> parameters = aggr.getParameters(); aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()]; aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters String name = names[names.length - 2]; int tag = Integer.parseInt(name.split("\\:")[1]); if (aggr.getDistinct()) { if (aggr.getDistinct()) { aggregationIsDistinct[i] = true; for (int i = 0; i < aggregationEvaluators.length; i++) { AggregationDesc agg = conf.getAggregators().get(i); aggregationEvaluators[i] = agg.getGenericUDAFEvaluator(); .get(i).getMode(), aggregationParameterObjectInspectors[i]);
// For a DISTINCT aggregation: take its first parameter (asserted, then cast, to
// ExprNodeColumnDesc), rename that column in place to "VALUE._col" + pos, make it the
// aggregation's only parameter, clear the distinct flag and switch the mode to FINAL.
if (aggr.getDistinct()) { ArrayList<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>(); ExprNodeDesc param = aggr.getParameters().get(0); assert param instanceof ExprNodeColumnDesc; ExprNodeColumnDesc paramC = (ExprNodeColumnDesc) param; paramC.setColumn("VALUE._col" + pos); parameters.add(paramC); aggr.setParameters(parameters); aggr.setDistinct(false); aggr.setMode(Mode.FINAL);
// Translate the HASH group-by mode plus the UDAF's distinct flag into an evaluator mode,
// then append an AggregationDesc built from udafAttr (lower-cased name, evaluator,
// parameters, distinct flag) with that mode.
Mode amode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.HASH, udafAttr.isDistinctUDAF); aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udafAttr.udafEvaluator, udafAttr.udafParams, udafAttr.isDistinctUDAF, amode)); GenericUDAFInfo udafInfo;
if (newAggrList != null && newAggrList.size() > 0) { for (AggregationDesc aggregationDesc : newAggrList) { rewriteQueryCtx.setEval(aggregationDesc.getGenericUDAFEvaluator()); aggrExprNode = (ExprNodeColumnDesc) aggregationDesc.getParameters().get(0); rewriteQueryCtx.setAggrExprNode(aggrExprNode); if (childAggrList != null && childAggrList.size() > 0) { for (AggregationDesc aggregationDesc : childAggrList) { List<ExprNodeDesc> paraList = aggregationDesc.getParameters(); List<ObjectInspector> parametersOIList = new ArrayList<ObjectInspector>(); for (ExprNodeDesc expr : paraList) { aggregationDesc.setGenericUDAFEvaluator(evaluator); aggregationDesc.setGenericUDAFName("sum");
// Pass the aggregator list returned by cGBYm's conf directly to cGBYr's conf, then switch
// every one of those aggregators to COMPLETE mode and replace its parameters with the
// result of ExprNodeDescUtils.backtrack over (parameters, cGBYr, cRS).
cGBYr.getConf().setAggregators(cGBYm.getConf().getAggregators()); for (AggregationDesc aggr : cGBYm.getConf().getAggregators()) { aggr.setMode(GenericUDAFEvaluator.Mode.COMPLETE); aggr.setParameters(ExprNodeDescUtils.backtrack(aggr.getParameters(), cGBYr, cRS));
// Resolve UDAF info for desc's evaluator under mode amode with aggParameters, then append
// an AggregationDesc that keeps desc's UDAF name but uses the resolved evaluator and
// converted parameters (fourth constructor argument is false - presumably the distinct
// flag; confirm against the AggregationDesc constructor).
// The internal column name is requested for aggregations.size() as it is after the add.
GenericUDAFEvaluator genericUDAFEvaluator = desc.getGenericUDAFEvaluator(); GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); aggregations.add(new AggregationDesc(desc.getGenericUDAFName(), udaf.genericUDAFEvaluator, udaf.convertedParameters, false, amode)); String f = SemanticAnalyzer.getColumnInternalName(aggregations.size());
// Continue only when conf holds exactly one aggregator and its UDAF name equals "count"
// (exact, case-sensitive comparison); in every other case return null.
// If that aggregator's parameter list is null or does not contain exactly one entry,
// record this on canApplyCtx via setAggParameterException(true).
List<AggregationDesc> aggrList = conf.getAggregators(); if (aggrList == null || aggrList.size() != 1 || !("count".equals(aggrList.get(0).getGenericUDAFName()))) { return null; } else { List<ExprNodeDesc> para = aggrList.get(0).getParameters(); if (para == null || para.size() == 0 || para.size() > 1) { canApplyCtx.setAggParameterException(true);
AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", aggFnOIs, false, false), params, false, Mode.PARTIAL1); AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", aggFnOIs, false, false), params, false, Mode.PARTIAL1); AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", aggFnOIs, false, false), params, false, Mode.PARTIAL1); GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator(); bloomFilterEval.setSourceOperator(selectOp); bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES)); bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR)); bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval); aggs.add(min); aggs.add(max); AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", minFinalFnOIs, false, false), minFinalParams, false, Mode.FINAL); AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", maxFinalFnOIs, false, false), maxFinalParams, false, Mode.FINAL); AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", bloomFilterFinalFnOIs,
/**
 * Sets up {@code resultWriter} from the aggregation's first parameter expression.
 *
 * @param desc aggregation descriptor; its parameter list must have an element at index 0
 * @throws HiveException propagated from the writer factory
 */
@Override
public void init(AggregationDesc desc) throws HiveException {
  // Only the first parameter is consulted when creating the writer.
  this.resultWriter =
      VectorExpressionWriterFactory.genVectorExpressionWritable(
          desc.getParameters().get(0));
}
paramOIs.add(exprNodeDesc.getWritableObjectInspector()); AggregationDesc aggDesc = new AggregationDesc("sum", FunctionRegistry.getGenericUDAFEvaluator("sum", paramOIs, false, false), params, new VectorAggregationDesc[] { new VectorAggregationDesc( aggDesc.getGenericUDAFName(), new GenericUDAFSum.GenericUDAFSumLong(), aggDesc.getMode(), TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG, null, TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG, VectorUDAFCountStar.class)});
public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc) throws HiveException { ArrayList<ExprNodeDesc> paramDescList = desc.getParameters(); VectorExpression[] vectorParams = new VectorExpression[paramDescList.size()]; String aggregateName = desc.getGenericUDAFName(); VectorExpressionDescriptor.ArgumentType inputType = VectorExpressionDescriptor.ArgumentType.NONE; GenericUDAFEvaluator.Mode udafEvaluatorMode = desc.getMode(); for (AggregateDefinition aggDef : aggregatesDefinition) { if (aggregateName.equalsIgnoreCase(aggDef.getName()) &&
/**
 * Tells whether this group-by behaves like a distinct: every aggregator that is not
 * itself DISTINCT must use an evaluator whose class carries a {@code UDFType} annotation
 * with {@code distinctLike()} set (min and max are examples of such operators).
 *
 * @return {@code false} as soon as a non-distinct aggregator without such an annotation
 *         is found, {@code true} otherwise (including when there are no aggregators)
 */
public boolean isDistinctLike() {
  for (AggregationDesc aggregator : getAggregators()) {
    if (aggregator.getDistinct()) {
      // Explicitly DISTINCT aggregators need no further inspection.
      continue;
    }

    UDFType udfType = AnnotationUtils.getAnnotation(
        aggregator.getGenericUDAFEvaluator().getClass(), UDFType.class);
    boolean distinctLike = udfType != null && udfType.distinctLike();
    if (!distinctLike) {
      return false;
    }
  }
  return true;
}
/**
 * Handles a file sink reached through a group-by / reduce-sink / group-by chain on the
 * operator stack.
 *
 * <p>Nothing is changed unless the reduce sink uses exactly one reducer and has no key
 * columns. Otherwise an intermediate table descriptor is created from the parent
 * group-by, the child group-by's aggregation parameters are backtracked through the
 * reduce sink, the fetch table descriptor/source/sink are registered on {@code pctx},
 * and the reduce sink and child group-by are unlinked.
 *
 * @param nd the node being visited; cast to {@code FileSinkOperator}
 * @param stack operator stack; positions are taken relative to its top, shifted by one
 *        when the element directly below the top is not a {@code SelectOperator}
 * @param procCtx not read by this method
 * @param nodeOutputs not read by this method
 * @return always {@code null}
 * @throws SemanticException declared by the signature
 */
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  final FileSinkOperator fileSink = (FileSinkOperator) nd;

  final int depth = stack.size();
  // Without a SelectOperator directly below the top, every ancestor sits one slot higher.
  final int shift = (stack.get(depth - 2) instanceof SelectOperator) ? 0 : 1;

  final GroupByOperator childGroupBy = (GroupByOperator) stack.get(depth - 3 + shift);
  final ReduceSinkOperator reduceSink = (ReduceSinkOperator) stack.get(depth - 4 + shift);

  final boolean singleReducerWithoutKeys =
      reduceSink.getConf().getNumReducers() == 1
          && reduceSink.getConf().getKeyCols().isEmpty();
  if (!singleReducerWithoutKeys) {
    return null;
  }

  final GroupByOperator parentGroupBy = (GroupByOperator) stack.get(depth - 5 + shift);

  final Path finalDir = fileSink.getConf().getFinalDirName();
  final TableDesc intermediateDesc = createIntermediateFS(parentGroupBy, finalDir);

  // Re-express each aggregation parameter by backtracking it through the reduce sink.
  for (AggregationDesc aggregation : childGroupBy.getConf().getAggregators()) {
    aggregation.setParameters(
        ExprNodeDescUtils.backtrack(aggregation.getParameters(), childGroupBy, reduceSink));
  }

  pctx.setFetchTabledesc(intermediateDesc);
  pctx.setFetchSource(childGroupBy);
  pctx.setFetchSink(SimpleFetchOptimizer.replaceFSwithLS(fileSink, "NULL"));

  // Unlink the reduce sink completely and detach the child group-by from its parents.
  reduceSink.setParentOperators(null);
  reduceSink.setChildOperators(null);
  childGroupBy.setParentOperators(null);

  return null;
}
/**
 * Collects the evaluator of every aggregation, preserving the order of {@code aggs}.
 *
 * @param aggs aggregation descriptors to read
 * @return a new list with one evaluator per descriptor
 */
static ArrayList<GenericUDAFEvaluator> getUDAFEvaluators(
    ArrayList<AggregationDesc> aggs) {
  final ArrayList<GenericUDAFEvaluator> evaluators = new ArrayList<GenericUDAFEvaluator>();
  for (AggregationDesc agg : aggs) {
    evaluators.add(agg.getGenericUDAFEvaluator());
  }
  return evaluators;
}
/**
 * Builds a group-by descriptor, and its vector counterpart, for a single
 * {@code count} aggregation with no parameters.
 *
 * <p>The group-by descriptor gets one aggregator (from
 * {@code buildAggregationDescCountStar}) and one output column, {@code _col0}. The vector
 * descriptor gets one {@code VectorAggregationDesc} bound to {@code VectorUDAFCountStar}
 * and is put in HASH processing mode.
 *
 * @param ctx vectorization context, forwarded to {@code buildAggregationDescCountStar}
 * @return the pair (group-by descriptor, vector group-by descriptor)
 */
private static Pair<GroupByDesc,VectorGroupByDesc> buildGroupByDescCountStar(
    VectorizationContext ctx) {

  final AggregationDesc countStar = buildAggregationDescCountStar(ctx);

  final ArrayList<AggregationDesc> aggregators = new ArrayList<AggregationDesc>();
  aggregators.add(countStar);

  final ArrayList<String> columnNames = new ArrayList<String>();
  columnNames.add("_col0");

  final GroupByDesc groupByDesc = new GroupByDesc();
  final VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();

  // Input side: no type info and ColumnVector.Type.NONE; output side: long.
  final VectorAggregationDesc countStarVectorDesc =
      new VectorAggregationDesc(
          countStar.getGenericUDAFName(),
          new GenericUDAFCount.GenericUDAFCountEvaluator(),
          countStar.getMode(),
          null, ColumnVector.Type.NONE, null,
          TypeInfoFactory.longTypeInfo, ColumnVector.Type.LONG,
          VectorUDAFCountStar.class);
  vectorGroupByDesc.setVecAggrDescs(new VectorAggregationDesc[] { countStarVectorDesc });
  vectorGroupByDesc.setProcessingMode(VectorGroupByDesc.ProcessingMode.HASH);

  groupByDesc.setOutputColumnNames(columnNames);
  groupByDesc.setAggregators(aggregators);

  return new Pair<GroupByDesc,VectorGroupByDesc>(groupByDesc, vectorGroupByDesc);
}
// Skip every aggregation that is not the bloom filter. The name is compared with equals():
// `!=` on Strings compares object references and is only correct when both strings happen
// to be the same interned instance. Calling equals() on the literal also keeps a null name
// on the "skip" path.
boolean removeSemiJoin = false; for (AggregationDesc agg : aggregationDescs) { if (!"bloom_filter".equals(agg.getGenericUDAFName())) { continue; (GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator(); long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries(); if (expectedEntries == -1 || expectedEntries >