private Predicate buildPredicate(Split split, CategoricalValueEncodings categoricalValueEncodings) { if (split == null) { // Left child always applies, but is evaluated second return new True(); } int featureIndex = inputSchema.predictorToFeatureIndex(split.feature()); FieldName fieldName = FieldName.create(inputSchema.getFeatureNames().get(featureIndex)); if (split.featureType().equals(FeatureType.Categorical())) { // Note that categories in MLlib model select the *left* child but the // convention here will be that the predicate selects the *right* child // So the predicate will evaluate "not in" this set // More ugly casting @SuppressWarnings("unchecked") Collection<Double> javaCategories = (Collection<Double>) (Collection<?>) JavaConversions.seqAsJavaList(split.categories()); Set<Integer> negativeEncodings = javaCategories.stream().map(Double::intValue).collect(Collectors.toSet()); Map<Integer,String> encodingToValue = categoricalValueEncodings.getEncodingValueMap(featureIndex); List<String> negativeValues = negativeEncodings.stream().map(encodingToValue::get).collect(Collectors.toList()); String joinedValues = TextUtils.joinPMMLDelimited(negativeValues); return new SimpleSetPredicate(fieldName, SimpleSetPredicate.BooleanOperator.IS_NOT_IN, new Array(Array.Type.STRING, joinedValues)); } else { // For MLlib, left means <= threshold, so right means > return new SimplePredicate(fieldName, SimplePredicate.Operator.GREATER_THAN) .setValue(Double.toString(split.threshold())); } }
/**
 * Converts {@code double} values into a PMML {@link Array} of type {@code REAL}.
 *
 * @param values {@code double} values to place in the array
 * @return PMML {@link Array} whose value is the delimited form of {@code values} and whose
 *  {@code n} is the number of values
 */
public static Array toArray(double... values) {
  int count = values.length;
  List<Double> boxed = new ArrayList<>(count);
  for (int i = 0; i < count; i++) {
    boxed.add(values[i]);
  }
  Array result = new Array(Array.Type.REAL, TextUtils.joinPMMLDelimitedNumbers(boxed));
  return result.setN(boxed.size());
}
/**
 * Creates the per-bin total count {@link Array} for a numerical variable.
 *
 * Each element is the positive count plus the negative count of the bin at the same index.
 *
 * @param columnConfig
 *            - ColumnConfig for numerical variable; its positive and negative bin count
 *            lists are read index by index (presumably of equal length - confirm with
 *            the caller)
 * @return INT-typed Array of ( positive count + negative count ) per bin, values joined by
 *         a single space, with {@code n} set to the number of bins
 */
private Array createCountArray(ColumnConfig columnConfig) {
    List<Integer> binCountPos = columnConfig.getBinCountPos();
    List<Integer> binCountNeg = columnConfig.getBinCountNeg();

    // Bound the loop by the number of bins, NOT by the size of the result list: a new
    // ArrayList is empty regardless of its initial capacity, so the previous bound of
    // binCountAll.size() made the loop a no-op and always produced an empty array.
    int binCount = binCountPos.size();
    List<Integer> binCountAll = new ArrayList<Integer>(binCount);
    for(int i = 0; i < binCount; i++) {
        binCountAll.add(binCountPos.get(i) + binCountNeg.get(i));
    }

    Array countAllArray = new Array();
    countAllArray.setType(Array.Type.INT);
    countAllArray.setN(binCountAll.size());
    countAllArray.setValue(StringUtils.join(binCountAll, ' '));
    return countAllArray;
}
/**
 * Reads cluster information out of the first model of a PMML document.
 *
 * @param pmml PMML representation of clusters; its first model must be a
 *  {@link ClusteringModel}
 * @return one {@link ClusterInfo} per cluster, built from the cluster's integer ID, its
 *  space-delimited center array, and its size
 */
public static List<ClusterInfo> read(PMML pmml) {
  Model firstModel = pmml.getModels().get(0);
  Preconditions.checkArgument(firstModel instanceof ClusteringModel);
  ClusteringModel clusteringModel = (ClusteringModel) firstModel;

  return clusteringModel.getClusters().stream()
      .map(c -> {
        int id = Integer.parseInt(c.getId());
        String[] centerTokens = TextUtils.parseDelimited(c.getArray().getValue(), ' ');
        return new ClusterInfo(id, VectorMath.parseVector(centerTokens), c.getSize());
      })
      .collect(Collectors.toList());
}
assertEquals(ComparisonMeasure.Kind.DISTANCE, clusteringModel.getComparisonMeasure().getKind()); assertEquals(NUM_FEATURES, clusteringModel.getClusters().get(0).getArray().getN().intValue()); for (Cluster cluster : clusteringModel.getClusters()) { assertGreater(cluster.getSize(), 0);
/**
 * Returns the content of an array as a list of numbers.
 *
 * The content is obtained first; the declared array type then decides whether that content
 * may be returned as numbers.
 *
 * @param array the array to read
 * @return the array content, for arrays of type INT or REAL
 * @throws MissingAttributeException if the array declares no type
 * @throws InvalidElementException if the array is of type STRING
 * @throws UnsupportedAttributeException if the array is of any other type
 */
static public List<? extends Number> asNumberList(Array array){
	List<?> elements = getContent(array);

	Array.Type arrayType = array.getType();
	if(arrayType == null){
		throw new MissingAttributeException(array, PMMLAttributes.ARRAY_TYPE);
	}

	if(arrayType == Array.Type.INT || arrayType == Array.Type.REAL){
		// Unchecked raw cast: the element type is taken on trust from the declared type
		return (List)elements;
	}

	if(arrayType == Array.Type.STRING){
		throw new InvalidElementException(array);
	}

	throw new UnsupportedAttributeException(array, arrayType);
}
Map<String,Integer> valueEncodingMap = categoricalValueEncodings.getValueEncodingMap(featureNumber); String[] categories = TextUtils.parseDelimited(simpleSetPredicate.getArray().getValue(), ' '); BitSet activeCategories = new BitSet(valueEncodingMap.size()); if (operator == SimpleSetPredicate.BooleanOperator.IS_IN) {
/**
 * Determines the number of elements of an array.
 *
 * @param array the array to measure
 * @return the declared {@code n} attribute when it is present, otherwise the size of the
 *  array content
 */
static public int getSize(Array array){
	Integer declaredSize = array.getN();

	// Fall back to the actual content only when no size was declared
	if(declaredSize == null){
		return getContent(array).size();
	}

	return declaredSize;
}
static public List<?> parse(Array array){ Array.Type type = array.getType(); if(type == null){ throw new MissingAttributeException(array, PMMLAttributes.ARRAY_TYPE); String value = array.getValue(); Integer n = array.getN(); if(n != null && n != tokens.size()){ throw new InvalidElementException(array);
Cluster cluster = clusters.get(id); String[] tokens = TextUtils.parseDelimited(cluster.getArray().getValue(), ' '); double[] modelCenter = VectorMath.parseVector(tokens);
.setPredicate(new SimpleSetPredicate(FieldName.create("color"), SimpleSetPredicate.BooleanOperator.IS_NOT_IN, new Array(Array.Type.STRING, "red"))); right.addScoreDistributions(new ScoreDistribution("banana", halfCount));
/**
 * Converts {@code double} values into a PMML {@link Array} of type {@code REAL}.
 *
 * @param values {@code double} values to place in the array
 * @return PMML {@link Array} whose value is the delimited form of {@code values} and whose
 *  {@code n} is the number of values
 */
public static Array toArray(double... values) {
  int count = values.length;
  List<Double> boxed = new ArrayList<>(count);
  for (int i = 0; i < count; i++) {
    boxed.add(values[i]);
  }
  Array result = new Array(Array.Type.REAL, TextUtils.joinPMMLDelimitedNumbers(boxed));
  return result.setN(boxed.size());
}
/**
 * Reads cluster information out of the first model of a PMML document.
 *
 * @param pmml PMML representation of clusters; its first model must be a
 *  {@link ClusteringModel}
 * @return one {@link ClusterInfo} per cluster, built from the cluster's integer ID, its
 *  space-delimited center array, and its size
 */
public static List<ClusterInfo> read(PMML pmml) {
  Model firstModel = pmml.getModels().get(0);
  Preconditions.checkArgument(firstModel instanceof ClusteringModel);
  ClusteringModel clusteringModel = (ClusteringModel) firstModel;

  return clusteringModel.getClusters().stream()
      .map(c -> {
        int id = Integer.parseInt(c.getId());
        String[] centerTokens = TextUtils.parseDelimited(c.getArray().getValue(), ' ');
        return new ClusterInfo(id, VectorMath.parseVector(centerTokens), c.getSize());
      })
      .collect(Collectors.toList());
}
/**
 * Factory method for {@link Array}.
 *
 * @return a new {@link Array} built with its no-argument constructor
 */
public Array createArray() {
    Array created = new Array();
    return created;
}
/**
 * Simplifies a single-valued set predicate into an equivalent simple predicate.
 *
 * When the predicate's array parses to exactly one token, {@code IS_IN} becomes an
 * {@code EQUAL} comparison and {@code IS_NOT_IN} becomes a {@code NOT_EQUAL} comparison
 * against that token. In every other case the predicate is returned unchanged.
 *
 * @param simpleSetPredicate the set predicate to simplify
 * @return the simplified predicate, or {@code simpleSetPredicate} itself when no
 *  simplification applies
 */
private Predicate transform(SimpleSetPredicate simpleSetPredicate){
	String arrayValue = simpleSetPredicate.getArray().getValue();

	List<String> tokens = ArrayUtil.parse(arrayValue, true);
	if(tokens.size() != 1){
		return simpleSetPredicate;
	}

	String singleValue = tokens.get(0);

	// Translate the set operator into its single-value counterpart
	SimplePredicate.Operator operator;
	switch(simpleSetPredicate.getBooleanOperator()){
		case IS_IN:
			operator = SimplePredicate.Operator.EQUAL;
			break;
		case IS_NOT_IN:
			operator = SimplePredicate.Operator.NOT_EQUAL;
			break;
		default:
			return simpleSetPredicate;
	}

	return createSimplePredicate(simpleSetPredicate.getField(), operator, singleValue);
}
/**
 * Factory method for {@link Array}.
 *
 * @return a new {@link Array} built with its no-argument constructor
 */
public Array createArray() {
    Array created = new Array();
    return created;
}
Map<String,Integer> valueEncodingMap = categoricalValueEncodings.getValueEncodingMap(featureNumber); String[] categories = TextUtils.parseDelimited(simpleSetPredicate.getArray().getValue(), ' '); BitSet activeCategories = new BitSet(valueEncodingMap.size()); if (operator == SimpleSetPredicate.BooleanOperator.IS_IN) {
Array array = new Array(arrayStr.toString().trim(), Array.Type.fromValue("string")); p.setArray(array); if(isLeft) {