/**
 * Adds a named {@link Extension} to a PMML model, carrying a collection of values as a single
 * {@code String} content entry. The extension is given a name but no value. The values are
 * joined with {@code TextUtils.joinPMMLDelimited}, i.e. encoded as if they were being added to
 * a PMML {@link Array}: space-separated, with PMML quoting rules. If {@code content} is empty,
 * nothing is added to the model.
 *
 * @param pmml PMML model to add the extension to
 * @param key extension key, used as the extension's name
 * @param content values to join and add as a single {@code String} content entry
 */
public static void addExtensionContent(PMML pmml, String key, Collection<?> content) {
  if (content.isEmpty()) {
    // Nothing to record; do not add an extension with empty content
    return;
  }
  Extension extension = new Extension().setName(key).addContent(TextUtils.joinPMMLDelimited(content));
  pmml.addExtensions(extension);
}
/**
 * Checks that {@code TextUtils.joinPMMLDelimited} space-separates its values, and that the
 * values below containing spaces or double quotes come back wrapped in double quotes with
 * inner quotes backslash-escaped.
 */
@Test
public void testJoinPMMLDelimited() {
  // Mix of a plain value, a value with a space, and a value with quotes and a trailing space
  String expectedMixed = "ab \"a b\" \"with \\\"quotes\\\" \"";
  String actualMixed = TextUtils.joinPMMLDelimited(Arrays.asList("ab", "a b", "with \"quotes\" "));
  assertEquals(expectedMixed, actualMixed);

  // Plain values need no quoting at all
  String expectedPlain = "1 22 3";
  String actualPlain = TextUtils.joinPMMLDelimited(Arrays.asList("1", "22", "3"));
  assertEquals(expectedPlain, actualPlain);

  // The same value twice, with leading/trailing spaces and embedded quotes
  String expectedRepeated = "\" c\\\" d \\\"e \" \" c\\\" d \\\"e \"";
  String actualRepeated =
      TextUtils.joinPMMLDelimited(Arrays.asList(" c\" d \"e ", " c\" d \"e "));
  assertEquals(expectedRepeated, actualRepeated);
}
private Predicate buildPredicate(Split split, CategoricalValueEncodings categoricalValueEncodings) { if (split == null) { // Left child always applies, but is evaluated second return new True(); } int featureIndex = inputSchema.predictorToFeatureIndex(split.feature()); FieldName fieldName = FieldName.create(inputSchema.getFeatureNames().get(featureIndex)); if (split.featureType().equals(FeatureType.Categorical())) { // Note that categories in MLlib model select the *left* child but the // convention here will be that the predicate selects the *right* child // So the predicate will evaluate "not in" this set // More ugly casting @SuppressWarnings("unchecked") Collection<Double> javaCategories = (Collection<Double>) (Collection<?>) JavaConversions.seqAsJavaList(split.categories()); Set<Integer> negativeEncodings = javaCategories.stream().map(Double::intValue).collect(Collectors.toSet()); Map<Integer,String> encodingToValue = categoricalValueEncodings.getEncodingValueMap(featureIndex); List<String> negativeValues = negativeEncodings.stream().map(encodingToValue::get).collect(Collectors.toList()); String joinedValues = TextUtils.joinPMMLDelimited(negativeValues); return new SimpleSetPredicate(fieldName, SimpleSetPredicate.BooleanOperator.IS_NOT_IN, new Array(Array.Type.STRING, joinedValues)); } else { // For MLlib, left means <= threshold, so right means > return new SimplePredicate(fieldName, SimplePredicate.Operator.GREATER_THAN) .setValue(Double.toString(split.threshold())); } }
/**
 * Adds a named {@link Extension} to a PMML model, carrying a collection of values as a single
 * {@code String} content entry. The extension is given a name but no value. The values are
 * joined with {@code TextUtils.joinPMMLDelimited}, i.e. encoded as if they were being added to
 * a PMML {@link Array}: space-separated, with PMML quoting rules. If {@code content} is empty,
 * nothing is added to the model.
 *
 * @param pmml PMML model to add the extension to
 * @param key extension key, used as the extension's name
 * @param content values to join and add as a single {@code String} content entry
 */
public static void addExtensionContent(PMML pmml, String key, Collection<?> content) {
  if (content.isEmpty()) {
    // Nothing to record; do not add an extension with empty content
    return;
  }
  Extension extension = new Extension().setName(key).addContent(TextUtils.joinPMMLDelimited(content));
  pmml.addExtensions(extension);
}