@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ Number threshold = getThreshold(); List<Feature> result = new ArrayList<>(); for(int i = 0; i < features.size(); i++){ Feature feature = features.get(i); ContinuousFeature continuousFeature = feature.toContinuousFeature(); // "($name <= threshold) ? 0 : 1" Apply apply = PMMLUtil.createApply("threshold", continuousFeature.ref(), PMMLUtil.createConstant(threshold)); DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("binarizer", continuousFeature), apply); result.add(new ContinuousFeature(encoder, derivedField)); } return result; }
// Obtain the field name that FeatureUtil associates with powerFeature.
FieldName name = FeatureUtil.getName(powerFeature);
/**
 * Applies the configured universal function to every input feature.
 *
 * @param features The features to transform; each one is converted to a continuous feature first.
 * @param encoder The encoder through which the derived fields are obtained.
 * @return The input list itself when no function is configured; otherwise a new list
 * holding one continuous double feature per input feature, in input order.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	UFunc ufunc = getFunc();

	// Nothing to apply - hand the features back untouched
	if(ufunc == null){
		return features;
	}

	List<Feature> transformed = new ArrayList<>();

	for(Feature feature : features){
		ContinuousFeature input = feature.toContinuousFeature();

		// The expression is passed as a supplier; presumably it is only evaluated
		// when the encoder does not already hold a field of this name - confirm against the encoder
		DerivedField derivedField = encoder.ensureDerivedField(
			FeatureUtil.createName(ufunc.getName(), input),
			OpType.CONTINUOUS,
			DataType.DOUBLE,
			() -> encodeUFunc(ufunc, input.ref())
		);

		transformed.add(new ContinuousFeature(encoder, derivedField));
	}

	return transformed;
}
/**
 * Registers a feature, first swapping it for a pre-processed derived feature
 * when the feature is backed by a data field and {@code encodeExpression}
 * returns an expression that differs from the feature's own reference.
 *
 * @param feature The feature to register.
 */
@Override
public void addFeature(Feature feature){
	FieldName name = FeatureUtil.getName(feature);

	DataField dataField = getDataField(name);

	// Features without a matching data field are registered as-is
	if(dataField == null){
		super.addFeature(feature);

		return;
	}

	Expression reference = feature.ref();
	Expression preProcessed = encodeExpression(name, reference);

	boolean unchanged = reference.equals(preProcessed);
	if(!unchanged){
		FieldName derivedName = FieldName.create("preProcess(" + name.getValue() + ")");

		DerivedField derivedField = createDerivedField(derivedName, OpType.CONTINUOUS, DataType.DOUBLE, preProcessed);

		feature = new ContinuousFeature(PreProcessEncoder.this, derivedField);
	}

	super.addFeature(feature);
}
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ List<? extends Number> scale = getScale(); ClassDictUtil.checkSize(features, scale); List<Feature> result = new ArrayList<>(); for(int i = 0; i < features.size(); i++){ Feature feature = features.get(i); Number value = scale.get(i); if(ValueUtil.isOne(value)){ result.add(feature); continue; } ContinuousFeature continuousFeature = feature.toContinuousFeature(); // "$name / scale" Apply apply = PMMLUtil.createApply("/", continuousFeature.ref(), PMMLUtil.createConstant(value)); DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("max_abs_scaler", continuousFeature), apply); result.add(new ContinuousFeature(encoder, derivedField)); } return result; }
if(characteristic == null){ characteristic = new Characteristic() .setName("score(" + FeatureUtil.getName(feature) + ")");
/**
 * Collapses several input features into one continuous double feature by
 * applying the configured function to all of them.
 *
 * @param features The features to combine.
 * @param encoder The encoder that registers the new derived field.
 * @return The input list itself when it holds at most one feature; otherwise
 * a singleton list holding the combined feature.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	String function = getFunction();

	if(features.size() > 1){
		Apply aggregate = new Apply(translateFunction(function));

		// Every feature becomes one argument of the function call, in input order
		for(int i = 0; i < features.size(); i++){
			Feature feature = features.get(i);

			aggregate.addExpressions(feature.ref());
		}

		FieldName name = FeatureUtil.createName(function, features);

		DerivedField derivedField = encoder.createDerivedField(name, OpType.CONTINUOUS, DataType.DOUBLE, aggregate);

		Feature combined = new ContinuousFeature(encoder, derivedField);

		return Collections.singletonList(combined);
	}

	// Zero or one feature - there is nothing to combine
	return features;
}
/**
 * Encodes a single feature as a categorical feature over the known classes,
 * backed by an integer-typed derived field that maps each class to its
 * position in the class list.
 *
 * @param features The input features; exactly one is expected
 * (checked via {@code ClassDictUtil.checkSize}).
 * @param encoder The encoder through which the derived field is obtained.
 * @return A singleton list holding a categorical feature over the formatted class values.
 * Its continuous view delegates to the index-coded derived field.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<?> classes = getClasses();

	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	// Parallel lists: formatted class value -> formatted position of that class
	List<String> inputCategories = new ArrayList<>();
	List<String> outputCategories = new ArrayList<>();

	for(int index = 0; index < classes.size(); index++){
		String inputCategory = ValueUtil.formatValue(classes.get(index));
		String outputCategory = ValueUtil.formatValue(index);

		inputCategories.add(inputCategory);
		outputCategories.add(outputCategory);
	}

	Supplier<MapValues> mapping = () -> {
		// The source field is marked categorical before the mapping is built
		encoder.toCategorical(feature.getName(), inputCategories);

		return PMMLUtil.createMapValues(feature.getName(), inputCategories, outputCategories);
	};

	DerivedField derivedField = encoder.ensureDerivedField(
		FeatureUtil.createName("label_encoder", feature),
		OpType.CATEGORICAL,
		DataType.INTEGER,
		mapping
	);

	Feature encodedFeature = new CategoricalFeature(encoder, derivedField, outputCategories);

	// Categorical over the original values, but continuous consumers see the index-coded field
	Feature labelFeature = new CategoricalFeature(encoder, feature, inputCategories){

		@Override
		public ContinuousFeature toContinuousFeature(){
			return encodedFeature.toContinuousFeature();
		}
	};

	return Collections.singletonList(labelFeature);
}
/**
 * Encodes every input feature as an integer-typed continuous derived field that
 * applies the configured function to the feature and the epoch year.
 *
 * @param features The features to encode; every element is cast to {@code ObjectFeature}.
 * @param encoder The encoder that registers the new derived fields.
 * @return A new list holding one continuous feature per input feature, in input order.
 * @throws IllegalArgumentException If the epoch does not fall on the first day of January
 * in its own time zone (or in the system default zone when the calendar reports none).
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	GregorianCalendar epoch = getEpoch();
	String function = getFunction();

	// Start from the system default zone, and prefer the calendar's own zone when it has one
	ZoneId zoneId = ZoneId.systemDefault();

	TimeZone timeZone = epoch.getTimeZone();
	if(timeZone != null){
		zoneId = timeZone.toZoneId();
	}

	LocalDateTime epochDateTime = LocalDateTime.ofInstant(epoch.toInstant(), zoneId);

	boolean firstOfJanuary = (epochDateTime.getMonthValue() == 1 && epochDateTime.getDayOfMonth() == 1);
	if(!firstOfJanuary){
		throw new IllegalArgumentException(String.valueOf(epochDateTime));
	}

	List<Feature> encoded = new ArrayList<>();

	for(Feature feature : features){
		ObjectFeature objectFeature = (ObjectFeature)feature;

		Apply apply = PMMLUtil.createApply(
			function,
			objectFeature.ref(),
			PMMLUtil.createConstant(epochDateTime.getYear(), DataType.INTEGER)
		);

		DerivedField derivedField = encoder.createDerivedField(
			FeatureUtil.createName("days_since_year", objectFeature),
			OpType.CONTINUOUS,
			DataType.INTEGER,
			apply
		);

		encoded.add(new ContinuousFeature(encoder, derivedField));
	}

	return encoded;
}
// Build the field name from the "lookup" function label and the input features
// (the exact name format is decided by FeatureUtil.createName).
FieldName name = FeatureUtil.createName("lookup", features);
/**
 * Encodes the tokenizer as a lowercasing derived field wrapped in a document
 * feature that uses the {@code \s+} regular expression.
 *
 * @param encoder The encoder that supplies the input feature and registers the derived field.
 * @return A singleton list holding the document feature.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	Tokenizer transformer = getTransformer();

	Feature input = encoder.getOnlyFeature(transformer.getInputCol());

	Apply lowercase = PMMLUtil.createApply("lowercase", input.ref());

	DerivedField lowercaseField = encoder.createDerivedField(
		FeatureUtil.createName("lowercase", input),
		OpType.CATEGORICAL,
		DataType.STRING,
		lowercase
	);

	Feature document = new DocumentFeature(encoder, lowercaseField, "\\s+");

	return Collections.singletonList(document);
}
}
// With more than one class the index i is passed into the name as well;
// with a single class the name is built from the feature alone.
FieldName name = (classes.size() > 1 ? FeatureUtil.createName("label_binarizer", feature, i) : FeatureUtil.createName("label_binarizer", feature));
// Lowercase the feature value as a categorical string field. The expression is handed over
// through a supplier - presumably so that an already existing field of the same name can be
// reused instead of re-created; confirm against the encoder's ensureDerivedField.
Apply apply = PMMLUtil.createApply("lowercase", feature.ref()); DerivedField derivedField = encoder.ensureDerivedField(FeatureUtil.createName("lowercase", feature), OpType.CATEGORICAL, DataType.STRING, () -> apply);
// Register the expression held in apply as a continuous double field, named from "scale" and the feature.
DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("scale", feature), OpType.CONTINUOUS, DataType.DOUBLE, apply);
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ List<? extends Number> min = getMin(); List<? extends Number> scale = getScale(); ClassDictUtil.checkSize(features, min, scale); List<Feature> result = new ArrayList<>(); for(int i = 0; i < features.size(); i++){ Feature feature = features.get(i); Number minValue = min.get(i); Number scaleValue = scale.get(i); if(ValueUtil.isOne(scaleValue) && ValueUtil.isZero(minValue)){ result.add(feature); continue; } ContinuousFeature continuousFeature = feature.toContinuousFeature(); // "($name * scale) + min" Expression expression = continuousFeature.ref(); if(!ValueUtil.isOne(scaleValue)){ expression = PMMLUtil.createApply("*", expression, PMMLUtil.createConstant(scaleValue)); } // End if if(!ValueUtil.isZero(minValue)){ expression = PMMLUtil.createApply("+", expression, PMMLUtil.createConstant(minValue)); } DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("mix_max_scaler", continuousFeature), expression); result.add(new ContinuousFeature(encoder, derivedField)); } return result; }
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ String function = getFunction(); Boolean trimBlanks = getTrimBlanks(); if(function == null && !trimBlanks){ return features; } List<Feature> result = new ArrayList<>(); for(Feature feature : features){ Expression expression = feature.ref(); if(function != null){ expression = PMMLUtil.createApply(function, expression); } // End if if(trimBlanks){ expression = PMMLUtil.createApply("trimBlanks", expression); } Field<?> field = encoder.toCategorical(feature.getName(), Collections.emptyList()); // XXX: Should have been set by the previous transformer field.setDataType(DataType.STRING); DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("normalize", feature), OpType.CATEGORICAL, DataType.STRING, expression); feature = new StringFeature(encoder, derivedField); result.add(feature); } return result; }
// Register the expression as a derived field, named from "robust_scaler" and the continuous feature.
DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("robust_scaler", continuousFeature), expression);
// Register the expression as a derived field, named from "standard_scaler" and the continuous feature.
DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("standard_scaler", continuousFeature), expression);
// Register the discretization as a categorical field: string-typed when labels are given,
// integer-typed otherwise.
DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("cut", feature), OpType.CATEGORICAL, (labels != null ? DataType.STRING : DataType.INTEGER), discretize);
@Override public List<Feature> encodeFeatures(SparkMLEncoder encoder){ RegexTokenizer transformer = getTransformer(); if(!transformer.getGaps()){ throw new IllegalArgumentException("Expected splitter mode, got token matching mode"); } // End if if(transformer.getMinTokenLength() != 1){ throw new IllegalArgumentException("Expected 1 as minimum token length, got " + transformer.getMinTokenLength() + " as minimum token length"); } Feature feature = encoder.getOnlyFeature(transformer.getInputCol()); Field<?> field = feature.getField(); if(transformer.getToLowercase()){ Apply apply = PMMLUtil.createApply("lowercase", feature.ref()); field = encoder.createDerivedField(FeatureUtil.createName("lowercase", feature), OpType.CATEGORICAL, DataType.STRING, apply); } return Collections.singletonList(new DocumentFeature(encoder, field, transformer.getPattern())); } }