/**
 * Maps a feature to the form expected downstream.
 *
 * @param feature the feature to convert
 * @return the same instance when it is a {@code BinaryFeature}; otherwise the result of
 *         {@code feature.toContinuousFeature(dataType)}, where {@code dataType} comes from
 *         the enclosing scope
 */
@Override
public Feature apply(Feature feature){

	// Binary features are handed back untouched
	if(feature instanceof BinaryFeature){
		return (BinaryFeature)feature;
	}

	// Everything else is converted to a continuous feature of the requested data type
	return feature.toContinuousFeature(dataType);
}
};
/**
 * Delegates the continuous conversion to {@code encodedFeature}.
 *
 * @return whatever {@code encodedFeature.toContinuousFeature()} returns
 */
@Override
public ContinuousFeature toContinuousFeature(){
	ContinuousFeature delegateResult = encodedFeature.toContinuousFeature();

	return delegateResult;
}
};
/**
 * Expands one feature into an array of {@code degree} features.
 *
 * <p>For a {@code BinaryFeature} every slot holds that same binary feature
 * (presumably because a 0/1 value is unchanged by exponentiation - confirm).
 * For any other feature, slot 0 holds the feature itself and slot {@code k - 1}
 * holds a {@code PowerFeature} built with exponent {@code k}, for {@code k = 2..degree}.</p>
 *
 * @param feature the feature to expand
 * @return an array of length {@code degree}
 */
@Override
public Feature[] apply(Feature feature){
	Feature[] powers = new Feature[degree];

	if(!(feature instanceof BinaryFeature)){
		// First slot is the feature as given
		powers[0] = feature;

		ContinuousFeature base = feature.toContinuousFeature();

		// Slot "index" carries exponent "index + 1"
		for(int index = 1; index < degree; index++){
			powers[index] = new PowerFeature(encoder, base, index + 1);
		}

		return powers;
	}

	Arrays.fill(powers, (BinaryFeature)feature);

	return powers;
}
};
/**
 * Encodes every input feature with the exponent returned by {@code getPower()}.
 *
 * <p>{@code BinaryFeature} instances are copied to the result unchanged; every other
 * feature is converted to a continuous feature and wrapped in a {@code PowerFeature}.</p>
 *
 * @param features the input features, in column order
 * @param encoder the encoder passed to each created {@code PowerFeature}
 * @return a new list with one entry per input feature, in the same order
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	Integer power = getPower();

	List<Feature> encoded = new ArrayList<>();

	for(Feature candidate : features){
		Feature outcome = (candidate instanceof BinaryFeature)
			? candidate
			: new PowerFeature(encoder, candidate.toContinuousFeature(), power);

		encoded.add(outcome);
	}

	return encoded;
}
/**
 * Normalizes a feature's data type.
 *
 * <p>{@code BinaryFeature} instances are returned as-is. Any other feature is converted
 * to a continuous feature, after which {@code INTEGER} and {@code FLOAT} are kept,
 * {@code DOUBLE} is converted via {@code toContinuousFeature(DataType.FLOAT)}, and
 * every other data type is rejected.</p>
 *
 * @param feature the feature to normalize
 * @return the binary feature itself, or a continuous feature whose data type is
 *         {@code INTEGER} or {@code FLOAT}
 * @throws IllegalArgumentException if the continuous feature has a data type other than
 *         {@code INTEGER}, {@code FLOAT} or {@code DOUBLE}
 */
@Override
public Feature apply(Feature feature){

	if(feature instanceof BinaryFeature){
		return feature;
	}

	ContinuousFeature continuousFeature = feature.toContinuousFeature();

	DataType dataType = continuousFeature.getDataType();
	switch(dataType){
		case INTEGER:
		case FLOAT:
			return continuousFeature;
		case DOUBLE:
			return continuousFeature.toContinuousFeature(DataType.FLOAT);
		default:
			// Name the offending field and type so the failure can be diagnosed
			throw new IllegalArgumentException("Expected integer, float or double data type for field " + continuousFeature.getName() + ", got " + dataType);
	}
}
};
/**
 * Normalizes a feature's data type.
 *
 * <p>{@code BinaryFeature} instances are returned as-is. Any other feature is converted
 * to a continuous feature, after which {@code INTEGER} and {@code FLOAT} are kept,
 * {@code DOUBLE} is converted via {@code toContinuousFeature(DataType.FLOAT)}, and
 * every other data type is rejected.</p>
 *
 * @param feature the feature to normalize
 * @return the binary feature itself, or a continuous feature whose data type is
 *         {@code INTEGER} or {@code FLOAT}
 * @throws IllegalArgumentException if the continuous feature has a data type other than
 *         {@code INTEGER}, {@code FLOAT} or {@code DOUBLE}
 */
@Override
public Feature apply(Feature feature){

	if(feature instanceof BinaryFeature){
		return feature;
	}

	ContinuousFeature continuousFeature = feature.toContinuousFeature();

	DataType dataType = continuousFeature.getDataType();
	switch(dataType){
		case INTEGER:
		case FLOAT:
			return continuousFeature;
		case DOUBLE:
			return continuousFeature.toContinuousFeature(DataType.FLOAT);
		default:
			// Name the offending field and type so the failure can be diagnosed
			throw new IllegalArgumentException("Expected integer, float or double data type for field " + continuousFeature.getName() + ", got " + dataType);
	}
}
};
/**
 * Applies the configured universal function to every input feature.
 *
 * <p>When {@code getFunc()} returns {@code null} the input list is returned as-is.
 * Otherwise each feature is converted to a continuous feature and replaced by a
 * continuous {@code DOUBLE} derived field obtained from
 * {@code encoder.ensureDerivedField(...)}; the field's expression is supplied lazily
 * by {@code encodeUFunc}.</p>
 *
 * @param features the input features, in column order
 * @param encoder the encoder that owns the derived fields
 * @return the input list itself when no function is set, otherwise a new list with
 *         one {@code ContinuousFeature} per input feature
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	UFunc ufunc = getFunc();

	// No function configured: nothing to encode
	if(ufunc == null){
		return features;
	}

	List<Feature> encoded = new ArrayList<>();

	for(Feature feature : features){
		ContinuousFeature input = feature.toContinuousFeature();

		DerivedField derivedField = encoder.ensureDerivedField(
			FeatureUtil.createName(ufunc.getName(), input),
			OpType.CONTINUOUS,
			DataType.DOUBLE,
			() -> encodeUFunc(ufunc, input.ref())
		);

		encoded.add(new ContinuousFeature(encoder, derivedField));
	}

	return encoded;
}
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ Number threshold = getThreshold(); List<Feature> result = new ArrayList<>(); for(int i = 0; i < features.size(); i++){ Feature feature = features.get(i); ContinuousFeature continuousFeature = feature.toContinuousFeature(); // "($name <= threshold) ? 0 : 1" Apply apply = PMMLUtil.createApply("threshold", continuousFeature.ref(), PMMLUtil.createConstant(threshold)); DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("binarizer", continuousFeature), apply); result.add(new ContinuousFeature(encoder, derivedField)); } return result; }
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ List<? extends Number> scale = getScale(); ClassDictUtil.checkSize(features, scale); List<Feature> result = new ArrayList<>(); for(int i = 0; i < features.size(); i++){ Feature feature = features.get(i); Number value = scale.get(i); if(ValueUtil.isOne(value)){ result.add(feature); continue; } ContinuousFeature continuousFeature = feature.toContinuousFeature(); // "$name / scale" Apply apply = PMMLUtil.createApply("/", continuousFeature.ref(), PMMLUtil.createConstant(value)); DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("max_abs_scaler", continuousFeature), apply); result.add(new ContinuousFeature(encoder, derivedField)); } return result; }
ContinuousFeature continuousFeature = feature.toContinuousFeature();
/**
 * Converts this feature to a continuous {@code DOUBLE} feature.
 *
 * <p>The expression is built lazily: the supplier reads {@code getFeature()} and
 * {@code getFactor()} only when invoked, and produces a {@code "*"} apply over the
 * feature reference and the factor constant. {@code name} comes from the enclosing
 * scope.</p>
 *
 * @return the result of {@code toContinuousFeature(name, DataType.DOUBLE, supplier)}
 */
@Override
public ContinuousFeature toContinuousFeature(){
	Supplier<Apply> productSupplier = () -> {
		Feature source = getFeature();
		Number multiplier = getFactor();

		ContinuousFeature continuousSource = source.toContinuousFeature();

		return PMMLUtil.createApply("*", continuousSource.ref(), PMMLUtil.createConstant(multiplier));
	};

	return toContinuousFeature(name, DataType.DOUBLE, productSupplier);
}
};
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ List<? extends Number> min = getMin(); List<? extends Number> scale = getScale(); ClassDictUtil.checkSize(features, min, scale); List<Feature> result = new ArrayList<>(); for(int i = 0; i < features.size(); i++){ Feature feature = features.get(i); Number minValue = min.get(i); Number scaleValue = scale.get(i); if(ValueUtil.isOne(scaleValue) && ValueUtil.isZero(minValue)){ result.add(feature); continue; } ContinuousFeature continuousFeature = feature.toContinuousFeature(); // "($name * scale) + min" Expression expression = continuousFeature.ref(); if(!ValueUtil.isOne(scaleValue)){ expression = PMMLUtil.createApply("*", expression, PMMLUtil.createConstant(scaleValue)); } // End if if(!ValueUtil.isZero(minValue)){ expression = PMMLUtil.createApply("+", expression, PMMLUtil.createConstant(minValue)); } DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("mix_max_scaler", continuousFeature), expression); result.add(new ContinuousFeature(encoder, derivedField)); } return result; }
ContinuousFeature continuousFeature = feature.toContinuousFeature();
ContinuousFeature continuousFeature = feature.toContinuousFeature();
Feature feature = features.get(j); ContinuousFeature continuousFeature = feature.toContinuousFeature();
ContinuousFeature continuousFeature = feature.toContinuousFeature();
/**
 * Encodes the bucketizer as a PMML {@code Discretize} expression over its input column.
 *
 * <p>For {@code n + 1} split points, {@code n} bins are emitted. Bin {@code i} is
 * labelled with the string form of {@code i} and spans {@code splits[i]} to
 * {@code splits[i + 1]}; every bin is closed-open except the last one, which is
 * closed-closed.</p>
 *
 * @param encoder the encoder that provides the input feature and creates the derived field
 * @return a singleton list holding a {@code CategoricalFeature} backed by a categorical
 *         {@code INTEGER} derived field
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	Bucketizer transformer = getTransformer();

	Feature inputFeature = encoder.getOnlyFeature(transformer.getInputCol());

	ContinuousFeature continuousInput = inputFeature.toContinuousFeature();

	Discretize discretize = new Discretize(continuousInput.getName());

	List<String> labels = new ArrayList<>();

	double[] splits = transformer.getSplits();

	// Index of the final bin; only that bin includes its right margin
	int lastBin = splits.length - 2;

	for(int bin = 0; bin <= lastBin; bin++){
		String label = String.valueOf(bin);

		labels.add(label);

		Interval.Closure closure;

		if(bin < lastBin){
			closure = Interval.Closure.CLOSED_OPEN;
		} else {
			closure = Interval.Closure.CLOSED_CLOSED;
		}

		Interval interval = new Interval(closure)
			.setLeftMargin(formatMargin(splits[bin]))
			.setRightMargin(formatMargin(splits[bin + 1]));

		discretize.addDiscretizeBins(new DiscretizeBin(label, interval));
	}

	DerivedField derivedField = encoder.createDerivedField(formatName(transformer), OpType.CATEGORICAL, DataType.INTEGER, discretize);

	return Collections.singletonList(new CategoricalFeature(encoder, derivedField, labels));
}
/**
 * Encodes the binarizer as a PMML {@code if} expression over its input column.
 *
 * <p>The expression yields {@code 0d} when the input is less than or equal to the
 * transformer's threshold, and {@code 1d} otherwise.</p>
 *
 * @param encoder the encoder that provides the input feature and creates the derived field
 * @return a singleton list holding a {@code CategoricalFeature} with categories
 *         {@code "0"} and {@code "1"}, backed by a categorical {@code DOUBLE} derived field
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	Binarizer transformer = getTransformer();

	Feature inputFeature = encoder.getOnlyFeature(transformer.getInputCol());

	ContinuousFeature continuousInput = inputFeature.toContinuousFeature();

	// "input <= threshold"
	Apply comparison = PMMLUtil.createApply("lessOrEqual", continuousInput.ref(), PMMLUtil.createConstant(transformer.getThreshold()));

	// "if(comparison) 0 else 1"
	Apply conditional = new Apply("if")
		.addExpressions(comparison)
		.addExpressions(PMMLUtil.createConstant(0d), PMMLUtil.createConstant(1d));

	DerivedField derivedField = encoder.createDerivedField(formatName(transformer), OpType.CATEGORICAL, DataType.DOUBLE, conditional);

	CategoricalFeature binarized = new CategoricalFeature(encoder, derivedField, Arrays.asList("0", "1"));

	return Collections.singletonList(binarized);
}
}
Feature feature = features.get(i); ContinuousFeature continuousFeature = feature.toContinuousFeature();
/**
 * Encodes this estimator as a PMML {@code NaiveBayesModel} for classification.
 *
 * <p>The shape returned by {@code getThetaShape()} is read as
 * {@code [numberOfClasses, numberOfFeatures]}. For each feature column, the matching
 * columns of {@code getTheta()} and {@code getSigma()} are passed to
 * {@code encodeTargetValueStats} (the original code names them means and variances)
 * to build one {@code BayesInput}. The {@code BayesOutput} carries the target value
 * counts derived from {@code getClassCount()}.</p>
 *
 * @param schema the schema that supplies the categorical label and the features
 * @return the model, constructed with a threshold argument of {@code 0d} and a
 *         {@code DOUBLE} probability output
 */
@Override
public NaiveBayesModel encodeModel(Schema schema){
	int[] thetaShape = getThetaShape();

	int classes = thetaShape[0];
	int columns = thetaShape[1];

	List<? extends Number> theta = getTheta();
	List<? extends Number> sigma = getSigma();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	BayesInputs bayesInputs = new BayesInputs();

	for(int column = 0; column < columns; column++){
		Feature feature = schema.getFeature(column);

		// Per-class statistics for this feature column
		List<? extends Number> columnMeans = CMatrixUtil.getColumn(theta, classes, columns, column);
		List<? extends Number> columnVariances = CMatrixUtil.getColumn(sigma, classes, columns, column);

		ContinuousFeature continuousFeature = feature.toContinuousFeature();

		BayesInput bayesInput = new BayesInput(continuousFeature.getName())
			.setTargetValueStats(encodeTargetValueStats(categoricalLabel.getValues(), columnMeans, columnVariances));

		bayesInputs.addBayesInputs(bayesInput);
	}

	List<Integer> classCount = getClassCount();

	BayesOutput bayesOutput = new BayesOutput(categoricalLabel.getName(), null)
		.setTargetValueCounts(encodeTargetValueCounts(categoricalLabel.getValues(), classCount));

	return new NaiveBayesModel(0d, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), bayesInputs, bayesOutput)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));
}