.addOutputFields(ModelUtil.createProbabilityField(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), DataType.DOUBLE, categoricalLabel.getValue(i))); Schema segmentSchema = new Schema(new CategoricalLabel(null, DataType.STRING, Arrays.asList("(other)", categoricalLabel.getValue(i))), schema.getFeatures());
/**
 * Builds one score distribution per category of the given label.
 *
 * <p>
 * For every index below {@code categoricalLabel.size()}, the category value at that index
 * is paired with the element of {@code recordCounts} at the same index.
 * Each distribution is passed through {@code intern(...)} before it is collected.
 * </p>
 *
 * @param categoricalLabel The label whose category values are enumerated.
 * @param recordCounts Record counts, indexed in parallel with the label's categories.
 * @return The score distributions, in category order.
 */
public List<ScoreDistribution> createScoreDistribution(CategoricalLabel categoricalLabel, double[] recordCounts){
	List<ScoreDistribution> distributions = new ArrayList<>();

	int index = 0;

	while(index < categoricalLabel.size()){
		String category = categoricalLabel.getValue(index);
		double count = recordCounts[index];

		ScoreDistribution distribution = new InternableScoreDistribution()
			.setValue(category)
			.setRecordCount(count);

		// Collect the interned instance, not the freshly built one
		distributions.add(intern(distribution));

		index++;
	}

	return distributions;
}
/**
 * Registers the output fields of this converter.
 *
 * <p>
 * Starts from the output fields registered by the superclass.
 * When the transformer is not an instance of {@code HasProbabilityCol},
 * a copy of that list is extended with {@code double} probability fields,
 * one set created from the values of the categorical label.
 * </p>
 *
 * @param label The label; cast to {@code CategoricalLabel} when probability fields are added.
 * @param encoder The encoder, forwarded to the superclass.
 * @return The superclass's list, or an extended copy of it.
 */
@Override
public List<OutputField> registerOutputFields(Label label, SparkMLEncoder encoder){
	MultilayerPerceptronClassificationModel model = getTransformer();

	List<OutputField> inherited = super.registerOutputFields(label, encoder);

	// Nothing to add when the transformer is a HasProbabilityCol
	if(model instanceof HasProbabilityCol){
		return inherited;
	}

	CategoricalLabel categoricalLabel = (CategoricalLabel)label;

	// Copy first, so that the list returned by the superclass is never modified
	List<OutputField> extended = new ArrayList<>(inherited);
	extended.addAll(ModelUtil.createProbabilityFields(DataType.DOUBLE, categoricalLabel.getValues()));

	return extended;
}
for(int i = 0; i < categoricalLabel.size(); i++){ String category = String.valueOf(i); OutputField pmmlPredictedField = ModelUtil.createPredictedField(FieldName.create("pmml(" + predictionCol + ")"), categoricalLabel.getDataType(), OpType.CATEGORICAL); MapValues mapValues = PMMLUtil.createMapValues(pmmlPredictedField.getName(), categoricalLabel.getValues(), categories); for(int i = 0; i < categoricalLabel.size(); i++){ String value = categoricalLabel.getValue(i);
/**
 * Encodes the target field as a categorical label.
 *
 * <p>
 * The target categories are first passed through {@code prepareTargetCategories(...)}.
 * The result is used to create a categorical, string-typed data field via the encoder,
 * and the label is constructed from that data field.
 * </p>
 *
 * @param targetField The name of the target field.
 * @param targetCategories The raw target categories.
 * @param encoder The encoder that creates the data field.
 * @return A {@code CategoricalLabel} wrapping the newly created data field.
 */
@Override
public Label encodeLabel(FieldName targetField, List<String> targetCategories, PMMLEncoder encoder){
	List<String> preparedCategories = prepareTargetCategories(targetCategories);

	DataField targetDataField = encoder.createDataField(targetField, OpType.CATEGORICAL, DataType.STRING, preparedCategories);

	return new CategoricalLabel(targetDataField);
}
BooleanFeature booleanFeature = (BooleanFeature)feature; label = new CategoricalLabel(booleanFeature.getName(), booleanFeature.getDataType(), booleanFeature.getValues()); } else label = new CategoricalLabel(dataField); } else label = new CategoricalLabel(field.getName(), field.getDataType(), categories); } else if(numClasses != categoricalLabel.size()){ throw new IllegalArgumentException("Expected " + numClasses + " target categories, got " + categoricalLabel.size() + " target categories");
label = new CategoricalLabel(dataField); CategoricalLabel categoricalLabel = (CategoricalLabel)label; List<OutputField> predictProbaFields = ModelUtil.createProbabilityFields(DataType.DOUBLE, categoricalLabel.getValues()); List<String> values = categoricalLabel.getValues(); for(String value : values){ probabilityFields.add("probability(" + value + ")"); // XXX
/**
 * Encodes an integer as a category value of the enclosing scope's {@code categoricalLabel}.
 *
 * <p>
 * The lookup index is {@code value - 1}; presumably the incoming values are 1-based
 * (NOTE(review): confirm against the caller).
 * </p>
 *
 * @param value The integer to encode; unboxed for the subtraction, so it must not be {@code null}.
 * @return The label value at index {@code value - 1}.
 */
@Override public String encode(Integer value){ return categoricalLabel.getValue(value - 1); } };
/**
 * Encodes this estimator as a PMML naive Bayes classification model.
 *
 * <p>
 * The theta shape is read as {@code [number of classes, number of features]}.
 * For every feature index, the matching columns of the theta and sigma matrices are extracted
 * as per-class means and variances, and attached to a Bayes input named after the feature's
 * continuous representation. The Bayes output carries target value counts built from the class counts.
 * The resulting model is constructed with {@code 0d} as its first argument and is given a
 * {@code double} probability output.
 * </p>
 *
 * @param schema The schema; its label is cast to {@code CategoricalLabel}.
 * @return The encoded naive Bayes model.
 */
@Override
public NaiveBayesModel encodeModel(Schema schema){
	int[] thetaShape = getThetaShape();

	int rows = thetaShape[0];
	int columns = thetaShape[1];

	List<? extends Number> theta = getTheta();
	List<? extends Number> sigma = getSigma();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	BayesInputs bayesInputs = new BayesInputs();

	int column = 0;

	while(column < columns){
		Feature feature = schema.getFeature(column);

		// One mean and one variance per class for the current feature
		List<? extends Number> classMeans = CMatrixUtil.getColumn(theta, rows, columns, column);
		List<? extends Number> classVariances = CMatrixUtil.getColumn(sigma, rows, columns, column);

		ContinuousFeature continuousFeature = feature.toContinuousFeature();

		bayesInputs.addBayesInputs(
			new BayesInput(continuousFeature.getName())
				.setTargetValueStats(encodeTargetValueStats(categoricalLabel.getValues(), classMeans, classVariances))
		);

		column++;
	}

	List<Integer> classCount = getClassCount();

	BayesOutput bayesOutput = new BayesOutput(categoricalLabel.getName(), null)
		.setTargetValueCounts(encodeTargetValueCounts(categoricalLabel.getValues(), classCount));

	return new NaiveBayesModel(0d, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), bayesInputs, bayesOutput)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));
}
.setField(categoricalLabel.getName());
/**
 * Creates a categorical label for the target field.
 *
 * <p>
 * Runs the supplied categories through {@code prepareTargetCategories(...)},
 * asks the encoder for a categorical data field of string data type holding the prepared categories,
 * and wraps that data field into a {@code CategoricalLabel}.
 * </p>
 *
 * @param targetField The name of the target field.
 * @param targetCategories The target categories, before preparation.
 * @param encoder The encoder on which the data field is created.
 * @return The categorical label backed by the created data field.
 */
@Override
public Label encodeLabel(FieldName targetField, List<String> targetCategories, PMMLEncoder encoder){
	List<String> categories = prepareTargetCategories(targetCategories);

	DataField labelField = encoder.createDataField(targetField, OpType.CATEGORICAL, DataType.STRING, categories);

	return new CategoricalLabel(labelField);
}
/**
 * Encodes per-class lists of regression trees as a single classification mining model.
 *
 * <p>
 * Every element of {@code regTrees} is turned into a mining model over a segment schema that has
 * a {@code float} continuous label and the features of the given schema. Each such model is given
 * a continuous {@code float} predicted output named {@code class_(<category>)}, where the category
 * is the label value at the same index. The per-class models are then combined using
 * softmax normalization.
 * </p>
 *
 * @param regTrees The regression trees, one list per class.
 * @param base_score The base score, forwarded to {@code createMiningModel(...)}.
 * @param schema The schema; its label is cast to {@code CategoricalLabel}.
 * @return The combined classification model.
 */
public static MiningModel encodeMiningModel(List<List<RegressionTree>> regTrees, float base_score, Schema schema){
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures());

	List<MiningModel> classModels = new ArrayList<>();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int numClasses = regTrees.size();

	int classIndex = 0;

	while(classIndex < numClasses){
		MiningModel classModel = createMiningModel(regTrees.get(classIndex), base_score, segmentSchema)
			.setOutput(ModelUtil.createPredictedOutput(FieldName.create("class_(" + categoricalLabel.getValue(classIndex) + ")"), OpType.CONTINUOUS, DataType.FLOAT));

		classModels.add(classModel);

		classIndex++;
	}

	return MiningModelUtil.createClassification(classModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
if(categoricalLabel.size() != layers[layers.length - 1]){ throw new IllegalArgumentException();
CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); if(categoricalLabel.size() != probabilities.size()){ throw new IllegalArgumentException(); for(int i = 0; i < categoricalLabel.size(); i++){ String value = categoricalLabel.getValue(i); Double probability = probabilities.getValue(i);
/**
 * Produces the categorical label of the target field.
 *
 * <p>
 * The categories handed in are replaced by the result of {@code prepareTargetCategories(...)};
 * the encoder then creates a data field with categorical operational type and string data type
 * for them, from which the returned label is built.
 * </p>
 *
 * @param targetField The name of the target field.
 * @param targetCategories The target categories as supplied by the caller.
 * @param encoder The encoder used for creating the data field.
 * @return A {@code CategoricalLabel} constructed from the created data field.
 */
@Override
public Label encodeLabel(FieldName targetField, List<String> targetCategories, PMMLEncoder encoder){
	List<String> effectiveCategories = prepareTargetCategories(targetCategories);

	DataField createdField = encoder.createDataField(targetField, OpType.CATEGORICAL, DataType.STRING, effectiveCategories);

	return new CategoricalLabel(createdField);
}
/**
 * Copies the prediction and the impurity statistics of a leaf node onto a PMML node.
 *
 * <p>
 * The leaf's prediction is converted to an int and used as an index into the enclosing
 * {@code categoricalLabel}; the value found there becomes the node's score.
 * The node's record count is set to the count of the leaf's impurity calculator.
 * For every entry of the impurity calculator's stats array, a score distribution pairing
 * the label value at the same index with that entry is added to the node.
 * </p>
 *
 * @param node The PMML node that is updated.
 * @param leafNode The leaf node that supplies the prediction and the impurity statistics.
 */
@Override public void encode(Node node, LeafNode leafNode){ int index = ValueUtil.asInt(leafNode.prediction()); node.setScore(this.categoricalLabel.getValue(index)); ImpurityCalculator impurityCalculator = leafNode.impurityStats(); node.setRecordCount((double)impurityCalculator.count()); double[] stats = impurityCalculator.stats(); for(int i = 0; i < stats.length; i++){ ScoreDistribution scoreDistribution = new ScoreDistribution(this.categoricalLabel.getValue(i), stats[i]); node.addScoreDistributions(scoreDistribution); } } };
/**
 * Registers the output fields of this converter.
 *
 * <p>
 * Starts from the output fields registered by the superclass.
 * For the classification mining function, a copy of that list is extended with
 * {@code double} probability fields created from the values of the categorical label.
 * For every other mining function, the superclass's list is returned as is.
 * </p>
 *
 * @param label The label; cast to {@code CategoricalLabel} in the classification case.
 * @param encoder The encoder, forwarded to the superclass.
 * @return The superclass's list, or an extended copy of it.
 */
@Override
public List<OutputField> registerOutputFields(Label label, SparkMLEncoder encoder){
	List<OutputField> inherited = super.registerOutputFields(label, encoder);

	switch(getMiningFunction()){
		case CLASSIFICATION:
			{
				CategoricalLabel categoricalLabel = (CategoricalLabel)label;

				// Copy first, so that the list returned by the superclass is never modified
				List<OutputField> extended = new ArrayList<>(inherited);
				extended.addAll(ModelUtil.createProbabilityFields(DataType.DOUBLE, categoricalLabel.getValues()));

				return extended;
			}
		default:
			return inherited;
	}
}
if(beta.size() != categoricalLabel.size() * (features.size() + 1)){ throw new IllegalArgumentException(); for(int i = 0; i < categoricalLabel.size(); i++){ List<Double> coefficients = beta.subList(offset, offset + features.size()); Double intercept = beta.get(offset + features.size()); .setTargetCategory(categoricalLabel.getValue(i));