private MiningModel encodeMultinomialClassification(List<TreeModel> treeModels, Double initF, Schema schema){ CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures()); List<Model> miningModels = new ArrayList<>(); for(int i = 0, columns = categoricalLabel.size(), rows = (treeModels.size() / columns); i < columns; i++){ MiningModel miningModel = createMiningModel(CMatrixUtil.getColumn(treeModels, rows, columns, i), initF, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE)); miningModels.add(miningModel); } return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema); }
@Override public Model encodeModel(Schema schema){ int[] shape = getCoefShape(); int numberOfClasses = shape[0]; int numberOfFeatures = shape[1]; boolean hasProbabilityDistribution = hasProbabilityDistribution(); List<? extends Number> coef = getCoef(); List<? extends Number> intercepts = getIntercept(); CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); List<? extends Feature> features = schema.getFeatures(); if(numberOfClasses == 1){ ClassifierUtil.checkSize(2, categoricalLabel); return RegressionModelUtil.createBinaryLogisticClassification(features, ValueUtil.asDoubles(CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, 0)), ValueUtil.asDouble(intercepts.get(0)), RegressionModel.NormalizationMethod.LOGIT, hasProbabilityDistribution, schema); } else if(numberOfClasses >= 3){ ClassifierUtil.checkSize(numberOfClasses, categoricalLabel); Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), Collections.emptyList()); List<RegressionModel> regressionModels = new ArrayList<>(); for(int i = 0, rows = categoricalLabel.size(); i < rows; i++){ RegressionModel regressionModel = RegressionModelUtil.createRegression(features, ValueUtil.asDoubles(CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, i)), ValueUtil.asDouble(intercepts.get(i)), RegressionModel.NormalizationMethod.LOGIT, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE)); regressionModels.add(regressionModel); } return MiningModelUtil.createClassification(regressionModels, RegressionModel.NormalizationMethod.SIMPLEMAX, hasProbabilityDistribution, schema); } else { throw new IllegalArgumentException(); } }
@Override public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){ Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures()); List<MiningModel> miningModels = new ArrayList<>(); CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); for(int i = 0, columns = categoricalLabel.size(), rows = (regTrees.size() / columns); i < columns; i++){ MiningModel miningModel = createMiningModel(CMatrixUtil.getColumn(regTrees, rows, columns, i), base_score, ntreeLimit, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.FLOAT)); miningModels.add(miningModel); } return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema); } }
List<Double> categoryWts = CMatrixUtil.getRow(wts.getValues(), categoricalLabel.size(), 1 + (features.size() + 1), i);
@Override public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){ Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures()); List<MiningModel> miningModels = new ArrayList<>(); CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); for(int i = 0, columns = categoricalLabel.size(), rows = (regTrees.size() / columns); i < columns; i++){ MiningModel miningModel = createMiningModel(CMatrixUtil.getColumn(regTrees, rows, columns, i), base_score, ntreeLimit, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.FLOAT)); miningModels.add(miningModel); } return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema); } }
RegressionModel firstRegressionModel = RegressionModelUtil.createRegression(features, ValueUtil.asDoubles(CMatrixUtil.getRow(coef, 1, numberOfFeatures, 0)), ValueUtil.asDouble(intercepts.get(0)), null, segmentSchema) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction"), OpType.CONTINUOUS, DataType.DOUBLE)); RegressionTable regressionTable = RegressionModelUtil.createRegressionTable(features, ValueUtil.asDoubles(CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, i)), ValueUtil.asDouble(intercepts.get(i))) .setTargetCategory(categoricalLabel.getValue(i));
List<Double> weights = (List)CMatrixUtil.getColumn(coefMatrix, rows, columns, column); Double bias = ValueUtil.asDouble((Number)interceptVector.get(column));
List<? extends Number> component = CMatrixUtil.getRow(components, numberOfComponents, numberOfFeatures, i);
VerificationField verificationField = ModelUtil.createVerificationField(FieldName.create(activeFields.get(i))); data.put(verificationField, CMatrixUtil.getColumn(activeValues, rows, activeFields.size(), i)); .setZeroThreshold(zeroThreshold.doubleValue()); data.put(verificationField, CMatrixUtil.getColumn(probabilityValues, rows, probabilityFields.size(), i)); data.put(verificationField, CMatrixUtil.getColumn(targetValues, rows, targetFields.size(), i));
@Override public ClusteringModel encodeModel(Schema schema){ int[] shape = getClusterCentersShape(); int numberOfClusters = shape[0]; int numberOfFeatures = shape[1]; List<? extends Number> clusterCenters = getClusterCenters(); List<Integer> labels = getLabels(); Multiset<Integer> labelCounts = HashMultiset.create(); if(labels != null){ labelCounts.addAll(labels); } List<Cluster> clusters = new ArrayList<>(); for(int i = 0; i < numberOfClusters; i++){ Cluster cluster = new Cluster() .setId(String.valueOf(i)) .setSize((labelCounts.size () > 0 ? labelCounts.count(i) : null)) .setArray(PMMLUtil.createRealArray(CMatrixUtil.getRow(clusterCenters, numberOfClusters, numberOfFeatures, i))); clusters.add(cluster); } ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE) .setCompareFunction(CompareFunction.ABS_DIFF) .setMeasure(new SquaredEuclidean()); ClusteringModel clusteringModel = new ClusteringModel(MiningFunction.CLUSTERING, ClusteringModel.ModelClass.CENTER_BASED, numberOfClusters, ModelUtil.createMiningSchema(schema.getLabel()), comparisonMeasure, ClusteringModelUtil.createClusteringFields(schema.getFeatures()), clusters) .setOutput(ClusteringModelUtil.createOutput(FieldName.create("Cluster"), DataType.DOUBLE, clusters)); return clusteringModel; }
data.put(instanceField.getColumn(), CMatrixUtil.getColumn(fitX, numberOfInstances, numberOfFeatures, i));
.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.SUM, CMatrixUtil.getRow(treeModels, ntreesPerGroup, ntreeGroups, i))) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("drfValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));
@Override public NaiveBayesModel encodeModel(Schema schema){ int[] shape = getThetaShape(); int numberOfClasses = shape[0]; int numberOfFeatures = shape[1]; List<? extends Number> theta = getTheta(); List<? extends Number> sigma = getSigma(); CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); BayesInputs bayesInputs = new BayesInputs(); for(int i = 0; i < numberOfFeatures; i++){ Feature feature = schema.getFeature(i); List<? extends Number> means = CMatrixUtil.getColumn(theta, numberOfClasses, numberOfFeatures, i); List<? extends Number> variances = CMatrixUtil.getColumn(sigma, numberOfClasses, numberOfFeatures, i); ContinuousFeature continuousFeature = feature.toContinuousFeature(); BayesInput bayesInput = new BayesInput(continuousFeature.getName()) .setTargetValueStats(encodeTargetValueStats(categoricalLabel.getValues(), means, variances)); bayesInputs.addBayesInputs(bayesInput); } List<Integer> classCount = getClassCount(); BayesOutput bayesOutput = new BayesOutput(categoricalLabel.getName(), null) .setTargetValueCounts(encodeTargetValueCounts(categoricalLabel.getValues(), classCount)); NaiveBayesModel naiveBayesModel = new NaiveBayesModel(0d, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), bayesInputs, bayesOutput) .setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel)); return naiveBayesModel; }
.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.SUM, CMatrixUtil.getRow(treeModels, ntreesPerGroup, ntreeGroups, i))) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));
List<? extends TreeRegressor> columnEstimators = CMatrixUtil.getColumn(estimators, rows, columns, i);