// Decodes a numeric class index into its category string.
// NOTE(review): the `- 1` implies `value` is 1-based while CategoricalLabel
// indexing is 0-based — confirm the producer really emits 1-based indices.
@Override public String encode(Integer value){ return categoricalLabel.getValue(value - 1); } };
/**
 * Builds one {@code ScoreDistribution} element per target category, pairing
 * each category value with its record count.
 *
 * @param categoricalLabel Provides the category values, in index order.
 * @param recordCounts Per-category record counts; parallel to the label's
 *        category indices.
 * @return A list of interned score distribution elements, one per category.
 */
public List<ScoreDistribution> createScoreDistribution(CategoricalLabel categoricalLabel, double[] recordCounts){
	List<ScoreDistribution> scoreDistributions = new ArrayList<>();

	for(int index = 0; index < categoricalLabel.size(); index++){
		ScoreDistribution scoreDistribution = new InternableScoreDistribution()
			.setValue(categoricalLabel.getValue(index))
			.setRecordCount(recordCounts[index]);

		// Interning de-duplicates identical elements across the model
		scoreDistributions.add(intern(scoreDistribution));
	}

	return scoreDistributions;
}
/**
 * Encodes a multi-class tree ensemble as a PMML classification model.
 *
 * Each class gets its own sub-ensemble (one entry of {@code regTrees}) that
 * predicts a continuous margin; the margins are combined via softmax.
 *
 * @param regTrees One list of regression trees per target class.
 * @param base_score Base margin added to every sub-model's prediction.
 * @param schema Classification schema; its label must be categorical.
 */
public static MiningModel encodeMiningModel(List<List<RegressionTree>> regTrees, float base_score, Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	// Per-class sub-models predict a continuous (margin) value, not a class
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures());

	List<MiningModel> miningModels = new ArrayList<>();

	for(int classIndex = 0, numClasses = regTrees.size(); classIndex < numClasses; classIndex++){
		FieldName outputField = FieldName.create("class_(" + categoricalLabel.getValue(classIndex) + ")");

		MiningModel miningModel = createMiningModel(regTrees.get(classIndex), base_score, segmentSchema)
			.setOutput(ModelUtil.createPredictedOutput(outputField, OpType.CONTINUOUS, DataType.FLOAT));

		miningModels.add(miningModel);
	}

	return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
// Encodes a classification leaf: the winning class index becomes the node
// score, and the per-class counts from the impurity stats become the node's
// score distribution. Record count is the total sample count at the leaf.
// NOTE(review): assumes stats.length matches the number of label categories —
// confirm against the surrounding tree converter.
@Override public void encode(Node node, LeafNode leafNode){ int index = ValueUtil.asInt(leafNode.prediction()); node.setScore(this.categoricalLabel.getValue(index)); ImpurityCalculator impurityCalculator = leafNode.impurityStats(); node.setRecordCount((double)impurityCalculator.count()); double[] stats = impurityCalculator.stats(); for(int i = 0; i < stats.length; i++){ ScoreDistribution scoreDistribution = new ScoreDistribution(this.categoricalLabel.getValue(i), stats[i]); node.addScoreDistributions(scoreDistribution); } } };
.addOutputFields(ModelUtil.createProbabilityField(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), DataType.DOUBLE, categoricalLabel.getValue(i))); Schema segmentSchema = new Schema(new CategoricalLabel(null, DataType.STRING, Arrays.asList("(other)", categoricalLabel.getValue(i))), schema.getFeatures());
.setTargetCategory(categoricalLabel.getValue(i));
.setTargetCategory(categoricalLabel.getValue(i));
/**
 * Encodes a GBM multinomial classifier: the flat list of tree models is a
 * column-major (rows x columns) matrix with one column per class; each column
 * becomes a per-class margin sub-model, combined via softmax.
 *
 * @param treeModels Flat column-major matrix of tree models.
 * @param initF Initial prediction value added to every sub-model.
 * @param schema Classification schema; its label must be categorical.
 */
private MiningModel encodeMultinomialClassification(List<TreeModel> treeModels, Double initF, Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	// Sub-models predict a continuous margin, not a class
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures());

	int columns = categoricalLabel.size();
	int rows = treeModels.size() / columns;

	List<Model> miningModels = new ArrayList<>();

	for(int column = 0; column < columns; column++){
		List<TreeModel> classTreeModels = CMatrixUtil.getColumn(treeModels, rows, columns, column);

		MiningModel miningModel = createMiningModel(classTreeModels, initF, segmentSchema)
			.setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue(" + categoricalLabel.getValue(column) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

		miningModels.add(miningModel);
	}

	return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
/**
 * Encodes a multi-class XGBoost ensemble: the flat tree list is a column-major
 * (rows x columns) matrix with one column per class; each column becomes a
 * per-class margin sub-model, combined via softmax.
 *
 * @param regTrees Flat column-major matrix of regression trees.
 * @param base_score Base margin added to every sub-model's prediction.
 * @param ntreeLimit Optional cap on the number of trees used per class.
 * @param schema Classification schema; its label must be categorical.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	// Sub-models predict a continuous margin, not a class
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures());

	int columns = categoricalLabel.size();
	int rows = regTrees.size() / columns;

	List<MiningModel> miningModels = new ArrayList<>();

	for(int column = 0; column < columns; column++){
		List<RegTree> classRegTrees = CMatrixUtil.getColumn(regTrees, rows, columns, column);

		MiningModel miningModel = createMiningModel(classRegTrees, base_score, ntreeLimit, segmentSchema)
			.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue(" + categoricalLabel.getValue(column) + ")"), OpType.CONTINUOUS, DataType.FLOAT));

		miningModels.add(miningModel);
	}

	return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
}
/**
 * Encodes a multi-class XGBoost ensemble as a softmax classification model.
 *
 * The flat tree list is interpreted as a column-major (rows x columns) matrix
 * with one column per target class; each column yields a sub-model that
 * predicts a continuous margin for that class.
 *
 * @param regTrees Flat column-major matrix of regression trees.
 * @param base_score Base margin added to every sub-model's prediction.
 * @param ntreeLimit Optional cap on the number of trees used per class.
 * @param schema Classification schema; its label must be categorical.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	// Each per-class sub-model targets a continuous (margin) value
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.FLOAT), schema.getFeatures());

	int numClasses = categoricalLabel.size();
	int treesPerClass = regTrees.size() / numClasses;

	List<MiningModel> miningModels = new ArrayList<>();

	for(int i = 0; i < numClasses; i++){
		FieldName outputField = FieldName.create("xgbValue(" + categoricalLabel.getValue(i) + ")");

		MiningModel miningModel = createMiningModel(CMatrixUtil.getColumn(regTrees, treesPerClass, numClasses, i), base_score, ntreeLimit, segmentSchema)
			.setOutput(ModelUtil.createPredictedOutput(outputField, OpType.CONTINUOUS, DataType.FLOAT));

		miningModels.add(miningModel);
	}

	return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
}
/**
 * Encodes a multi-class LightGBM ensemble: the flat tree list is a row-major
 * (rows x columns) matrix with one row per class; each row becomes a
 * per-class margin sub-model, combined via softmax.
 *
 * @param trees Flat row-major matrix of trees.
 * @param numIteration Optional cap on the number of boosting iterations used.
 * @param schema Classification schema; its label must be categorical.
 */
@Override
public MiningModel encodeMiningModel(List<Tree> trees, Integer numIteration, Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	// Sub-models predict a continuous margin, not a class
	Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), schema.getFeatures());

	int rows = categoricalLabel.size();
	int columns = trees.size() / rows;

	List<MiningModel> miningModels = new ArrayList<>();

	for(int row = 0; row < rows; row++){
		List<Tree> classTrees = FortranMatrixUtil.getRow(trees, rows, columns, row);

		MiningModel miningModel = createMiningModel(classTrees, numIteration, segmentSchema)
			.setOutput(ModelUtil.createPredictedOutput(FieldName.create("lgbmValue(" + categoricalLabel.getValue(row) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

		miningModels.add(miningModel);
	}

	return MiningModelUtil.createClassification(miningModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
}
String value = categoricalLabel.getValue(i); Double probability = probabilities.getValue(i);
/**
 * Encodes a linear classifier from its coefficient matrix.
 *
 * A single coefficient row encodes a binary logistic model; three or more
 * rows encode one-vs-rest logistic regressions normalized with simplemax.
 * NOTE(review): a two-row coefficient matrix is rejected — presumably the
 * producer always collapses the binary case to one row; confirm upstream.
 *
 * @param schema Classification schema; its label must be categorical.
 * @throws IllegalArgumentException If the coefficient matrix has exactly two
 *         rows, or the class count does not match the label.
 */
@Override
public Model encodeModel(Schema schema){
	int[] shape = getCoefShape();

	int numberOfClasses = shape[0];
	int numberOfFeatures = shape[1];

	boolean hasProbabilityDistribution = hasProbabilityDistribution();

	List<? extends Number> coef = getCoef();
	List<? extends Number> intercepts = getIntercept();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<? extends Feature> features = schema.getFeatures();

	if(numberOfClasses == 1){
		// One coefficient row serves both classes of a binary problem
		ClassifierUtil.checkSize(2, categoricalLabel);

		List<Double> coefficients = ValueUtil.asDoubles(CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, 0));
		Double intercept = ValueUtil.asDouble(intercepts.get(0));

		return RegressionModelUtil.createBinaryLogisticClassification(features, coefficients, intercept, RegressionModel.NormalizationMethod.LOGIT, hasProbabilityDistribution, schema);
	} else

	if(numberOfClasses >= 3){
		ClassifierUtil.checkSize(numberOfClasses, categoricalLabel);

		// Per-class regressions carry no features of their own
		Schema segmentSchema = new Schema(new ContinuousLabel(null, DataType.DOUBLE), Collections.emptyList());

		List<RegressionModel> regressionModels = new ArrayList<>();

		int rows = categoricalLabel.size();

		for(int row = 0; row < rows; row++){
			List<Double> coefficients = ValueUtil.asDoubles(CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, row));
			Double intercept = ValueUtil.asDouble(intercepts.get(row));

			RegressionModel regressionModel = RegressionModelUtil.createRegression(features, coefficients, intercept, RegressionModel.NormalizationMethod.LOGIT, segmentSchema)
				.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(row) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

			regressionModels.add(regressionModel);
		}

		return MiningModelUtil.createClassification(regressionModels, RegressionModel.NormalizationMethod.SIMPLEMAX, hasProbabilityDistribution, schema);
	} else

	{
		throw new IllegalArgumentException();
	}
}
String targetCategory = categoricalLabel.getValue(i);
String targetCategory = categoricalLabel.getValue(1);
.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(1) + ")"), OpType.CONTINUOUS, DataType.DOUBLE, loss.createTransformation())); .setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE, loss.createTransformation()));
MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(null)) .setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.SUM, CMatrixUtil.getRow(treeModels, ntreesPerGroup, ntreeGroups, i))) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("drfValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));
/**
 * Encodes a Spark ML generalized linear regression model as a PMML
 * general regression model.
 *
 * For classification the label must be binary; the second category is used
 * as the target category of the regression table.
 *
 * @param schema Schema describing the label and features.
 * @throws IllegalArgumentException If a classification label is not binary.
 */
@Override
public GeneralRegressionModel encodeModel(Schema schema){
	GeneralizedLinearRegressionModel model = getTransformer();

	MiningFunction miningFunction = getMiningFunction();

	String targetCategory = null;

	if(miningFunction == MiningFunction.CLASSIFICATION){
		CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

		// Only binomial classification is representable here
		if(categoricalLabel.size() != 2){
			throw new IllegalArgumentException();
		}

		// The second category acts as the "positive" target category
		targetCategory = categoricalLabel.getValue(1);
	}

	List<Feature> features = new ArrayList<>(schema.getFeatures());
	List<Double> coefficients = new ArrayList<>(VectorUtil.toList(model.coefficients()));

	// May drop constant terms etc.; mutates features/coefficients in place
	RegressionTableUtil.simplify(this, targetCategory, features, coefficients);

	GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, miningFunction, ModelUtil.createMiningSchema(schema.getLabel()), null, null, null)
		.setDistribution(parseFamily(model.getFamily()))
		.setLinkFunction(parseLinkFunction(model.getLink()))
		.setLinkParameter(parseLinkParameter(model.getLink()));

	GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, coefficients, model.intercept(), targetCategory);

	return generalRegressionModel;
}
String targetCategory = categoricalLabel.getValue(i);