RegressionModel regressionModel = new RegressionModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), regressionTables) .setNormalizationMethod(RegressionModel.NormalizationMethod.SOFTMAX) .setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));
/**
 * Encodes the model produced by the superclass and attaches a predicted output
 * named "decisionFunction" (continuous, double) to it.
 *
 * The output is created together with an outlier transformation whose
 * expression is <code>lessOrEqual(fieldRef, 0)</code>.
 *
 * @param schema the schema handed on to the superclass encoder.
 * @return the superclass model with the decision function output set.
 */
@Override
public SupportVectorMachineModel encodeModel(Schema schema){
	// Transformation whose expression compares the referenced field against zero
	Transformation outlierTransformation = new OutlierTransformation(){

		@Override
		public Expression createExpression(FieldRef fieldRef){
			return PMMLUtil.createApply("lessOrEqual", fieldRef, PMMLUtil.createConstant(0d));
		}
	};

	SupportVectorMachineModel svmModel = super.encodeModel(schema);

	FieldName decisionFunction = FieldName.create("decisionFunction");

	return svmModel.setOutput(ModelUtil.createPredictedOutput(decisionFunction, OpType.CONTINUOUS, DataType.DOUBLE, outlierTransformation));
}
}
/**
 * Builds a regression mining model that sums the given tree models and
 * rescales the target using <code>initF</code>.
 *
 * @param treeModels the member tree models of the SUM segmentation.
 * @param initF value passed as the second argument of
 *        <code>ModelUtil.createRescaleTargets</code> (the first argument is <code>null</code>).
 * @param schema schema whose label is cast to {@link ContinuousLabel}.
 * @return the assembled mining model.
 */
static private MiningModel createMiningModel(List<TreeModel> treeModels, Double initF, Schema schema){
	ContinuousLabel label = (ContinuousLabel)schema.getLabel();

	MiningModel ensemble = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(label));

	Segmentation summedTrees = MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.SUM, treeModels);

	return ensemble
		.setSegmentation(summedTrees)
		.setTargets(ModelUtil.createRescaleTargets(null, initF, label));
}
/**
 * Wraps the given models into a mining model that uses the MODEL_CHAIN
 * segmentation method.
 *
 * The mining function and the (simplified) math context of the resulting
 * model are taken from the last model of the list.
 *
 * @param models the models to chain, in order; must contain at least one element.
 * @param schema schema whose label is used to create the mining schema.
 * @return the chained mining model.
 * @throws IllegalArgumentException if <code>models</code> is empty.
 */
static public MiningModel createModelChain(List<? extends Model> models, Schema schema){

	// The last model drives the mining function, so an empty chain cannot be encoded
	if(models.isEmpty()){
		throw new IllegalArgumentException("Model chain requires at least one model");
	}

	Segmentation segmentation = createSegmentation(Segmentation.MultipleModelMethod.MODEL_CHAIN, models);

	Model lastModel = Iterables.getLast(models);

	MiningModel miningModel = new MiningModel(lastModel.getMiningFunction(), ModelUtil.createMiningSchema(schema.getLabel()))
		.setMathContext(ModelUtil.simplifyMathContext(lastModel.getMathContext()))
		.setSegmentation(segmentation);

	return miningModel;
}
RegressionModel regressionModel = new RegressionModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), regressionTables) .setNormalizationMethod(normalizationMethod) .setMathContext(ModelUtil.simplifyMathContext(mathContext)) .setOutput(hasProbabilityDistribution ? ModelUtil.createProbabilityOutput(mathContext, categoricalLabel) : null);
/**
 * Encodes a bagging ensemble as a mining model.
 *
 * Each estimator is encoded against a sub-schema of the anonymous schema,
 * selected by the feature indices found at the same position of
 * <code>estimatorsFeatures</code>.
 *
 * @param estimators the member estimators.
 * @param estimatorsFeatures per-estimator lists of feature indices, read by estimator position.
 * @param multipleModelMethod the segmentation method used to combine the member models.
 * @param miningFunction the mining function of the resulting model.
 * @param schema the schema of the ensemble; its label is used for the mining schema.
 * @return the mining model holding one segment model per estimator.
 */
static public <E extends Estimator> MiningModel encodeBagging(List<E> estimators, List<List<Integer>> estimatorsFeatures, Segmentation.MultipleModelMethod multipleModelMethod, MiningFunction miningFunction, Schema schema){
	Schema anonymousSchema = schema.toAnonymousSchema();

	List<Model> memberModels = new ArrayList<>();

	int position = 0;

	for(E estimator : estimators){
		List<Integer> featureIndices = estimatorsFeatures.get(position);

		Schema memberSchema = anonymousSchema.toSubSchema(Ints.toArray(featureIndices));

		memberModels.add(estimator.encodeModel(memberSchema));

		position++;
	}

	MiningModel ensemble = new MiningModel(miningFunction, ModelUtil.createMiningSchema(schema.getLabel()));

	Segmentation segmentation = MiningModelUtil.createSegmentation(multipleModelMethod, memberModels);

	return ensemble.setSegmentation(segmentation);
}
ContinuousLabel continuousLabel = (ContinuousLabel)label; MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(continuousLabel)) .setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.AVERAGE, treeModels)); ContinuousLabel continuousLabel = new ContinuousLabel(null, DataType.DOUBLE); MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(continuousLabel)) .setSegmentation(MiningModelUtil.createSegmentation(MultipleModelMethod.AVERAGE, treeModels)) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("drfValue"), OpType.CONTINUOUS, DataType.DOUBLE)); MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(null)) .setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.SUM, CMatrixUtil.getRow(treeModels, ntreesPerGroup, ntreeGroups, i))) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("drfValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));
ContinuousLabel continuousLabel = (ContinuousLabel)label; MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(continuousLabel)) .setSegmentation(MiningModelUtil.createSegmentation(MultipleModelMethod.SUM, treeModels)) .setTargets(ModelUtil.createRescaleTargets(null, (double)model._init_f, continuousLabel)); ContinuousLabel continuousLabel = new ContinuousLabel(null, DataType.DOUBLE); MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(continuousLabel)) .setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.SUM, treeModels)) .setTargets(ModelUtil.createRescaleTargets(null, (double)model._init_f, continuousLabel)) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue"), OpType.CONTINUOUS, DataType.DOUBLE)); MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(null)) .setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.SUM, CMatrixUtil.getRow(treeModels, ntreesPerGroup, ntreeGroups, i))) .setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));
.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction"), OpType.CONTINUOUS, DataType.DOUBLE)); regressionTables.add(activeRegressionTable); RegressionModel secondRegressionModel = new RegressionModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), regressionTables) .setNormalizationMethod(RegressionModel.NormalizationMethod.SOFTMAX) .setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel)); RegressionModel regressionModel = new RegressionModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), regressionTables) .setNormalizationMethod(RegressionModel.NormalizationMethod.SOFTMAX) .setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));
output = ModelUtil.ensureOutput(finalModel); } else output = ModelUtil.ensureOutput(model); predictField = ModelUtil.createPredictedField(name, label.getDataType(), OpType.CONTINUOUS) .setFinalResult(false); } else predictField = ModelUtil.createPredictedField(name, label.getDataType(), OpType.CATEGORICAL) .setFinalResult(false); } else List<OutputField> predictProbaFields = ModelUtil.createProbabilityFields(DataType.DOUBLE, categoricalLabel.getValues()); OutputField nodeIdField = ModelUtil.createEntityIdField(FieldName.create("nodeId")) .setDataType(DataType.INTEGER); VerificationField verificationField = ModelUtil.createVerificationField(FieldName.create(activeFields.get(i))); VerificationField verificationField = ModelUtil.createVerificationField(FieldName.create(probabilityFields.get(i))) .setPrecision(precision.doubleValue()) .setZeroThreshold(zeroThreshold.doubleValue()); VerificationField verificationField = ModelUtil.createVerificationField(FieldName.create(targetFields.get(i))); model.setModelVerification(ModelUtil.createModelVerification(data));
ContinuousLabel continuousLabel = (ContinuousLabel)label; OutputField predictedField = ModelUtil.createPredictedField(FieldName.create("stack(" + i + ")"), DataType.DOUBLE, OpType.CONTINUOUS) .setFinalResult(false); OutputField probabilityField = ModelUtil.createProbabilityField(FieldName.create("stack(" + i +", " + value + ")"), DataType.DOUBLE, value) .setFinalResult(false); Output segmentOutput = ModelUtil.ensureOutput(segmentModel);
output = ModelUtil.createPredictedOutput(name, OpType.CATEGORICAL, label.getDataType()); OutputField outputField = ModelUtil.createProbabilityField(FieldName.create("probability(" + name.getValue() + ", " + category + ")"), DataType.DOUBLE, category) .setFinalResult(false); output = ModelUtil.createPredictedOutput(name, OpType.CONTINUOUS, label.getDataType());
.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction"), OpType.CONTINUOUS, DataType.DOUBLE, outlier)); RDoubleVector yScaledScale = (RDoubleVector)yScale.getValue("scaled:scale"); supportVectorMachineModel.setTargets(ModelUtil.createRescaleTargets(-1d * yScaledScale.asScalar(), yScaledCenter.asScalar(), (ContinuousLabel)schema.getLabel()));
OutputField pmmlPredictedField = ModelUtil.createPredictedField(FieldName.create("pmml(" + predictionCol + ")"), categoricalLabel.getDataType(), OpType.CATEGORICAL); String value = categoricalLabel.getValue(i); OutputField probabilityField = ModelUtil.createProbabilityField(FieldName.create(probabilityCol + "(" + value + ")"), DataType.DOUBLE, value);
Output output = ModelUtil.ensureOutput(model); VerificationField verificationField = ModelUtil.createVerificationField(FieldName.create(inputColumn)); Feature feature = encoder.getOnlyFeature(predictionColumn); VerificationField verificationField = ModelUtil.createVerificationField(feature.getName()) .setPrecision(precision) .setZeroThreshold(zeroThreshold); Feature feature = features.get(i); VerificationField verificationField = ModelUtil.createVerificationField(feature.getName()) .setPrecision(precision) .setZeroThreshold(zeroThreshold); model.setModelVerification(ModelUtil.createModelVerification(data));
/**
 * Encodes this forest as a classification mining model that averages its
 * members, and attaches a double-typed probability output for the
 * categorical label of the schema.
 *
 * @param schema the schema; its label is cast to {@link CategoricalLabel}.
 * @return the encoded mining model.
 */
@Override
public MiningModel encodeModel(Schema schema){
	MiningModel forest = ForestUtil.encodeBaseForest(this, Segmentation.MultipleModelMethod.AVERAGE, MiningFunction.CLASSIFICATION, schema);

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	return forest.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));
}
/**
 * Encodes the "tree" attribute of the underlying S4 object as a tree model.
 *
 * The output depends on the mining function of this converter: an empty
 * output for regression, a double-typed probability output for
 * classification. In both cases an entity identifier field named "nodeId"
 * is appended.
 *
 * @param schema the schema; for classification its label is cast to {@link CategoricalLabel}.
 * @return the encoded tree model with its output set.
 * @throws IllegalArgumentException if the mining function is neither REGRESSION nor CLASSIFICATION.
 */
@Override
public TreeModel encodeModel(Schema schema){
	S4Object binaryTree = getObject();

	RGenericVector tree = (RGenericVector)binaryTree.getAttributeValue("tree");

	Output output;

	switch(this.miningFunction){
		case REGRESSION:
			output = new Output();
			break;
		case CLASSIFICATION:
			CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

			output = ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel);
			break;
		default:
			// Name the offending value so that the failure can be diagnosed
			throw new IllegalArgumentException("Unsupported mining function: " + this.miningFunction);
	}

	output.addOutputFields(ModelUtil.createEntityIdField(FieldName.create("nodeId")));

	return encodeTreeModel(tree, schema)
		.setOutput(output);
}
Output output = ModelUtil.ensureOutput(model); OutputField nodeIdField = ModelUtil.createEntityIdField(FieldName.create("nodeId")) .setDataType(DataType.INTEGER);
/**
 * Creates the predicted output field named after the prediction column of
 * the transformer (string, categorical), and registers a string feature
 * backed by that field with the encoder under the same column name.
 *
 * @param label not read by this implementation.
 * @param encoder the encoder that receives the feature for the prediction column.
 * @return a single-element list holding the predicted output field.
 */
@Override
public List<OutputField> registerOutputFields(Label label, SparkMLEncoder encoder){
	String predictionColumn = getTransformer().getPredictionCol();

	OutputField predictedOutputField = ModelUtil.createPredictedField(FieldName.create(predictionColumn), DataType.STRING, OpType.CATEGORICAL);

	Feature predictionFeature = new StringFeature(encoder, predictedOutputField);

	encoder.putOnlyFeature(predictionColumn, predictionFeature);

	return Collections.singletonList(predictedOutputField);
}
}
.addOutputFields(ModelUtil.createProbabilityField(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), DataType.DOUBLE, categoricalLabel.getValue(i)));