/**
 * Rewrites the schema so that every categorical feature is replaced by a run of binary features.
 *
 * <p>
 * For a {@link CategoricalFeature}, one {@link BinaryFeature} is produced per category value,
 * followed by one more for the extra value {@code "missing(NA)"}. Before the binary features are produced,
 * the categorical feature is passed to {@code ImputerUtil.encodeFeature} with that same value
 * and {@code MissingValueTreatmentMethod.AS_VALUE}. All other features are passed through unchanged.
 * </p>
 *
 * @param schema The schema to expand.
 * @return A new schema with the original label and the expanded feature list.
 */
@Override
public Schema toMojoModelSchema(Schema schema){
	Label label = schema.getLabel();
	List<? extends Feature> features = schema.getFeatures();

	Function<Feature, Stream<Feature>> expander = (feature) -> {
		// The encoder is fetched (and cast) for every feature, not only for the categorical ones
		ModelEncoder encoder = (ModelEncoder)feature.getEncoder();

		if(!(feature instanceof CategoricalFeature)){
			return Stream.of(feature);
		}

		CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

		// Work on a copy, so that the value list of the feature itself stays untouched
		List<String> values = new ArrayList<>(categoricalFeature.getValues());
		values.add("missing(NA)");

		ImputerUtil.encodeFeature(categoricalFeature, "missing(NA)", MissingValueTreatmentMethod.AS_VALUE);

		return values.stream()
			.map(value -> new BinaryFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), value));
	};

	features = features.stream()
		.flatMap(expander)
		.collect(Collectors.toList());

	return new Schema(label, features);
}
@Override public Schema apply(Schema schema){ Label label = schema.getLabel(); if(label instanceof ContinuousLabel){ return schema.toAnonymousSchema(); } else // XXX: Ideally, the categorical target field should also be anonymized if(label instanceof CategoricalLabel){ return schema; } else { throw new IllegalArgumentException(); } } };
// Excerpt of a larger body that is not visible here: reads the feature list from the schema,
// then returns the schema as transformed by `function` (which is declared outside this excerpt).
private List<? extends Feature> features = schema.getFeatures(); return schema.toTransformedSchema(function);
/**
 * Encodes a bagging ensemble as a segmented mining model.
 *
 * <p>
 * The estimator at position {@code i} is encoded against the sub-schema that is selected
 * by the feature indices at position {@code i} of {@code estimatorsFeatures}.
 * Sub-schemas are taken from {@code schema.toAnonymousSchema()}, whereas the mining schema
 * of the resulting model is created from the label of the original schema.
 * </p>
 *
 * @param estimators The member estimators.
 * @param estimatorsFeatures Feature indices per member estimator; read by the same position as {@code estimators}.
 * @param multipleModelMethod How the member models are combined.
 * @param miningFunction The mining function of the resulting model.
 * @param schema The schema of the ensemble.
 * @return The mining model holding one segment model per estimator.
 */
static public <E extends Estimator> MiningModel encodeBagging(List<E> estimators, List<List<Integer>> estimatorsFeatures, Segmentation.MultipleModelMethod multipleModelMethod, MiningFunction miningFunction, Schema schema){
	Schema anonymousSchema = schema.toAnonymousSchema();

	List<Model> memberModels = new ArrayList<>();

	for(int index = 0; index < estimators.size(); index++){
		E member = estimators.get(index);

		int[] featureIndices = Ints.toArray(estimatorsFeatures.get(index));

		Schema memberSchema = anonymousSchema.toSubSchema(featureIndices);

		memberModels.add(member.encodeModel(memberSchema));
	}

	MiningModel ensembleModel = new MiningModel(miningFunction, ModelUtil.createMiningSchema(schema.getLabel()));

	return ensembleModel.setSegmentation(MiningModelUtil.createSegmentation(multipleModelMethod, memberModels));
}
/**
 * Encodes one column of the fit as a general linear regression model with the Poisson distribution.
 *
 * @param a0 The vector of intercepts; the element at {@code column} is used.
 * @param beta The coefficients object; it is cast to {@code S4Object} and read at {@code column}.
 * @param column The column to encode.
 * @param schema Provides the label for the mining schema and the features for the regression table.
 * @return The populated general regression model.
 */
@Override
public Model encodeModel(RDoubleVector a0, RExp beta, int column, Schema schema){
	Double interceptValue = a0.getValue(column);
	List<Double> betaValues = getCoefficients((S4Object)beta, column);

	GeneralRegressionModel glm = new GeneralRegressionModel(
		GeneralRegressionModel.ModelType.GENERAL_LINEAR,
		MiningFunction.REGRESSION,
		ModelUtil.createMiningSchema(schema.getLabel()),
		null, null, null
	);
	glm = glm.setDistribution(GeneralRegressionModel.Distribution.POISSON);

	GeneralRegressionModelUtil.encodeRegressionTable(glm, schema.getFeatures(), betaValues, interceptValue, null);

	return glm;
}
}
/**
 * Encodes this estimator as an averaging forest for classification, with probability output fields.
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel} when the output is created.
 * @return The mining model with the probability output attached.
 */
@Override
public MiningModel encodeModel(Schema schema){
	// The forest is encoded first; the label cast happens afterwards, when the output is built
	return ForestUtil.encodeBaseForest(this, Segmentation.MultipleModelMethod.AVERAGE, MiningFunction.CLASSIFICATION, schema)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, (CategoricalLabel)schema.getLabel()));
}
/**
 * Encodes a binary classifier as a tree ensemble followed by a logistic classification step.
 *
 * <p>
 * The tree ensemble is built against a schema with an unnamed continuous double label,
 * and exposes its prediction as the output field {@code gbmValue}.
 * The logistic step receives the negated {@code coefficient} and the original schema.
 * </p>
 *
 * @param treeModels The member tree models.
 * @param initF The initial value that is handed over to {@code createMiningModel}.
 * @param coefficient The coefficient; passed on with the opposite sign.
 * @param schema The schema of the classifier.
 * @return The binary logistic classification model.
 */
private MiningModel encodeBinaryClassification(List<TreeModel> treeModels, Double initF, double coefficient, Schema schema){
	ContinuousLabel segmentLabel = new ContinuousLabel(null, DataType.DOUBLE);

	Schema ensembleSchema = new Schema(segmentLabel, schema.getFeatures());

	MiningModel ensembleModel = createMiningModel(treeModels, initF, ensembleSchema)
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createBinaryLogisticClassification(
		ensembleModel,
		-coefficient, 0d,
		RegressionModel.NormalizationMethod.LOGIT,
		true,
		schema
	);
}
/**
 * Encodes the wrapped regressor, applying the inverse function of the transformer to its prediction.
 *
 * <p>
 * If the transformer has no inverse function, the regressor is encoded directly against the schema.
 * Otherwise the regressor is encoded against {@code schema.toAnonymousSchema()}, its prediction is exposed
 * as the output field {@code func(<label name>)} together with a transformation that applies the inverse function,
 * and the result is wrapped by {@code MiningModelUtil.createRegression} for the original schema.
 * </p>
 *
 * @param schema The schema of the model.
 * @return The encoded model.
 */
@Override
public Model encodeModel(Schema schema){
	Regressor regressor = getRegressor();
	FunctionTransformer transformer = getTransformer();

	// Only the inverse function is needed for encoding; the forward function is not read here
	UFunc inverseFunc = transformer.getInverseFunc();

	if(inverseFunc == null){
		return regressor.encodeModel(schema);
	}

	Label label = schema.getLabel();

	Transformation transformation = new AbstractTransformation(){

		@Override
		public FieldName getName(FieldName name){
			return FieldName.create("inverseFunc(" + name + ")");
		}

		@Override
		public Expression createExpression(FieldRef fieldRef){
			return FunctionTransformer.encodeUFunc(inverseFunc, fieldRef);
		}
	};

	FieldName name = label.getName();

	Schema segmentSchema = schema.toAnonymousSchema();

	Model model = regressor.encodeModel(segmentSchema)
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("func(" + name + ")"), OpType.CONTINUOUS, DataType.DOUBLE, transformation));

	return MiningModelUtil.createRegression(model, NormalizationMethod.NONE, schema);
}
// Excerpt: takes the label from the schema, creates an empty feature list, calls schema.toAnonymousSchema(),
// and asks the converter created for `metaLearner` to adapt a schema made of that label and feature list.
// NOTE(review): `features` is still empty at the point shown here; presumably it is filled by statements that are not part of this excerpt - confirm.
Label label = schema.getLabel(); List<Feature> features = new ArrayList<>(); Schema segmentSchema = schema.toAnonymousSchema(); Converter<?> converter = converterFactory.newConverter(metaLearner); Schema metaLearnerSchema = converter.toMojoModelSchema(new Schema(label, features));
// Excerpt: reads the split attribute for `index`, then fetches the schema feature at position `att - 1`
// (presumably `att` is 1-based - confirm). The cast throws ClassCastException if that feature is not a ContinuousFeature.
int att = splitAtt.get(index); ContinuousFeature feature = (ContinuousFeature)schema.getFeature(att - 1);
/**
 * Encodes every member of an estimator ensemble as a tree model.
 *
 * <p>
 * Each member is encoded against a schema that is derived from {@code schema.toAnonymousSchema()}
 * by {@code toTreeModelSchema}, using the data type reported by that member.
 * </p>
 *
 * @param estimator The ensemble estimator that provides the members.
 * @param predicateManager Passed through to the tree model encoder.
 * @param scoreDistributionManager Passed through to the tree model encoder.
 * @param miningFunction The mining function of the tree models.
 * @param schema The schema of the ensemble.
 * @return The tree models, in the iteration order of the members.
 */
static public <E extends Estimator & HasEstimatorEnsemble<T>, T extends Estimator & HasTree> List<TreeModel> encodeTreeModelSegmentation(E estimator, PredicateManager predicateManager, ScoreDistributionManager scoreDistributionManager, MiningFunction miningFunction, Schema schema){
	List<? extends T> members = estimator.getEstimators();

	// Computed once, and shared by all members
	Schema anonymousSchema = schema.toAnonymousSchema();

	Function<T, TreeModel> memberEncoder = (member) -> {
		Schema memberSchema = toTreeModelSchema(member.getDataType(), anonymousSchema);

		return TreeModelUtil.encodeTreeModel(member, predicateManager, scoreDistributionManager, miningFunction, memberSchema);
	};

	return members.stream()
		.map(memberEncoder)
		.collect(Collectors.toList());
}
/**
 * Creates a schema from the current label and the current features.
 *
 * @return A new schema instance.
 */
public Schema createSchema(){
	return new Schema(getLabel(), getFeatures());
}
// Excerpt: fetches the schema feature at `featureIndex`, and reads the schema label as a categorical label.
// The cast throws ClassCastException if the label is not a CategoricalLabel.
Feature feature = schema.getFeature(featureIndex); CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();
/**
 * Encodes one column of the fit as a binary logistic classification model.
 *
 * @param a0 The vector of intercepts; the element at {@code column} is used.
 * @param beta The coefficients object; it is cast to {@code S4Object} and read at {@code column}.
 * @param column The column to encode.
 * @param schema Provides the features, and is also handed over to the model factory.
 * @return The model created by {@code RegressionModelUtil.createBinaryLogisticClassification}.
 */
@Override
public Model encodeModel(RDoubleVector a0, RExp beta, int column, Schema schema){
	Double interceptValue = a0.getValue(column);
	List<Double> betaValues = getCoefficients((S4Object)beta, column);

	RegressionModel.NormalizationMethod normalizationMethod = RegressionModel.NormalizationMethod.LOGIT;

	return RegressionModelUtil.createBinaryLogisticClassification(
		schema.getFeatures(), betaValues, interceptValue,
		normalizationMethod,
		true,
		schema
	);
}
}
/**
 * Prepares a schema for tree model encoding.
 *
 * <p>
 * Binary features are kept as they are; every other feature is replaced
 * by the result of {@code toContinuousFeature(dataType)}.
 * </p>
 *
 * @param dataType The data type requested for the continuous features.
 * @param schema The schema to transform.
 * @return The result of {@code schema.toTransformedSchema} for the mapping described above.
 */
static public Schema toTreeModelSchema(DataType dataType, Schema schema){
	Function<Feature, Feature> featureMapper = (feature) -> {

		if(feature instanceof BinaryFeature){
			return feature;
		}

		return feature.toContinuousFeature(dataType);
	};

	return schema.toTransformedSchema(featureMapper);
}
/**
 * Encodes one column of the fit as a general linear regression model with the normal distribution.
 *
 * @param a0 The vector of intercepts; the element at {@code column} is used.
 * @param beta The coefficients object; it is cast to {@code S4Object} and read at {@code column}.
 * @param column The column to encode.
 * @param schema Provides the label for the mining schema and the features for the regression table.
 * @return The populated general regression model.
 */
@Override
public Model encodeModel(RDoubleVector a0, RExp beta, int column, Schema schema){
	Double interceptValue = a0.getValue(column);
	List<Double> betaValues = getCoefficients((S4Object)beta, column);

	GeneralRegressionModel glm = new GeneralRegressionModel(
		GeneralRegressionModel.ModelType.GENERAL_LINEAR,
		MiningFunction.REGRESSION,
		ModelUtil.createMiningSchema(schema.getLabel()),
		null, null, null
	);
	glm = glm.setDistribution(GeneralRegressionModel.Distribution.NORMAL);

	GeneralRegressionModelUtil.encodeRegressionTable(glm, schema.getFeatures(), betaValues, interceptValue, null);

	return glm;
}
}
/**
 * Encodes this estimator as a classification tree model with probability output fields,
 * and passes the result through {@code TreeModelUtil.transform}.
 *
 * @param schema The schema; its label is cast to {@link CategoricalLabel} when the output is created.
 * @return The transformed tree model.
 */
@Override
public TreeModel encodeModel(Schema schema){
	TreeModel classificationTree = TreeModelUtil.encodeTreeModel(this, MiningFunction.CLASSIFICATION, schema)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, (CategoricalLabel)schema.getLabel()));

	return TreeModelUtil.transform(this, classificationTree);
}
/**
 * Encodes the regression trees as a tree ensemble followed by a logistic classification step.
 *
 * <p>
 * The tree ensemble is built against a schema with an unnamed continuous float label,
 * and exposes its prediction as the output field {@code xgbValue}.
 * </p>
 *
 * @param regTrees The regression trees.
 * @param base_score The base score that is handed over to {@code createMiningModel}.
 * @param ntreeLimit The tree limit that is handed over to {@code createMiningModel}.
 * @param schema The schema of the classifier.
 * @return The binary logistic classification model.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> regTrees, float base_score, Integer ntreeLimit, Schema schema){
	ContinuousLabel segmentLabel = new ContinuousLabel(null, DataType.FLOAT);

	Schema ensembleSchema = new Schema(segmentLabel, schema.getFeatures());

	MiningModel ensembleModel = createMiningModel(regTrees, base_score, ntreeLimit, ensembleSchema)
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT));

	return MiningModelUtil.createBinaryLogisticClassification(
		ensembleModel,
		1d, 0d,
		RegressionModel.NormalizationMethod.LOGIT,
		true,
		schema
	);
}
}
// Excerpt: declares the right-hand predicate (assigned outside this excerpt), and fetches the schema feature
// at position `var - 1` (presumably `var` is 1-based - confirm).
Predicate rightPredicate; Feature feature = schema.getFeature(var - 1);
/**
 * Encodes every tree of a tree ensemble model as a tree model.
 *
 * <p>
 * The ensemble is obtained from {@code converter.getTransformer()}.
 * All trees are encoded against the same {@code schema.toAnonymousSchema()} instance.
 * </p>
 *
 * @param converter The converter that owns the ensemble model.
 * @param predicateManager Passed through to {@code encodeDecisionTree}.
 * @param schema The schema of the ensemble.
 * @return The tree models, in the order of {@code trees()}.
 */
static public <C extends ModelConverter<? extends M> & HasTreeOptions, M extends Model<M> & TreeEnsembleModel<T>, T extends Model<T> & DecisionTreeModel> List<TreeModel> encodeDecisionTreeEnsemble(C converter, PredicateManager predicateManager, Schema schema){
	M ensemble = converter.getTransformer();

	Schema anonymousSchema = schema.toAnonymousSchema();

	List<TreeModel> result = new ArrayList<>();

	for(T member : ensemble.trees()){
		result.add(encodeDecisionTree(converter, member, predicateManager, anonymousSchema));
	}

	return result;
}