/**
 * Declares the prediction column of the transformer as a continuous predicted output field.
 *
 * <p>
 * The field is named after the transformer's prediction column and takes its data type from the label.
 * A continuous feature wrapping the field is stored in the encoder under the prediction column name.
 * </p>
 *
 * @param label The label that supplies the data type of the predicted field.
 * @param encoder The encoder that receives the feature for the prediction column.
 *
 * @return A single-element list holding the predicted field.
 */
@Override
public List<OutputField> registerOutputFields(Label label, SparkMLEncoder encoder){
	T transformer = getTransformer();

	String predictionColumn = transformer.getPredictionCol();
	FieldName predictionName = FieldName.create(predictionColumn);

	OutputField predictionField = ModelUtil.createPredictedField(predictionName, label.getDataType(), OpType.CONTINUOUS);

	// Make the prediction column addressable as a feature
	ContinuousFeature predictionFeature = new ContinuousFeature(encoder, predictionField);
	encoder.putOnlyFeature(predictionColumn, predictionFeature);

	return Collections.singletonList(predictionField);
}
}
@Override public Schema createSchema(){ RGenericVector recipe = getObject(); Label label = getLabel(); List<? extends Feature> features = getFeatures(); RGenericVector steps = (RGenericVector)recipe.getValue("steps"); List<FieldName> outcomeNames = this.termRoles.entrySet().stream() .filter(entry -> (Role.OUTCOME).equals(entry.getValue())) .map(entry -> entry.getKey()) .collect(Collectors.toList()); if(outcomeNames.size() == 1){ FieldName outcomeName = outcomeNames.get(0); renameDataField(label.getName(), outcomeName); label = label.toRenamedLabel(outcomeName); } else if(outcomeNames.size() >= 2){ throw new IllegalArgumentException(); } // End if if(steps != null){ throw new IllegalArgumentException(); } return new Schema(label, features); }
/**
 * Encodes the schema, and re-encodes the label as a categorical field for classification-type models.
 *
 * <p>
 * The mining function is derived from the scalar "family" value of the "family" element.
 * For {@code CLASSIFICATION}, the label is converted to a categorical field whose categories are
 * the factor levels of the same-named variable of the "model" element.
 * Other mining functions leave the label untouched.
 * </p>
 *
 * @param encoder The encoder whose label is inspected and possibly replaced.
 */
@Override
public void encodeSchema(RExpEncoder encoder){
	RGenericVector glmObject = getObject();

	RGenericVector familyElement = (RGenericVector)glmObject.getValue("family");
	RGenericVector modelElement = (RGenericVector)glmObject.getValue("model");

	RStringVector familyName = (RStringVector)familyElement.getValue("family");

	super.encodeSchema(encoder);

	MiningFunction miningFunction = getMiningFunction(familyName.asScalar());

	switch(miningFunction){
		case CLASSIFICATION:
			{
				Label label = encoder.getLabel();

				// The label variable is looked up by its name
				RIntegerVector factor = (RIntegerVector)modelElement.getValue(label.getName().getValue());

				DataField categoricalField = (DataField)encoder.toCategorical(label.getName(), RExpUtil.getFactorLevels(factor));

				encoder.setLabel(categoricalField);
			}
			break;
		default:
			break;
	}
}
FieldName name = FieldName.create("predict(" + (label.getName()).getValue() + ")"); predictField = ModelUtil.createPredictedField(name, label.getDataType(), OpType.CONTINUOUS) .setFinalResult(false); } else predictField = ModelUtil.createPredictedField(name, label.getDataType(), OpType.CATEGORICAL) .setFinalResult(false); } else VerificationField verificationField = ModelUtil.createVerificationField(FieldName.create(targetFields.get(i))); DataType dataType = label.getDataType(); switch(dataType){ case DOUBLE:
/**
 * Encodes the schema, and then re-encodes the label as a categorical field.
 *
 * <p>
 * The categories are the values of {@code dimnames(0)} of the "freq" element.
 * </p>
 *
 * @param encoder The encoder whose label is replaced with the categorical field.
 */
@Override
public void encodeSchema(RExpEncoder encoder){
	RGenericVector lrmObject = getObject();

	RIntegerVector frequencies = (RIntegerVector)lrmObject.getValue("freq");
	RStringVector categoryNames = frequencies.dimnames(0);

	super.encodeSchema(encoder);

	Label label = encoder.getLabel();

	DataField categoricalField = (DataField)encoder.toCategorical(label.getName(), categoryNames.getValues());

	encoder.setLabel(categoricalField);
}
// Create an instance field for the label name, and bind it to the "data:y" column
InstanceField instanceField = new InstanceField(label.getName()) .setColumn("data:y");
// Create a categorical predicted output for the name, and append a matching categorical output feature (with the categories) to the result;
// then create a continuous predicted output for the same name and label data type, and append a matching continuous output feature.
// NOTE(review): the two assign-and-append pairs look like alternative branches whose enclosing conditionals are not visible here - confirm
output = ModelUtil.createPredictedOutput(name, OpType.CATEGORICAL, label.getDataType()); result.add(new CategoricalOutputFeature(encoder, output, name, label.getDataType(), categories)); output = ModelUtil.createPredictedOutput(name, OpType.CONTINUOUS, label.getDataType()); result.add(new ContinuousOutputFeature(encoder, output, name, label.getDataType()));
// Obtain the label of the schema, and the field name of that label
Label label = schema.getLabel(); FieldName name = label.getName();
/**
 * Encodes the wrapped regressor, applying the inverse function of the transformer to its prediction.
 *
 * <p>
 * If the transformer has no inverse function, then the result is whatever the regressor encodes for the schema.
 * Otherwise, the regressor is encoded against the anonymous variant of the schema,
 * and is given a continuous double predicted output named {@code func(<label name>)}
 * that carries a transformation applying the inverse function.
 * That model is then wrapped into a regression with {@code NormalizationMethod.NONE} against the original schema.
 * </p>
 *
 * @param schema The schema to encode the model against.
 *
 * @return The model of the regressor, or the regression that wraps it.
 */
@Override
public Model encodeModel(Schema schema){
	Regressor regressor = getRegressor();
	FunctionTransformer transformer = getTransformer();

	// The forward function is fetched, but not used below; the call is retained as is
	UFunc forwardFunc = transformer.getFunc();
	UFunc backwardFunc = transformer.getInverseFunc();

	// Nothing to undo
	if(backwardFunc == null){
		return regressor.encodeModel(schema);
	}

	Label label = schema.getLabel();
	// The features are fetched, but not used below; the call is retained as is
	List<? extends Feature> features = schema.getFeatures();

	Transformation inverseTransformation = new AbstractTransformation(){

		@Override
		public FieldName getName(FieldName name){
			return FieldName.create("inverseFunc(" + name + ")");
		}

		@Override
		public Expression createExpression(FieldRef fieldRef){
			return FunctionTransformer.encodeUFunc(backwardFunc, fieldRef);
		}
	};

	FieldName labelName = label.getName();
	FieldName funcName = FieldName.create("func(" + labelName + ")");

	Schema anonymousSchema = schema.toAnonymousSchema();

	Model regressorModel = regressor.encodeModel(anonymousSchema)
		.setOutput(ModelUtil.createPredictedOutput(funcName, OpType.CONTINUOUS, DataType.DOUBLE, inverseTransformation));

	return MiningModelUtil.createRegression(regressorModel, NormalizationMethod.NONE, schema);
}