/**
 * Encodes the {@code IndexToString} transformer as a single categorical string field,
 * enumerating the transformer's labels as the field's valid values.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	IndexToString indexToString = getTransformer();

	// The transformer's labels become the declared value space of the output column
	List<String> labels = Arrays.asList(indexToString.getLabels());

	DataField dataField = encoder.createDataField(formatName(indexToString), OpType.CATEGORICAL, DataType.STRING, labels);

	return Collections.singletonList(new CategoricalFeature(encoder, dataField));
}
}
/**
 * Registers a feature for the given field, selecting the feature kind from the
 * field's operational type.
 *
 * @throws IllegalArgumentException If the operational type is neither categorical nor continuous.
 */
public void addFeature(Field<?> field){
	OpType opType = field.getOpType();

	Feature feature;

	if(opType == OpType.CATEGORICAL){
		// NOTE(review): assumes categorical fields are always DataField instances — confirm with callers
		feature = new CategoricalFeature(this, (DataField)field);
	} else

	if(opType == OpType.CONTINUOUS){
		feature = new ContinuousFeature(this, field);
	} else

	{
		throw new IllegalArgumentException();
	}

	addFeature(feature);
}
/**
 * Registers a feature for the given data field, dispatching on its operational type.
 *
 * @throws IllegalArgumentException If the operational type is neither continuous nor categorical.
 */
public void addFeature(DataField dataField){
	OpType opType = dataField.getOpType();

	if(opType == OpType.CONTINUOUS){
		addFeature(new ContinuousFeature(this, dataField));
	} else

	if(opType == OpType.CATEGORICAL){
		addFeature(new CategoricalFeature(this, dataField));
	} else

	{
		throw new IllegalArgumentException();
	}
}
/**
 * Encodes a label-encoding step: maps each class label to its integer index.
 *
 * <p>The returned feature presents the original (string) categories to callers,
 * but its continuous view resolves to the encoded integer indices.</p>
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<?> classes = getClasses();

	// This step operates on exactly one input feature
	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	// Parallel lists: original class labels and their positional indices
	List<String> inputValues = new ArrayList<>();
	List<String> outputValues = new ArrayList<>();

	for(int index = 0, max = classes.size(); index < max; index++){
		inputValues.add(ValueUtil.formatValue(classes.get(index)));
		outputValues.add(ValueUtil.formatValue(index));
	}

	// Deferred: only when the derived field is actually created does the input get
	// flagged as categorical and the label-to-index MapValues element built
	Supplier<MapValues> mapValuesSupplier = () -> {
		encoder.toCategorical(feature.getName(), inputValues);

		return PMMLUtil.createMapValues(feature.getName(), inputValues, outputValues);
	};

	DerivedField derivedField = encoder.ensureDerivedField(FeatureUtil.createName("label_encoder", feature), OpType.CATEGORICAL, DataType.INTEGER, mapValuesSupplier);

	Feature encodedFeature = new CategoricalFeature(encoder, derivedField, outputValues);

	// Expose the original categories, but delegate the continuous conversion to the encoded feature
	Feature result = new CategoricalFeature(encoder, feature, inputValues){

		@Override
		public ContinuousFeature toContinuousFeature(){
			return encodedFeature.toContinuousFeature();
		}
	};

	return Collections.singletonList(result);
}
result.add(new CategoricalFeature(encoder, derivedField, values)); } else
/**
 * Registers a categorical field together with one binary (dummy) feature per category.
 *
 * <p>The field itself is registered under its own name; each category is additionally
 * registered as a {@code BinaryFeature} under the concatenation of the field name and
 * the category name (mirroring R's dummy-variable naming).</p>
 *
 * @throws IllegalArgumentException If the category name and value lists differ in size.
 */
public void addField(Field<?> field, List<String> categoryNames, List<String> categoryValues){
	RExpEncoder encoder = getEncoder();

	// Names and values must be parallel lists
	if(categoryNames.size() != categoryValues.size()){
		throw new IllegalArgumentException();
	}

	// A boolean field whose values are exactly the canonical boolean values gets the specialized feature
	boolean booleanField = (DataType.BOOLEAN).equals(field.getDataType()) && (BooleanFeature.VALUES).equals(categoryValues);

	CategoricalFeature categoricalFeature = booleanField ? new BooleanFeature(encoder, field) : new CategoricalFeature(encoder, field, categoryValues);

	putFeature(field.getName(), categoricalFeature);

	for(int index = 0, max = categoryNames.size(); index < max; index++){
		String categoryName = categoryNames.get(index);
		String categoryValue = categoryValues.get(index);

		// Dummy-variable name: "<field><categoryName>"
		FieldName binaryName = FieldName.create((field.getName()).getValue() + categoryName);

		putFeature(binaryName, new BinaryFeature(encoder, field, categoryValue));
	}

	this.fields.add(field);
}
result.add(new CategoricalFeature(encoder, derivedField, labelCategories));
feature = new CategoricalFeature(encoder, dataField); } else
return Collections.singletonList(new CategoricalFeature(encoder, derivedField, categories));
/**
 * Encodes the {@code Bucketizer} transformer as a PMML {@code Discretize} expression.
 *
 * <p>Each consecutive pair of split points becomes one bin. All bins are left-closed,
 * right-open, except the last bin, which is closed on both ends so that the final
 * split value itself falls into it.</p>
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	Bucketizer bucketizer = getTransformer();

	Feature inputFeature = encoder.getOnlyFeature(bucketizer.getInputCol());

	ContinuousFeature continuousFeature = inputFeature.toContinuousFeature();

	double[] splits = bucketizer.getSplits();

	// n split points define (n - 1) bins
	int binCount = splits.length - 1;

	Discretize discretize = new Discretize(continuousFeature.getName());

	List<String> binLabels = new ArrayList<>(binCount);

	for(int bin = 0; bin < binCount; bin++){
		String binLabel = String.valueOf(bin);

		binLabels.add(binLabel);

		// Last bin includes its right endpoint; all others exclude it
		boolean lastBin = (bin == (binCount - 1));

		Interval interval = new Interval(lastBin ? Interval.Closure.CLOSED_CLOSED : Interval.Closure.CLOSED_OPEN)
			.setLeftMargin(formatMargin(splits[bin]))
			.setRightMargin(formatMargin(splits[bin + 1]));

		discretize.addDiscretizeBins(new DiscretizeBin(binLabel, interval));
	}

	DerivedField derivedField = encoder.createDerivedField(formatName(bucketizer), OpType.CATEGORICAL, DataType.INTEGER, discretize);

	return Collections.singletonList(new CategoricalFeature(encoder, derivedField, binLabels));
}
/**
 * Encodes the {@code Binarizer} transformer as a PMML "if" expression:
 * values less than or equal to the threshold map to {@code 0}, all others to {@code 1}.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	Binarizer binarizer = getTransformer();

	Feature inputFeature = encoder.getOnlyFeature(binarizer.getInputCol());

	ContinuousFeature continuousFeature = inputFeature.toContinuousFeature();

	// Condition: input <= threshold
	Apply condition = PMMLUtil.createApply("lessOrEqual", continuousFeature.ref(), PMMLUtil.createConstant(binarizer.getThreshold()));

	// if(condition) then 0 else 1
	Apply apply = new Apply("if")
		.addExpressions(condition)
		.addExpressions(PMMLUtil.createConstant(0d), PMMLUtil.createConstant(1d));

	DerivedField derivedField = encoder.createDerivedField(formatName(binarizer), OpType.CATEGORICAL, DataType.DOUBLE, apply);

	return Collections.singletonList(new CategoricalFeature(encoder, derivedField, Arrays.asList("0", "1")));
}
}
return Collections.singletonList(new CategoricalFeature(encoder, field, categories));
encoder.putOnlyFeature(labelCol, new CategoricalFeature(encoder, field, categories));