/**
 * Lists the name of every field declared in a PMML data dictionary.
 *
 * @param dictionary {@link DataDictionary} from model
 * @return names of features in order
 */
public static List<String> getFeatureNames(DataDictionary dictionary) {
  List<DataField> declaredFields = dictionary.getDataFields();
  // A dictionary without any field is rejected up front by the precondition check.
  boolean hasFields = declaredFields != null && !declaredFields.isEmpty();
  Preconditions.checkArgument(hasFields, "No fields in DataDictionary");
  return declaredFields.stream()
      .map(DataField::getName)
      .map(fieldName -> fieldName.getValue())
      .collect(Collectors.toList());
}
/**
 * Builds the categorical value encodings described by a data dictionary.
 *
 * @param dictionary {@link DataDictionary} whose fields are examined in order
 * @return encodings built from a map of field position to that field's values; fields with
 *  no values are left out of the map
 */
public static CategoricalValueEncodings buildCategoricalValueEncodings(
    DataDictionary dictionary) {
  Map<Integer,Collection<String>> valuesByPosition = new HashMap<>();
  int position = 0;
  for (TypeDefinitionField field : dictionary.getDataFields()) {
    Collection<Value> declaredValues = field.getValues();
    boolean hasValues = declaredValues != null && !declaredValues.isEmpty();
    if (hasValues) {
      Collection<String> asStrings =
          declaredValues.stream().map(value -> value.getValue()).collect(Collectors.toList());
      valuesByPosition.put(position, asStrings);
    }
    // The position advances for every field, including those without values.
    position++;
  }
  return new CategoricalValueEncodings(valuesByPosition);
}
/**
 * Builds a data dictionary from the test schema with five distinct values registered for
 * feature index 1, then checks the field count, each field's name and the values attached
 * to the field at index 1.
 */
@Test
public void testBuildDataDictionary() {
  Map<Integer,Collection<String>> valuesByIndex = new HashMap<>();
  valuesByIndex.put(1, Arrays.asList("one", "two", "three", "four", "five"));
  DataDictionary built = AppPMMLUtils.buildDataDictionary(
      buildTestSchema(), new CategoricalValueEncodings(valuesByIndex));

  assertEquals(4, built.getNumberOfFields().intValue());
  checkDataField(built.getDataFields().get(0), "foo", null);
  checkDataField(built.getDataFields().get(1), "bar", true);
  checkDataField(built.getDataFields().get(2), "baz", null);
  checkDataField(built.getDataFields().get(3), "bing", false);

  // The values of field 1 must match the registered values, in the same order.
  List<Value> actualValues = built.getDataFields().get(1).getValues();
  assertEquals(5, actualValues.size());
  String[] expectedValues = { "one", "two", "three", "four", "five" };
  int index = 0;
  while (index < expectedValues.length) {
    assertEquals(expectedValues[index], actualValues.get(index).getValue());
    index++;
  }
}
/**
 * Asserts that a data dictionary is consistent with an input schema: the field count matches
 * the schema's feature count, numeric features are continuous doubles, categorical features
 * are categorical strings, and any other feature has neither an op type nor a data type.
 *
 * @param schema schema the dictionary is checked against
 * @param dataDictionary dictionary under test; must not be null
 */
protected static void checkDataDictionary(InputSchema schema, DataDictionary dataDictionary) {
  assertNotNull(dataDictionary);
  assertEquals("Wrong number of features",
               schema.getNumFeatures(),
               dataDictionary.getNumberOfFields().intValue());
  List<DataField> declaredFields = dataDictionary.getDataFields();
  assertEquals(schema.getNumFeatures(), declaredFields.size());
  for (DataField declared : declaredFields) {
    String name = declared.getName().getValue();
    String opTypeMessage = "Wrong op type for feature " + name;
    String dataTypeMessage = "Wrong data type for feature " + name;
    if (schema.isNumeric(name)) {
      assertEquals(opTypeMessage, OpType.CONTINUOUS, declared.getOpType());
      assertEquals(dataTypeMessage, DataType.DOUBLE, declared.getDataType());
      continue;
    }
    if (schema.isCategorical(name)) {
      assertEquals(opTypeMessage, OpType.CATEGORICAL, declared.getOpType());
      assertEquals(dataTypeMessage, DataType.STRING, declared.getDataType());
      continue;
    }
    // Neither numeric nor categorical: no type information is expected on the field.
    assertNull(declared.getOpType());
    assertNull(declared.getDataType());
  }
}
/**
 * Returns the data fields of the {@code dataDictionary} referenced from the surrounding scope.
 * NOTE(review): the declaration this override belongs to opens before this excerpt; the
 * trailing tokens on the line close it.
 */
@Override public Collection<?> getCollection(){ return dataDictionary.getDataFields(); } });
/**
 * Builds a {@link FieldName}-keyed map of the given dictionary's data fields by delegating
 * to {@code IndexableUtil.buildMap}.
 * NOTE(review): the declaration this override belongs to opens before this excerpt; the
 * trailing tokens on the line close it.
 *
 * @param dataDictionary dictionary whose data fields are indexed
 * @return the map produced by {@code IndexableUtil.buildMap}
 */
@Override public Map<FieldName, DataField> load(DataDictionary dataDictionary){ return IndexableUtil.buildMap(dataDictionary.getDataFields()); } });
/**
 * Collects the names of all fields declared in a PMML data dictionary.
 *
 * @param dictionary {@link DataDictionary} from model
 * @return names of features in order
 */
public static List<String> getFeatureNames(DataDictionary dictionary) {
  List<DataField> allFields = dictionary.getDataFields();
  // Reject a missing or empty field list before mapping anything.
  boolean nonEmpty = allFields != null && !allFields.isEmpty();
  Preconditions.checkArgument(nonEmpty, "No fields in DataDictionary");
  return allFields.stream()
      .map(DataField::getName)
      .map(name -> name.getValue())
      .collect(Collectors.toList());
}
/**
 * Maps the name of each data field to its zero-based position in the data dictionary.
 *
 * <p>The positions come from iterating the field list itself rather than from
 * {@code getNumberOfFields()}. That count mirrors the PMML {@code numberOfFields} attribute,
 * which is optional: relying on it fails when the attribute is absent and mis-sizes the loop
 * when it disagrees with the fields actually declared.
 *
 * @param dataDictionary dictionary whose fields are indexed
 * @return map from field name to position; if two fields share a name, the later position wins
 */
public static Map<FieldName, Integer> getFieldNumMap(DataDictionary dataDictionary) {
    Map<FieldName, Integer> fieldNumMap = new HashMap<FieldName, Integer>();
    int position = 0;
    for (DataField dataField : dataDictionary.getDataFields()) {
        fieldNumMap.put(dataField.getName(), position);
        position++;
    }
    return fieldNumMap;
}
/**
 * Maps the name of each data field to its zero-based position in the data dictionary.
 *
 * <p>The positions come from iterating the field list itself rather than from
 * {@code getNumberOfFields()}. That count mirrors the PMML {@code numberOfFields} attribute,
 * which is optional: relying on it fails when the attribute is absent and mis-sizes the loop
 * when it disagrees with the fields actually declared.
 *
 * @param dataDictionary dictionary whose fields are indexed
 * @return map from field name to position; if two fields share a name, the later position wins
 */
public static Map<FieldName, Integer> getFieldNumMap(DataDictionary dataDictionary) {
    Map<FieldName, Integer> fieldNumMap = new HashMap<FieldName, Integer>();
    int position = 0;
    for (DataField dataField : dataDictionary.getDataFields()) {
        fieldNumMap.put(dataField.getName(), position);
        position++;
    }
    return fieldNumMap;
}
/**
 * Replaces every data field of the visited dictionary, in place, with a
 * {@code RichDataField} constructed from it, then continues the default visit.
 *
 * @param dataDictionary dictionary being visited
 * @return the action returned by the superclass visit
 */
@Override
public VisitorAction visit(DataDictionary dataDictionary){

	if(!dataDictionary.hasDataFields()){
		return super.visit(dataDictionary);
	}

	ListIterator<DataField> cursor = dataDictionary.getDataFields().listIterator();
	while(cursor.hasNext()){
		DataField plainField = cursor.next();

		// set() overwrites the element just returned by next(), keeping its position
		cursor.set(new RichDataField(plainField));
	}

	return super.visit(dataDictionary);
}
} // closes a scope opened before this excerpt
/**
 * Adds the data fields of the visited dictionary, if it has any, to this visitor's
 * {@code dataFields} collection, then continues the default visit.
 *
 * @param dataDictionary dictionary being visited
 * @return the action returned by the superclass visit
 */
@Override
public VisitorAction visit(DataDictionary dataDictionary){

	if(!dataDictionary.hasDataFields()){
		return super.visit(dataDictionary);
	}

	this.dataFields.addAll(dataDictionary.getDataFields());

	return super.visit(dataDictionary);
}
/**
 * Swaps each data field of the visited dictionary for a {@code RichDataField} built from
 * it, keeping the list order, then continues the default visit.
 *
 * @param dataDictionary dictionary being visited
 * @return the action returned by the superclass visit
 */
@Override
public VisitorAction visit(DataDictionary dataDictionary){

	if(!dataDictionary.hasDataFields()){
		return super.visit(dataDictionary);
	}

	ListIterator<DataField> cursor = dataDictionary.getDataFields().listIterator();
	while(cursor.hasNext()){
		DataField original = cursor.next();

		// set() overwrites the element just returned by next(), keeping its position
		cursor.set(new RichDataField(original));
	}

	return super.visit(dataDictionary);
}
} // closes a scope opened before this excerpt
/**
 * Removes from the dictionary every data field that is not in the set returned by
 * {@code getUsedDataFields()}. Does nothing when the dictionary has no data fields.
 *
 * @param dataDictionary dictionary whose field list is pruned in place
 */
private void processDataDictionary(DataDictionary dataDictionary){

	if(!dataDictionary.hasDataFields()){
		return;
	}

	List<DataField> declaredFields = dataDictionary.getDataFields();
	Set<DataField> fieldsInUse = getUsedDataFields();

	// retainAll mutates the dictionary's own list
	declaredFields.retainAll(fieldsInUse);
}
/**
 * Collects the visited dictionary's data fields, when present, into this visitor's
 * {@code dataFields} collection before continuing the default visit.
 *
 * @param dataDictionary dictionary being visited
 * @return the action returned by the superclass visit
 */
@Override
public VisitorAction visit(DataDictionary dataDictionary){

	if(!dataDictionary.hasDataFields()){
		return super.visit(dataDictionary);
	}

	this.dataFields.addAll(dataDictionary.getDataFields());

	return super.visit(dataDictionary);
}
/**
 * Wraps each data field of the visited dictionary in a {@code RichDataField}, writing the
 * wrapper back at the same list position, then continues the default visit.
 *
 * @param dataDictionary dictionary being visited
 * @return the action returned by the superclass visit
 */
@Override
public VisitorAction visit(DataDictionary dataDictionary){

	if(!dataDictionary.hasDataFields()){
		return super.visit(dataDictionary);
	}

	ListIterator<DataField> cursor = dataDictionary.getDataFields().listIterator();
	while(cursor.hasNext()){
		DataField current = cursor.next();

		// set() overwrites the element just returned by next(), keeping its position
		cursor.set(new RichDataField(current));
	}

	return super.visit(dataDictionary);
}
} // closes a scope opened before this excerpt
/**
 * Finds the zero-based position of the first data field whose name equals {@code name}.
 *
 * <p>The search walks the field list itself rather than counting up to
 * {@code getNumberOfFields()}. That count mirrors the PMML {@code numberOfFields} attribute,
 * which is optional: relying on it fails when the attribute is absent and can skip fields or
 * run past the end of the list when it disagrees with the fields actually declared.
 *
 * @param dataDictionary dictionary to search
 * @param name field name to look for
 * @return position of the first matching field
 * @throws RuntimeException if no field has the given name
 */
public static Integer getTargetFieldNumByName(DataDictionary dataDictionary, String name) {
    int position = 0;
    for (DataField dataField : dataDictionary.getDataFields()) {
        if (dataField.getName().getValue().equals(name)) {
            return position;
        }
        position++;
    }
    throw new RuntimeException("Target Field Not Found: " + name);
}
/**
 * Prunes the dictionary's data fields down to those contained in the set returned by
 * {@code getUsedDataFields()}. A dictionary without data fields is left untouched.
 *
 * @param dataDictionary dictionary whose field list is pruned in place
 */
private void processDataDictionary(DataDictionary dataDictionary){

	if(!dataDictionary.hasDataFields()){
		return;
	}

	List<DataField> allFields = dataDictionary.getDataFields();
	Set<DataField> referencedFields = getUsedDataFields();

	// retainAll mutates the dictionary's own list
	allFields.retainAll(referencedFields);
}
/**
 * Applies {@code DataDictionaryCleaner} to the chained segmentation model and checks which
 * data fields remain: first with the models in place, then after all models are removed.
 */
@Test
public void cleanChained() throws Exception {
	PMML pmml = ResourceUtil.unmarshal(ChainedSegmentationTest.class);

	DataDictionary dictionary = pmml.getDataDictionary();

	// Before cleaning, five fields are declared
	checkFields(FieldNameUtil.create("y", "x1", "x2", "x3", "x4"), dictionary.getDataFields());

	DataDictionaryCleaner dictionaryCleaner = new DataDictionaryCleaner();
	dictionaryCleaner.applyTo(pmml);

	// "x4" is expected to be gone after the first pass
	checkFields(FieldNameUtil.create("y", "x1", "x2", "x3"), dictionary.getDataFields());

	pmml.getModels().clear();

	dictionaryCleaner.applyTo(pmml);

	// With no models left, no field is expected to remain
	checkFields(Collections.emptySet(), dictionary.getDataFields());
}
/**
 * Declares the document-level fields of the visited PMML: first the data fields of its
 * data dictionary, then the derived fields of its transformation dictionary. A dictionary
 * that is absent or empty is skipped. The default visit continues afterwards.
 *
 * @param pmml document being visited
 * @return the action returned by the superclass visit
 */
@Override
public VisitorAction visit(PMML pmml){
	DataDictionary declaredData = pmml.getDataDictionary();

	boolean hasDataFields = (declaredData != null) && declaredData.hasDataFields();
	if(hasDataFields){
		declare(pmml, declaredData.getDataFields());
	}

	TransformationDictionary declaredTransformations = pmml.getTransformationDictionary();

	boolean hasDerivedFields = (declaredTransformations != null) && declaredTransformations.hasDerivedFields();
	if(hasDerivedFields){
		declare(pmml, declaredTransformations.getDerivedFields());
	}

	return super.visit(pmml);
}
/**
 * Registers the fields declared at the top level of the visited PMML with {@code declare}:
 * the data dictionary's data fields first, followed by the transformation dictionary's
 * derived fields. Missing or empty dictionaries contribute nothing. The default visit
 * continues afterwards.
 *
 * @param pmml document being visited
 * @return the action returned by the superclass visit
 */
@Override
public VisitorAction visit(PMML pmml){
	DataDictionary dictionary = pmml.getDataDictionary();

	boolean declaresDataFields = (dictionary != null) && dictionary.hasDataFields();
	if(declaresDataFields){
		declare(pmml, dictionary.getDataFields());
	}

	TransformationDictionary transformations = pmml.getTransformationDictionary();

	boolean declaresDerivedFields = (transformations != null) && transformations.hasDerivedFields();
	if(declaresDerivedFields){
		declare(pmml, transformations.getDerivedFields());
	}

	return super.visit(pmml);
}