/**
 * Lists the names of all fields declared in a data dictionary, in the order
 * the dictionary returns them.
 *
 * @param dictionary {@link DataDictionary} from model
 * @return names of features in order
 * @throws IllegalArgumentException if the dictionary's field list is
 *  {@code null} or empty
 */
public static List<String> getFeatureNames(DataDictionary dictionary) {
    List<DataField> declaredFields = dictionary.getDataFields();
    boolean hasFields = declaredFields != null && !declaredFields.isEmpty();
    Preconditions.checkArgument(hasFields, "No fields in DataDictionary");
    return declaredFields.stream()
        .map(DataField::getName)
        .map(fieldName -> fieldName.getValue())
        .collect(Collectors.toList());
}
/**
 * Asserts that a data field has the expected name and the op/data type pair
 * implied by {@code categorical}.
 *
 * @param field the field under test
 * @param name the expected field name
 * @param categorical {@code null} to require that both op type and data type
 *  are unset; {@code true} to require CATEGORICAL/STRING; {@code false} to
 *  require CONTINUOUS/DOUBLE
 */
private static void checkDataField(DataField field, String name, Boolean categorical) {
    assertEquals(name, field.getName().getValue());

    // No expectation about typing: the field must carry neither attribute.
    if (categorical == null) {
        assertNull(field.getOpType());
        assertNull(field.getDataType());
        return;
    }

    OpType expectedOpType = categorical ? OpType.CATEGORICAL : OpType.CONTINUOUS;
    DataType expectedDataType = categorical ? DataType.STRING : DataType.DOUBLE;
    assertEquals(expectedOpType, field.getOpType());
    assertEquals(expectedDataType, field.getDataType());
}
/**
 * Asserts that a data dictionary agrees with an input schema: same number of
 * features, and for every field the op type and data type that the schema's
 * classification of that feature implies.
 *
 * @param schema the schema that says which features are numeric or categorical
 * @param dataDictionary the dictionary to verify; must not be {@code null}
 */
protected static void checkDataDictionary(InputSchema schema, DataDictionary dataDictionary) {
    assertNotNull(dataDictionary);
    assertEquals("Wrong number of features",
                 schema.getNumFeatures(),
                 dataDictionary.getNumberOfFields().intValue());

    List<DataField> declaredFields = dataDictionary.getDataFields();
    assertEquals(schema.getNumFeatures(), declaredFields.size());

    for (DataField declared : declaredFields) {
        String feature = declared.getName().getValue();
        boolean numeric = schema.isNumeric(feature);

        // Neither numeric nor categorical: the field must carry no typing at all.
        if (!numeric && !schema.isCategorical(feature)) {
            assertNull(declared.getOpType());
            assertNull(declared.getDataType());
            continue;
        }

        OpType expectedOpType = numeric ? OpType.CONTINUOUS : OpType.CATEGORICAL;
        DataType expectedDataType = numeric ? DataType.DOUBLE : DataType.STRING;
        assertEquals("Wrong op type for feature " + feature, expectedOpType, declared.getOpType());
        assertEquals("Wrong data type for feature " + feature, expectedDataType, declared.getDataType());
    }
}
/**
 * Lists the names of all fields declared in a data dictionary, in the order
 * the dictionary returns them.
 *
 * @param dictionary {@link DataDictionary} from model
 * @return names of features in order
 * @throws IllegalArgumentException if the dictionary's field list is
 *  {@code null} or empty
 */
public static List<String> getFeatureNames(DataDictionary dictionary) {
    List<DataField> declaredFields = dictionary.getDataFields();
    boolean hasFields = declaredFields != null && !declaredFields.isEmpty();
    Preconditions.checkArgument(hasFields, "No fields in DataDictionary");
    return declaredFields.stream()
        .map(DataField::getName)
        .map(fieldName -> fieldName.getValue())
        .collect(Collectors.toList());
}
/**
 * Builds a lookup from each data-dictionary field name to its zero-based
 * position in the dictionary's field list.
 *
 * <p>Positions are derived by walking the field list itself rather than by
 * counting up to {@code getNumberOfFields()}. That attribute is optional in
 * PMML, so relying on it fails with a {@code NullPointerException} when it is
 * absent and with an {@code IndexOutOfBoundsException} (or silently skipped
 * fields) when it disagrees with the actual content.
 *
 * <p>If two fields share a name, the later position overwrites the earlier one.
 *
 * @param dataDictionary the dictionary whose fields are indexed
 * @return a new mutable map from field name to list position
 */
public static Map<FieldName, Integer> getFieldNumMap(DataDictionary dataDictionary) {
    Map<FieldName, Integer> fieldNumMap = new HashMap<FieldName, Integer>();
    int position = 0;
    for (DataField dataField : dataDictionary.getDataFields()) {
        fieldNumMap.put(dataField.getName(), position);
        position++;
    }
    return fieldNumMap;
}
/**
 * Builds a lookup from each data-dictionary field name to its zero-based
 * position in the dictionary's field list.
 *
 * <p>Positions are derived by walking the field list itself rather than by
 * counting up to {@code getNumberOfFields()}. That attribute is optional in
 * PMML, so relying on it fails with a {@code NullPointerException} when it is
 * absent and with an {@code IndexOutOfBoundsException} (or silently skipped
 * fields) when it disagrees with the actual content.
 *
 * <p>If two fields share a name, the later position overwrites the earlier one.
 *
 * @param dataDictionary the dictionary whose fields are indexed
 * @return a new mutable map from field name to list position
 */
public static Map<FieldName, Integer> getFieldNumMap(DataDictionary dataDictionary) {
    Map<FieldName, Integer> fieldNumMap = new HashMap<FieldName, Integer>();
    int position = 0;
    for (DataField dataField : dataDictionary.getDataFields()) {
        fieldNumMap.put(dataField.getName(), position);
        position++;
    }
    return fieldNumMap;
}
/**
 * Finds the zero-based position of the first data-dictionary field whose name
 * equals {@code name}.
 *
 * <p>The search walks the field list itself rather than counting up to
 * {@code getNumberOfFields()}. That attribute is optional in PMML, so relying
 * on it fails with a {@code NullPointerException} when it is absent and can
 * miss fields or run past the end of the list when it disagrees with the
 * actual content.
 *
 * @param dataDictionary the dictionary to search
 * @param name the field name to look for
 * @return the position of the first matching field
 * @throws RuntimeException if no field has the given name
 */
public static Integer getTargetFieldNumByName(DataDictionary dataDictionary, String name) {
    int position = 0;
    for (DataField dataField : dataDictionary.getDataFields()) {
        if (dataField.getName().getValue().equals(name)) {
            return position;
        }
        position++;
    }
    throw new RuntimeException("Target Field Not Found: " + name);
}
/**
 * Finds the zero-based position of the first data-dictionary field whose name
 * equals {@code name}.
 *
 * <p>The search walks the field list itself rather than counting up to
 * {@code getNumberOfFields()}. That attribute is optional in PMML, so relying
 * on it fails with a {@code NullPointerException} when it is absent and can
 * miss fields or run past the end of the list when it disagrees with the
 * actual content.
 *
 * @param dataDictionary the dictionary to search
 * @param name the field name to look for
 * @return the position of the first matching field
 * @throws RuntimeException if no field has the given name
 */
public static Integer getTargetFieldNumByName(DataDictionary dataDictionary, String name) {
    int position = 0;
    for (DataField dataField : dataDictionary.getDataFields()) {
        if (dataField.getName().getValue().equals(name)) {
            return position;
        }
        position++;
    }
    throw new RuntimeException("Target Field Not Found: " + name);
}
/**
 * Extracts the field names of a PMML model's data dictionary as an array,
 * one entry per data field, in the order the dictionary lists them.
 *
 * @param pmml
 *            the pmml model
 * @return headers
 */
public static String[] getDataDicHeaders(final PMML pmml) {
    List<DataField> declaredFields = pmml.getDataDictionary().getDataFields();
    String[] headers = new String[declaredFields.size()];
    int position = 0;
    for (DataField declared : declaredFields) {
        headers[position] = declared.getName().getValue();
        position++;
    }
    return headers;
}
/**
 * Collects the data-dictionary positions of the mining fields that have the
 * requested usage type.
 *
 * <p>Each data field name is first mapped to its position in the data
 * dictionary. The mining schema of the first model in {@code pmml} is then
 * scanned, and for every mining field whose usage type is {@code type} the
 * mapped position of its name is appended to the result.
 *
 * @param pmml
 *            the pmml model
 * @param type
 *            the type
 * @return dic fields
 */
public static int[] getDicFieldIDViaType(PMML pmml, FieldUsageType type) {
    // Name -> position in the data dictionary.
    HashMap<String, Integer> positionByName = new HashMap<String, Integer>();
    List<DataField> declaredFields = pmml.getDataDictionary().getDataFields();
    for (int position = 0; position < declaredFields.size(); position++) {
        positionByName.put(declaredFields.get(position).getName().getValue(), position);
    }

    List<Integer> matchingPositions = new ArrayList<Integer>();
    List<MiningField> miningFields = pmml.getModels().get(0).getMiningSchema().getMiningFields();
    for (MiningField miningField : miningFields) {
        if (miningField.getUsageType() != type) {
            continue;
        }
        String miningName = miningField.getName().getValue();
        matchingPositions.add(positionByName.get(miningName));
    }
    return Ints.toArray(matchingPositions);
}
/**
 * Stamps this object's op type and data type onto the data field behind each
 * incoming feature, wraps that field in an {@code ObjectFeature}, and hands
 * the resulting list to the superclass implementation.
 *
 * <p>Every incoming feature is cast to {@code WildcardFeature}; a feature of
 * any other type fails with a {@code ClassCastException}.
 *
 * @param features the features to encode
 * @param encoder the encoder used to resolve each feature's field
 * @return whatever the superclass returns for the re-typed features
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
    OpType opType = getOpType();
    DataType dataType = getDataType();

    List<Feature> typedFeatures = new ArrayList<>();
    for(Feature incoming : features){
        WildcardFeature wildcard = (WildcardFeature)incoming;

        DataField backingField = (DataField)encoder.getField(wildcard.getName());
        backingField
            .setOpType(opType)
            .setDataType(dataType);

        Feature typed = new ObjectFeature(encoder, backingField.getName(), backingField.getDataType());
        typedFeatures.add(typed);
    }

    return super.encodeFeatures(typedFeatures, encoder);
}
}
Target target = getTarget(dataField.getName());
static public void setLabel(Formula formula, RExp terms, RExp levels, RExpEncoder encoder){ RIntegerVector response = (RIntegerVector)terms.getAttributeValue("response"); int responseIndex = response.asScalar(); if(responseIndex != 0){ DataField dataField = (DataField)formula.getField(responseIndex - 1); FieldName name = dataField.getName(); if(encoder.getDataField(name) == null){ encoder.addDataField(dataField); } // End if if(levels instanceof RStringVector){ RStringVector stringLevels = (RStringVector)levels; dataField = (DataField)encoder.toCategorical(name, stringLevels.getValues()); } else if(levels instanceof RIntegerVector){ RIntegerVector factorLevels = (RIntegerVector)levels; if(!factorLevels.isFactor()){ throw new IllegalArgumentException(); } dataField = (DataField)encoder.toCategorical(name, factorLevels.getLevelValues()); } else if(levels != null){ throw new IllegalArgumentException(); } encoder.setLabel(dataField); } else { throw new IllegalArgumentException(); } }
encoder.addDecorator(dataField.getName(), invalidValueDecorator); } else