/**
 * Round-trips a named, indexed nominal attribute through a struct field and
 * checks that the result equals the same attribute with its index removed.
 */
@Test
public void testNominalAttribute() {
    final NominalAttribute sizeAttr = NominalAttribute.defaultAttr()
        .withName("size")
        .withIndex(1)
        .withValues("small", "medium", "large");

    // The assertion compares against the index-free form of the attribute.
    final Attribute expected = sizeAttr.withoutIndex();
    final Attribute restored = Attribute.fromStructField(sizeAttr.toStructField());

    Assert.assertEquals(expected, restored);
}
/**
 * Builds an eight-member attribute group named "user" and checks its size,
 * name, index assignment, name lookup, and struct-field round trip.
 */
@Test
public void testAttributeGroup() {
    final Attribute[] members = {
        NumericAttribute.defaultAttr(),
        NominalAttribute.defaultAttr(),
        BinaryAttribute.defaultAttr().withIndex(0),
        NumericAttribute.defaultAttr().withName("age").withSparsity(0.8),
        NominalAttribute.defaultAttr().withName("size").withValues("small", "medium", "large"),
        BinaryAttribute.defaultAttr().withName("clicked").withValues("no", "yes"),
        NumericAttribute.defaultAttr(),
        NumericAttribute.defaultAttr()
    };
    final AttributeGroup userGroup = new AttributeGroup("user", members);

    // Basic accessors.
    final int memberCount = userGroup.size();
    Assert.assertEquals(8, memberCount);
    final String groupName = userGroup.name();
    Assert.assertEquals("user", groupName);

    // The first member is expected back as a default numeric attribute carrying index 0.
    final Attribute expectedFirst = NumericAttribute.defaultAttr().withIndex(0);
    Assert.assertEquals(expectedFirst, userGroup.getAttr(0));

    // Lookup by name: "age" is the fourth member; "abc" was never added.
    final int agePosition = userGroup.indexOf("age");
    Assert.assertEquals(3, agePosition);
    final boolean hasUnknown = userGroup.hasAttr("abc");
    Assert.assertFalse(hasUnknown);

    // Converting to a struct field and back must yield an equal group.
    final AttributeGroup restored = AttributeGroup.fromStructField(userGroup.toStructField());
    Assert.assertEquals(userGroup, restored);
}
} // closes a scope opened outside this excerpt (presumably the enclosing test class)
/**
 * Checks that the default numeric, nominal and binary attributes each report
 * the matching {@code AttributeType}.
 */
@Test
public void testAttributeType() {
    // expectedTypes[i] is the type that defaults[i] must report.
    final AttributeType[] expectedTypes = {
        AttributeType.Numeric(),
        AttributeType.Nominal(),
        AttributeType.Binary()
    };
    final Attribute[] defaults = {
        NumericAttribute.defaultAttr(),
        NominalAttribute.defaultAttr(),
        BinaryAttribute.defaultAttr()
    };

    for (int i = 0; i < expectedTypes.length; i++) {
        Assert.assertEquals(expectedTypes[i], defaults[i].attrType());
    }
}
@Override public OneHotEncoderModelInfo getModelInfo(final OneHotEncoder from, DataFrame df) { OneHotEncoderModelInfo modelInfo = new OneHotEncoderModelInfo(); String inputColumn = from.getInputCol(); //Ugly but the only way to deal with spark here int numTypes = -1; Attribute attribute = Attribute.fromStructField(df.schema().apply(inputColumn)); if (attribute.attrType() == AttributeType.Nominal()) { numTypes = ((NominalAttribute) Attribute.fromStructField(df.schema().apply(inputColumn))).values().get().length; } else if (attribute.attrType() == AttributeType.Binary()) { numTypes = ((BinaryAttribute) Attribute.fromStructField(df.schema().apply(inputColumn))).values().get().length; } //TODO: Since dropLast is not accesible here, We are deliberately setting numTypes. This is the reason, we should use CustomOneHotEncoder modelInfo.setNumTypes(numTypes - 1); Set<String> inputKeys = new LinkedHashSet<String>(); inputKeys.add(from.getInputCol()); modelInfo.setInputKeys(inputKeys); Set<String> outputKeys = new LinkedHashSet<String>(); outputKeys.add(from.getOutputCol()); modelInfo.setOutputKeys(outputKeys); return modelInfo; }
/**
 * Builds an eight-member attribute group named "user" and checks its size,
 * name, index assignment, name lookup, and struct-field round trip.
 */
@Test
public void testAttributeGroup() {
    final Attribute[] members = {
        NumericAttribute.defaultAttr(),
        NominalAttribute.defaultAttr(),
        BinaryAttribute.defaultAttr().withIndex(0),
        NumericAttribute.defaultAttr().withName("age").withSparsity(0.8),
        NominalAttribute.defaultAttr().withName("size").withValues("small", "medium", "large"),
        BinaryAttribute.defaultAttr().withName("clicked").withValues("no", "yes"),
        NumericAttribute.defaultAttr(),
        NumericAttribute.defaultAttr()
    };
    final AttributeGroup userGroup = new AttributeGroup("user", members);

    // Basic accessors.
    final int memberCount = userGroup.size();
    Assert.assertEquals(8, memberCount);
    final String groupName = userGroup.name();
    Assert.assertEquals("user", groupName);

    // The first member is expected back as a default numeric attribute carrying index 0.
    final Attribute expectedFirst = NumericAttribute.defaultAttr().withIndex(0);
    Assert.assertEquals(expectedFirst, userGroup.getAttr(0));

    // Lookup by name: "age" is the fourth member; "abc" was never added.
    final int agePosition = userGroup.indexOf("age");
    Assert.assertEquals(3, agePosition);
    final boolean hasUnknown = userGroup.hasAttr("abc");
    Assert.assertFalse(hasUnknown);

    // Converting to a struct field and back must yield an equal group.
    final AttributeGroup restored = AttributeGroup.fromStructField(userGroup.toStructField());
    Assert.assertEquals(userGroup, restored);
}
} // closes a scope opened outside this excerpt (presumably the enclosing test class)
/**
 * Checks that the default numeric, nominal and binary attributes each report
 * the matching {@code AttributeType}.
 */
@Test
public void testAttributeType() {
    // expectedTypes[i] is the type that defaults[i] must report.
    final AttributeType[] expectedTypes = {
        AttributeType.Numeric(),
        AttributeType.Nominal(),
        AttributeType.Binary()
    };
    final Attribute[] defaults = {
        NumericAttribute.defaultAttr(),
        NominalAttribute.defaultAttr(),
        BinaryAttribute.defaultAttr()
    };

    for (int i = 0; i < expectedTypes.length; i++) {
        Assert.assertEquals(expectedTypes[i], defaults[i].attrType());
    }
}
/**
 * Round-trips a named, indexed nominal attribute through a struct field and
 * checks that the result equals the same attribute with its index removed.
 */
@Test
public void testNominalAttribute() {
    final NominalAttribute sizeAttr = NominalAttribute.defaultAttr()
        .withName("size")
        .withIndex(1)
        .withValues("small", "medium", "large");

    // The assertion compares against the index-free form of the attribute.
    final Attribute expected = sizeAttr.withoutIndex();
    final Attribute restored = Attribute.fromStructField(sizeAttr.toStructField());

    Assert.assertEquals(expected, restored);
}
/**
 * Builds an eight-member attribute group named "user" and checks its size,
 * name, index assignment, name lookup, and struct-field round trip.
 */
@Test
public void testAttributeGroup() {
    final Attribute[] members = {
        NumericAttribute.defaultAttr(),
        NominalAttribute.defaultAttr(),
        BinaryAttribute.defaultAttr().withIndex(0),
        NumericAttribute.defaultAttr().withName("age").withSparsity(0.8),
        NominalAttribute.defaultAttr().withName("size").withValues("small", "medium", "large"),
        BinaryAttribute.defaultAttr().withName("clicked").withValues("no", "yes"),
        NumericAttribute.defaultAttr(),
        NumericAttribute.defaultAttr()
    };
    final AttributeGroup userGroup = new AttributeGroup("user", members);

    // Basic accessors.
    final int memberCount = userGroup.size();
    Assert.assertEquals(8, memberCount);
    final String groupName = userGroup.name();
    Assert.assertEquals("user", groupName);

    // The first member is expected back as a default numeric attribute carrying index 0.
    final Attribute expectedFirst = NumericAttribute.defaultAttr().withIndex(0);
    Assert.assertEquals(expectedFirst, userGroup.getAttr(0));

    // Lookup by name: "age" is the fourth member; "abc" was never added.
    final int agePosition = userGroup.indexOf("age");
    Assert.assertEquals(3, agePosition);
    final boolean hasUnknown = userGroup.hasAttr("abc");
    Assert.assertFalse(hasUnknown);

    // Converting to a struct field and back must yield an equal group.
    final AttributeGroup restored = AttributeGroup.fromStructField(userGroup.toStructField());
    Assert.assertEquals(userGroup, restored);
}
} // closes a scope opened outside this excerpt (presumably the enclosing test class)
/**
 * Checks that the default numeric, nominal and binary attributes each report
 * the matching {@code AttributeType}.
 */
@Test
public void testAttributeType() {
    // expectedTypes[i] is the type that defaults[i] must report.
    final AttributeType[] expectedTypes = {
        AttributeType.Numeric(),
        AttributeType.Nominal(),
        AttributeType.Binary()
    };
    final Attribute[] defaults = {
        NumericAttribute.defaultAttr(),
        NominalAttribute.defaultAttr(),
        BinaryAttribute.defaultAttr()
    };

    for (int i = 0; i < expectedTypes.length; i++) {
        Assert.assertEquals(expectedTypes[i], defaults[i].attrType());
    }
}
/**
 * Round-trips a named, indexed nominal attribute through a struct field and
 * checks that the result equals the same attribute with its index removed.
 */
@Test
public void testNominalAttribute() {
    final NominalAttribute sizeAttr = NominalAttribute.defaultAttr()
        .withName("size")
        .withIndex(1)
        .withValues("small", "medium", "large");

    // The assertion compares against the index-free form of the attribute.
    final Attribute expected = sizeAttr.withoutIndex();
    final Attribute restored = Attribute.fromStructField(sizeAttr.toStructField());

    Assert.assertEquals(expected, restored);
}