/** * Constructor. * * @param att the attribute that backs this item. * @param valueIndex the index of the value for this item. * @throws Exception if the backing attribute is not binary or unary. */ public BinaryItem(Attribute att, int valueIndex) throws Exception { super(att, valueIndex); if (att.isNumeric() || (att.isNominal() && att.numValues() > 2)) { throw new Exception("BinaryItem must be constructed using a nominal attribute" + " with at most 2 values!"); } }
/** * Constructs a vector using a given data format. * The vector has an element for each numerical attribute. * The other attributes (nominal, string) are ignored. * Random is used to initialize the attributes. * * @param format the data format to use * @param random for initializing the attributes * @throws Exception if something goes wrong */ public AlgVector(Instances format, Random random) throws Exception { int len = format.numAttributes(); for (int i = 0; i < format.numAttributes(); i++) { if (!format.attribute(i).isNumeric()) len--; } if (len > 0) { m_Elements = new double[len]; initialize(random); } }
/**
 * Heuristically derives the t2 distance from the spread of the training
 * data. Every nominal attribute contributes a fixed 0.25. Every numeric
 * attribute with more than two non-missing values and a positive range
 * contributes half of its standard deviation divided by that range. The
 * square root of the accumulated sum is stored in {@code m_t2} when it is
 * positive; otherwise {@code m_t2} is left untouched.
 *
 * @param trainingBatch the training instances
 * @throws Exception if a problem occurs
 */
protected void setT2T1BasedOnStdDev(Instances trainingBatch) throws Exception {
  double accumulated = 0;
  final int attributeCount = trainingBatch.numAttributes();

  for (int a = 0; a < attributeCount; a++) {
    if (trainingBatch.attribute(a).isNominal()) {
      accumulated += 0.25;
      continue;
    }
    if (!trainingBatch.attribute(a).isNumeric()) {
      // Neither nominal nor numeric: contributes nothing.
      continue;
    }

    AttributeStats stats = trainingBatch.attributeStats(a);
    if (trainingBatch.numInstances() - stats.missingCount <= 2) {
      // Too few observed values to use this attribute.
      continue;
    }

    double deviation = stats.numericStats.stdDev;
    double range = stats.numericStats.max - stats.numericStats.min;
    if (!Utils.isMissingValue(deviation) && range > 0) {
      accumulated += 0.5 * deviation / range;
    }
  }

  double candidate = Math.sqrt(accumulated);
  if (candidate > 0) {
    m_t2 = candidate;
  }
}
/**
 * Moves the split point down to the largest non-missing value of the split
 * attribute found in the given data that does not exceed the current split
 * point. (C4.5 does this for some strange reason.) Nothing happens unless
 * the split attribute is numeric and there is more than one subset. If no
 * value qualifies, the split point becomes {@code -Double.MAX_VALUE}.
 *
 * @param allInstances the data to scan for the new split point
 */
public final void setSplitPoint(Instances allInstances) {
  boolean applicable =
    allInstances.attribute(m_attIndex).isNumeric() && (m_numSubsets > 1);
  if (!applicable) {
    return;
  }

  double best = -Double.MAX_VALUE;
  final int rows = allInstances.numInstances();
  for (int row = 0; row < rows; row++) {
    double candidate = allInstances.instance(row).value(m_attIndex);
    if (Utils.isMissingValue(candidate)) {
      continue;
    }
    // Keep the greatest value that is still at or below the old split point.
    if (candidate > best && candidate <= m_splitPoint) {
      best = candidate;
    }
  }
  m_splitPoint = best;
}
/**
 * Writes a value into the cell at the given row and column. The call is a
 * no-op when {@code isValueValid(rowId, col)} rejects the coordinates.
 * Numeric attributes receive the value parsed as a double, string and
 * nominal attributes receive its string form, and date attributes receive
 * the result of the attribute's own date parser. Other attribute types are
 * left unchanged.
 *
 * @param rowId index of the instance to modify
 * @param col index of the attribute to modify
 * @param val the new value; its {@code toString()} form is what gets stored
 *          or parsed
 */
@Override
public void set(int rowId, int col, Object val) {
  if (!isValueValid(rowId, col)) {
    return;
  }

  Attribute att = instances.attribute(col);

  if (att.isNumeric()) {
    instances.instance(rowId).setValue(att, Double.parseDouble(val.toString()));
  } else if (att.isString() || att.isNominal()) {
    instances.instance(rowId).setValue(att, val.toString());
  } else if (att.isDate()) {
    try {
      instances.instance(rowId).setValue(att, att.parseDate(val.toString()));
    } catch (ParseException e) {
      // An unparseable date is reported and the cell is left as it was.
      e.printStackTrace();
    }
  }
}
text.append("Class ("); if (m_trainInstances.attribute(m_trainInstances.classIndex()).isNumeric()) { text.append("numeric): "); } else { text.append((m_trainInstances.classIndex() + 1) + " " + m_trainInstances.attribute(m_trainInstances.classIndex()).name() + "):\n"); } else {
/**
 * Reads the cell at the given row and column.
 *
 * @param rowId index of the instance to read
 * @param col index of the attribute to read
 * @return {@code null} when {@code isValueValid(rowId, col)} rejects the
 *         coordinates or the attribute is of an unhandled type; the boxed
 *         double value for numeric attributes; the string value for nominal
 *         and string attributes; the attribute-formatted date string for
 *         date attributes
 */
@Override
public Object getValue(int rowId, int col) {
  if (!isValueValid(rowId, col)) {
    return null;
  }

  Attribute att = instances.attribute(col);

  if (att.isNumeric()) {
    // Read the single cell directly rather than materialising the whole
    // column as an array just to index one element of it.
    return instances.instance(rowId).value(col);
  }
  if (att.isNominal() || att.isString()) {
    return instances.instance(rowId).stringValue(col);
  }
  if (att.isDate()) {
    double dateValue = instances.instance(rowId).value(col);
    return att.formatDate(dateValue);
  }
  return null;
}
/**
 * Records the minimum and maximum of every numeric feature attribute of
 * the given multi-label data set in {@code attStats}, keyed by attribute
 * index. Non-numeric features are skipped.
 *
 * @param mlData the multi-label data whose feature ranges are collected
 */
private void Initialize(MultiLabelInstances mlData) {
  Instances dataSet = mlData.getDataSet();

  for (int attIndex : mlData.getFeatureIndices()) {
    if (!dataSet.attribute(attIndex).isNumeric()) {
      continue;
    }
    Stats numeric = dataSet.attributeStats(attIndex).numericStats;
    double[] minAndMax = new double[]{numeric.min, numeric.max};
    attStats.put(attIndex, minAndMax);
  }
}
}
/**
 * Heuristically derives the t2 distance from the spread of the training
 * data. Every nominal attribute contributes a fixed 0.25. Every numeric
 * attribute with more than two non-missing values and a positive range
 * contributes half of its standard deviation divided by that range. The
 * square root of the accumulated sum is stored in {@code m_t2} when it is
 * positive; otherwise {@code m_t2} is left untouched.
 *
 * @param trainingBatch the training instances
 * @throws Exception if a problem occurs
 */
protected void setT2T1BasedOnStdDev(Instances trainingBatch) throws Exception {
  double accumulated = 0;
  final int attributeCount = trainingBatch.numAttributes();

  for (int a = 0; a < attributeCount; a++) {
    if (trainingBatch.attribute(a).isNominal()) {
      accumulated += 0.25;
      continue;
    }
    if (!trainingBatch.attribute(a).isNumeric()) {
      // Neither nominal nor numeric: contributes nothing.
      continue;
    }

    AttributeStats stats = trainingBatch.attributeStats(a);
    if (trainingBatch.numInstances() - stats.missingCount <= 2) {
      // Too few observed values to use this attribute.
      continue;
    }

    double deviation = stats.numericStats.stdDev;
    double range = stats.numericStats.max - stats.numericStats.min;
    if (!Utils.isMissingValue(deviation) && range > 0) {
      accumulated += 0.5 * deviation / range;
    }
  }

  double candidate = Math.sqrt(accumulated);
  if (candidate > 0) {
    m_t2 = candidate;
  }
}
/**
 * Selects the attribute whose values drive the coloured labels. Has no
 * effect when the data has no attributes. For a numeric attribute the
 * display switches to numeric mode; otherwise it switches to nominal mode,
 * first extending the colour map if the attribute has more values than
 * there are colours in the list.
 *
 * @param cIndex the index of the attribute to display coloured labels for
 */
public void setCindex(int cIndex) {
  if (m_Instances.numAttributes() <= 0) {
    return;
  }

  m_cIndex = cIndex;

  if (m_Instances.attribute(m_cIndex).isNumeric()) {
    setNumeric();
    return;
  }

  boolean needsMoreColours =
    m_Instances.attribute(m_cIndex).numValues() > m_colorList.size();
  if (needsMoreColours) {
    extendColourMap();
  }
  setNominal();
}
/**
 * Moves the split point down to the largest non-missing value of the split
 * attribute found in the given data that does not exceed the current split
 * point. (C4.5 does this for some strange reason.) Nothing happens unless
 * the split attribute is numeric and there is more than one subset. If no
 * value qualifies, the split point becomes {@code -Double.MAX_VALUE}.
 *
 * @param allInstances the data to scan for the new split point
 */
public final void setSplitPoint(Instances allInstances) {
  boolean applicable =
    allInstances.attribute(m_attIndex).isNumeric() && (m_numSubsets > 1);
  if (!applicable) {
    return;
  }

  double best = -Double.MAX_VALUE;
  final int rows = allInstances.numInstances();
  for (int row = 0; row < rows; row++) {
    double candidate = allInstances.instance(row).value(m_attIndex);
    if (Utils.isMissingValue(candidate)) {
      continue;
    }
    // Keep the greatest value that is still at or below the old split point.
    if (candidate > best && candidate <= m_splitPoint) {
      best = candidate;
    }
  }
  m_splitPoint = best;
}
/** * Make sure that the filter binarizes the index we specify. */ public void testSpecificIndex() { int att1 = m_Instances.attribute("NumericAtt1").index(); int att2 = m_Instances.attribute("NumericAtt2").index(); // Set the attribute index to point to NumericAtt1, so we expect that only this // attribute will be binarized. ((NumericToBinary)m_Filter).setAttributeIndices( String.valueOf(att1+1) ); Instances result = useFilter(); assertTrue("NumericAtt1 should be nominal", result.attribute(att1).isNominal()); assertTrue("NumericAtt2 should be numeric", result.attribute(att2).isNumeric()); }
/** * Constructor. * * @param att the attribute that backs this item. * @param valueIndex the index of the value for this item. * @throws Exception if the backing attribute is not binary or unary. */ public BinaryItem(Attribute att, int valueIndex) throws Exception { super(att, valueIndex); if (att.isNumeric() || (att.isNominal() && att.numValues() > 2)) { throw new Exception("BinaryItem must be constructed using a nominal attribute" + " with at most 2 values!"); } }
text.append("Class ("); if (m_trainInstances.attribute(m_trainInstances.classIndex()).isNumeric()) { text.append("numeric): "); } else { text.append((m_trainInstances.classIndex() + 1) + " " + m_trainInstances.attribute(m_trainInstances.classIndex()).name() + "):\n"); } else {
/**
 * Builds one single-item item set for every attribute value declared in
 * the header of the given instances. The ordering of values in the header
 * determines the lexicographic order. In each item set every attribute
 * position holds -1 except the one attribute that carries the value index.
 *
 * @param instances the set of instances whose header info is to be used
 * @param treatZeroAsMissing if true, the value at index 0 of each attribute
 *          is skipped and no item set is generated for it
 * @return a set of item sets, each containing a single item
 * @exception Exception if a numeric attribute is encountered
 */
public static ArrayList<Object> singletons(Instances instances,
  boolean treatZeroAsMissing) throws Exception {

  ArrayList<Object> singletonSets = new ArrayList<Object>();
  final int firstValue = treatZeroAsMissing ? 1 : 0;

  for (int att = 0; att < instances.numAttributes(); att++) {
    if (instances.attribute(att).isNumeric()) {
      throw new Exception("Can't handle numeric attributes!");
    }

    for (int val = firstValue; val < instances.attribute(att).numValues(); val++) {
      AprioriItemSet single = new AprioriItemSet(instances.numInstances());

      // Mark every attribute as absent, then set the one item.
      int[] items = new int[instances.numAttributes()];
      java.util.Arrays.fill(items, -1);
      items[att] = val;

      single.m_items = items;
      singletonSets.add(single);
    }
  }
  return singletonSets;
}
/** Generate the cutpoints for each attribute */ protected void calculateCutPoints() { Instances copy = null; m_CutPoints = new double[getInputFormat().numAttributes()][]; for (int i = getInputFormat().numAttributes() - 1; i >= 0; i--) { if ((m_DiscretizeCols.isInRange(i)) && (getInputFormat().attribute(i).isNumeric())) { // Use copy to preserve order if (copy == null) { copy = new Instances(getInputFormat()); } calculateCutPointsByMDL(i, copy); } } }
/**
 * Moves the split point down to the largest non-missing value of the split
 * attribute found in the given data that does not exceed the current split
 * point. (C4.5 does this for some strange reason.) Nothing happens unless
 * the split attribute is numeric and there is more than one subset. If no
 * value qualifies, the split point becomes {@code -Double.MAX_VALUE}.
 *
 * @param allInstances the data to scan for the new split point
 */
public final void setSplitPoint(Instances allInstances) {
  boolean applicable =
    allInstances.attribute(m_attIndex).isNumeric() && (m_numSubsets > 1);
  if (!applicable) {
    return;
  }

  double best = -Double.MAX_VALUE;
  final int rows = allInstances.numInstances();
  for (int row = 0; row < rows; row++) {
    double candidate = allInstances.instance(row).value(m_attIndex);
    if (Utils.isMissingValue(candidate)) {
      continue;
    }
    // Keep the greatest value that is still at or below the old split point.
    if (candidate > best && candidate <= m_splitPoint) {
      best = candidate;
    }
  }
  m_splitPoint = best;
}
/** * Make sure that the filter binarizes the index we specify. */ public void testSpecificIndex() { int att1 = m_Instances.attribute("NumericAtt1").index(); int att2 = m_Instances.attribute("NumericAtt2").index(); // Set the attribute index to point to NumericAtt1, so we expect that only this // attribute will be binarized. ((NumericToBinary)m_Filter).setAttributeIndices( String.valueOf(att1+1) ); Instances result = useFilter(); assertTrue("NumericAtt1 should be nominal", result.attribute(att1).isNominal()); assertTrue("NumericAtt2 should be numeric", result.attribute(att2).isNumeric()); }