/**
 * Register a source (raw input) column whose position in the file is
 * known up front. Intended for files without a header row, where the
 * column cannot be located by name.
 *
 * @param name
 *            The name of the column.
 * @param index
 *            The index of the column, needed for non-headered files.
 * @param colType
 *            The column type.
 * @return The newly created column definition, after it has been handed
 *         to {@code addSourceColumn}.
 */
public ColumnDefinition defineSourceColumn(String name, int index,
        ColumnType colType) {
    final ColumnDefinition column = new ColumnDefinition(name, colType);
    column.setIndex(index);
    addSourceColumn(column);
    return column;
}
/**
 * Analyze the specified value by dispatching to the analysis routine
 * that matches this column's data type.
 *
 * @param value
 *            The value to analyze.
 */
public void analyze(String value) {
    switch (this.dataType) {
        case continuous:
            analyzeContinuous(value);
            break;
        case ordinal:
            analyzeOrdinal(value);
            break;
        case nominal:
            analyzeNominal(value);
            break;
        default:
            // Any other data type is not analyzed.
            break;
    }
}
/**
 * Define an array of classes for a categorical value. Each element is
 * passed, in order, to the single-value {@code defineClass} overload.
 *
 * @param str
 *            The classes to add.
 */
public void defineClass(String[] str) {
    for (int i = 0; i < str.length; i++) {
        defineClass(str[i]);
    }
}
/** * {@inheritDoc} */ @Override public int normalizeColumn(ColumnDefinition colDef, double value, double[] outputData, int outputColumn) { double result = ((value - colDef.getLow()) / (colDef.getHigh() - colDef.getLow())) * (this.normalizedHigh - this.normalizedLow) + this.normalizedLow; // typically caused by a number that should not have been normalized // (i.e. normalization or actual range is infinitely small. if( Double.isNaN(result) ) { result = ((this.normalizedHigh-this.normalizedLow)/2)+this.normalizedLow; } outputData[outputColumn] = result; return outputColumn+1; }
int index = findIndex(colDef); String value = line[index]; colDef.analyze(value); if (colDef.getDataType() == ColumnType.continuous) { colDef.setMean(colDef.getMean() / colDef.getCount()); colDef.setSd(0); for (int i = 0; i < this.helper.getSourceColumns().size(); i++) { ColumnDefinition colDef = this.helper.getSourceColumns().get(i); String value = line[colDef.getIndex()]; if (colDef.getDataType() == ColumnType.continuous) { double d = this.helper.parseDouble(value); d = colDef.getMean() - d; d = d * d; colDef.setSd(colDef.getSd() + d); if (colDef.getDataType() == ColumnType.continuous) { colDef.setSd(Math.sqrt(colDef.getSd() / colDef.getCount()));
/**
 * {@inheritDoc}
 *
 * One output slot is used per class defined on the column.
 */
@Override
public int outputSize(ColumnDefinition colDef) {
    final int classCount = colDef.getClasses().size();
    return classCount;
}
/**
 * Look up the normalizer registered for a column's data type, using the
 * input registry or the output registry depending on {@code isInput}.
 *
 * @param colDef
 *            The column definition.
 * @param isInput
 *            True if the column is input.
 * @return The normalizer to use.
 */
private Normalizer findNormalizer(ColumnDefinition colDef, boolean isInput) {
    // A missing key simply yields null, which is reported below.
    final Normalizer found = isInput
            ? this.inputNormalizers.get(colDef.getDataType())
            : this.outputNormalizers.get(colDef.getDataType());

    if (found != null) {
        return found;
    }

    throw new EncogError("No normalizer defined for input="+isInput+", type=" + colDef.getDataType());
}
throw new EncogError( "Do not know how to process missing value \"" + value + "\" in field: " + colDef.getName()); if (colDef.getDataType() == ColumnType.continuous) { double d = parseDouble(value); if (handler != null) {
/**
 * {@inheritDoc}
 *
 * Writes one slot per class: the slot whose class equals {@code value}
 * receives {@code normalizedHigh}, every other slot receives
 * {@code normalizedLow}.
 */
@Override
public int normalizeColumn(ColumnDefinition colDef, String value,
        double[] outputData, int outputColumn) {
    final int classCount = colDef.getClasses().size();

    for (int offset = 0; offset < classCount; offset++) {
        final boolean matches = colDef.getClasses().get(offset).equals(value);
        outputData[outputColumn + offset] =
                matches ? this.normalizedHigh : this.normalizedLow;
    }

    return outputColumn + colDef.getClasses().size();
}
/**
 * {@inheritDoc}
 *
 * Produces {@code "?->C->?"} when the first output column is nominal
 * and {@code "?->R->?"} otherwise. More than one output column is
 * rejected.
 */
@Override
public String suggestModelArchitecture(VersatileMLDataSet dataset) {
    final int outputCount = dataset.getNormHelper().getOutputColumns().size();
    if (outputCount > 1) {
        throw new EncogError("SVM does not support multiple output columns.");
    }

    final ColumnType outputType =
            dataset.getNormHelper().getOutputColumns().get(0).getDataType();
    final String modelKind = (outputType == ColumnType.nominal) ? "C" : "R";

    return "?->" + modelKind + "->?";
}
/** * {@inheritDoc} */ @Override public String denormalizeColumn(ColumnDefinition colDef, MLData data, int dataColumn) { double value = data.getData(dataColumn); final double result = ((colDef.getLow() - colDef.getHigh()) * value - this.normalizedHigh * colDef.getLow() + colDef.getHigh() * this.normalizedLow) / (this.normalizedLow - this.normalizedHigh); // typically caused by a number that should not have been normalized // (i.e. normalization or actual range is infinitely small. if( Double.isNaN(result) ) { return ""+(((this.normalizedHigh-this.normalizedLow)/2)+this.normalizedLow); } return ""+result; } }
/**
 * {@inheritDoc}
 *
 * Encodes {@code value} as its position in the column's class list,
 * using a single output slot. A value that is not a defined class is
 * rejected.
 */
@Override
public int normalizeColumn(ColumnDefinition colDef, String value,
        double[] outputData, int outputColumn) {
    final int classIndex = colDef.getClasses().indexOf(value);
    if (classIndex < 0) {
        throw new EncogError("Undefined value: " + value);
    }

    outputData[outputColumn] = classIndex;
    return outputColumn + 1;
}
/**
 * Reset the input/output assignments, then mark exactly one source
 * column as the output and every other non-ignored source column as an
 * input.
 *
 * @param outputColumn
 *            The output column.
 */
public void defineSingleOutputOthersInput(ColumnDefinition outputColumn) {
    this.helper.clearInputOutput();

    for (ColumnDefinition candidate : this.helper.getSourceColumns()) {
        // Identity comparison: the caller must pass the same instance
        // that is registered as a source column.
        if (candidate == outputColumn) {
            defineOutput(candidate);
            continue;
        }

        if (candidate.getDataType() != ColumnType.ignore) {
            defineInput(candidate);
        }
    }
}
/**
 * {@inheritDoc}
 *
 * Treats the data value as a class index (truncated to an int) and
 * returns the class stored at that position.
 */
@Override
public String denormalizeColumn(ColumnDefinition colDef, MLData data,
        int dataColumn) {
    final int classIndex = (int) data.getData(dataColumn);
    return colDef.getClasses().get(classIndex);
}
/**
 * Calculate the error for the given method and dataset. A
 * classification error is computed when there is exactly one output
 * column and it is nominal; in every other case a regression error is
 * computed.
 *
 * @param method
 *            The method to use.
 * @param data
 *            The data to use.
 * @return The error.
 */
public double calculateError(MLMethod method, MLDataSet data) {
    // Short-circuit keeps get(0) from running unless there is exactly
    // one output column.
    final boolean singleNominalOutput =
            this.dataset.getNormHelper().getOutputColumns().size() == 1
            && this.dataset.getNormHelper().getOutputColumns().get(0)
                    .getDataType() == ColumnType.nominal;

    if (singleNominalOutput) {
        return EncogUtility.calculateClassificationError(
                (MLClassification) method, data);
    }

    return EncogUtility.calculateRegressionError((MLRegression) method, data);
}
/** * {@inheritDoc} */ @Override public String denormalizeColumn(ColumnDefinition colDef, MLData data, int dataColumn) { double high = colDef.getClasses().size(); double low = 0; double value = data.getData(dataColumn); final double result = ((low - high) * value - this.normalizedHigh * low + high * this.normalizedLow) / (this.normalizedLow - this.normalizedHigh); // typically caused by a number that should not have been normalized // (i.e. normalization or actual range is infinitely small. if (Double.isNaN(result)) { return colDef.getClasses().get(0); } return colDef.getClasses().get((int) result); } }
/**
 * Reset the input/output assignments, then mark every source column
 * that appears in {@code outputColumns} as an output and every other
 * non-ignored source column as an input.
 *
 * @param outputColumns
 *            The output columns.
 */
public void defineMultipleOutputsOthersInput(ColumnDefinition[] outputColumns) {
    this.helper.clearInputOutput();

    for (ColumnDefinition candidate : this.helper.getSourceColumns()) {
        // Identity scan: is this source column one of the requested outputs?
        boolean listedAsOutput = false;
        for (int i = 0; i < outputColumns.length; i++) {
            if (outputColumns[i] == candidate) {
                listedAsOutput = true;
                break;
            }
        }

        if (listedAsOutput) {
            defineOutput(candidate);
            continue;
        }

        if (candidate.getDataType() != ColumnType.ignore) {
            defineInput(candidate);
        }
    }
}
/** * {@inheritDoc} */ @Override public int normalizeColumn(ColumnDefinition colDef, String theValue, double[] outputData, int outputColumn) { // Find the index of the ordinal int v = colDef.getClasses().indexOf(theValue); if (v == -1) { throw new EncogError("Unknown ordinal: " + theValue); } double high = colDef.getClasses().size(); double value = v; double result = (value / high) * (this.normalizedHigh - this.normalizedLow) + this.normalizedLow; // typically caused by a number that should not have been normalized // (i.e. normalization or actual range is infinitely small. if (Double.isNaN(result)) { result = ((this.normalizedHigh - this.normalizedLow) / 2) + this.normalizedLow; } outputData[outputColumn] = result; return outputColumn + 1; }
/**
 * {@inheritDoc}
 *
 * Always suggests the same strategy, for inputs and outputs alike:
 * continuous columns use a {@link RangeNormalizer} over (0, 1), while
 * nominal and ordinal columns use a {@link OneOfNNormalizer} over
 * (0, 1). Neither {@code dataset} nor {@code architecture} influences
 * the result, so they are not inspected.
 */
@Override
public NormalizationStrategy suggestNormalizationStrategy(
        VersatileMLDataSet dataset, String architecture) {
    BasicNormalizationStrategy result = new BasicNormalizationStrategy();

    result.assignInputNormalizer(ColumnType.continuous, new RangeNormalizer(0, 1));
    result.assignInputNormalizer(ColumnType.nominal, new OneOfNNormalizer(0, 1));
    result.assignInputNormalizer(ColumnType.ordinal, new OneOfNNormalizer(0, 1));

    result.assignOutputNormalizer(ColumnType.continuous, new RangeNormalizer(0, 1));
    result.assignOutputNormalizer(ColumnType.nominal, new OneOfNNormalizer(0, 1));
    result.assignOutputNormalizer(ColumnType.ordinal, new OneOfNNormalizer(0, 1));

    return result;
}
/**
 * {@inheritDoc}
 *
 * The output count is the number of classes defined on the first
 * output column of the dataset.
 */
@Override
public int determineOutputCount(VersatileMLDataSet dataset) {
    final int classCount = dataset.getNormHelper().getOutputColumns()
            .get(0).getClasses().size();
    return classCount;
}
}