/**
 * Verifies that every vector in the given data has the same
 * dimensionality.
 *
 * @param data
 *      The data whose dimensionalities are checked.
 * @throws DimensionalityMismatchException
 *      If the dimensionalities are not all equal.
 */
public static void assertDimensionalitiesAllEqual(
    final Iterable<? extends Vectorizable> data) {
    // The dimensionality reported for the data itself is the value every
    // element is checked against.
    final int expectedDimensionality = getDimensionality(data);
    VectorUtil.assertDimensionalitiesAllEqual(data, expectedDimensionality);
}
/**
 * Builds a new dataset in which every Vector has a bias term (the
 * constant 1.0) appended as its last element. The original dataset is
 * left unmodified. Each resulting Vector has one more dimension than its
 * source: [ x1 x2 ] -> [ x1 x2 1.0 ]
 *
 * @param dataset
 *      Dataset to append a bias term to; its Vectors may have different
 *      dimensionalities.
 * @return
 *      Dataset with 1.0 appended to each Vector of the given dataset.
 */
public static ArrayList<Vector> appendBias(
    Collection<? extends Vector> dataset) {
    // Delegate to the two-argument overload with the default bias value.
    final double defaultBias = 1.0;
    return DatasetUtil.appendBias(dataset, defaultBias);
}
public LearnedType learn( final Collection<? extends DataType> data) { // Convert the data to a multi-collection (if it is one). return this.learn(DatasetUtil.asMultiCollection(data)); }
/**
 * Verifies that the input vectors of all the given input-output pairs
 * share one dimensionality.
 *
 * @param data
 *      The input-output pairs whose input dimensionalities are checked.
 * @throws DimensionalityMismatchException
 *      If the dimensionalities are not all equal.
 */
public static void assertInputDimensionalitiesAllEqual(
    final Iterable<? extends InputOutputPair<? extends Vectorizable, ?>> data) {
    // The input dimensionality reported for the data itself is the value
    // every pair is checked against.
    final int expectedDimensionality = getInputDimensionality(data);
    assertInputDimensionalitiesAllEqual(data, expectedDimensionality);
}
/**
 * Creates the starting ensemble, constructed from the set of unique
 * output categories found in this learner's data.
 *
 * @return
 *      A new weighted voting categorizer ensemble for those categories.
 */
@Override
protected WeightedVotingCategorizerEnsemble<
    InputType,
    CategoryType,
    Evaluator<? super InputType, ? extends CategoryType>> createInitialEnsemble() {
    // Collect each distinct output value that appears in the data.
    final Set<CategoryType> uniqueCategories =
        DatasetUtil.findUniqueOutputs(this.getData());

    return new WeightedVotingCategorizerEnsemble<
        InputType,
        CategoryType,
        Evaluator<? super InputType, ? extends CategoryType>>(uniqueCategories);
}
@Override protected boolean initializeAlgorithm() { if (CollectionUtil.isEmpty(this.getData())) { // No data to learn from. return false; } // Get the dimensionality of the data. final int dimensionality = DatasetUtil.getInputDimensionality( this.getData()); // Create the categorizer we will learn and create the prototypes for // each category. this.result = new LinearMultiCategorizer<CategoryType>(); final Set<CategoryType> categories = DatasetUtil.findUniqueOutputs( this.getData()); for (CategoryType category : categories) { final LinearBinaryCategorizer prototype = new LinearBinaryCategorizer( this.getVectorFactory().createVector(dimensionality), 0.0); this.result.getPrototypes().put(category, prototype); } // The algorithm is now initialized. return true; }
new DefaultWeightedInputOutputPair<InputType, CategoryType>( example.getInput(), example.getOutput(), DatasetUtil.getWeight(example))); numExamples++; final Set<CategoryType> categories = DatasetUtil.findUniqueOutputs( this.weightedData); this.ensemble = new WeightedVotingCategorizerEnsemble<InputType, CategoryType, Evaluator<? super InputType, ? extends CategoryType>>(
final double baseVariance = DatasetUtil.computeOutputVariance(data); final int dimensionality = DatasetUtil.getInputDimensionality(data);
/** * Creates a constant evaluator that returns the mean output value of the * given dataset. * * @param data The dataset of input-output pairs to use. * @return A constant evaluator for the mean output value. */ @Override public ConstantEvaluator<Double> learn( final Collection<? extends InputOutputPair<?, Double>> data) { // Compute the mean. final double mean = DatasetUtil.computeOutputMean(data); return new ConstantEvaluator<Double>(mean); } }
/** * Creates a constant evaluator for the weighted mean output value of the * given dataset. * * @param data The dataset of input-output pairs to use. * @return A constant evaluator for the weighted mean output value. */ @Override public ConstantEvaluator<Double> learn( final Collection<? extends InputOutputPair<?, Double>> data) { // Compute the weighted mean. final double mean = DatasetUtil.computeWeightedOutputMean(data); return new ConstantEvaluator<Double>(mean); } }
/** * Learns a normalization based on a mean and full covariance matrix from * the given data. * * @param values * The values to learn the decorrelator from. * @param defaultCovariance * The default value for the covariance. Added to the diagonal of the * covariance matrix to prevent singular values. * @return * The MultivariateDecorrelator created from the multivariate mean and * variance. */ public static MultivariateDecorrelator learnFullCovariance( final Collection<? extends Vectorizable> values, final double defaultCovariance) { // Convert the values to vector form. final Collection<Vector> vectorValues = DatasetUtil.asVectorCollection(values); // Learn the maximum likelihood estimator of the Gaussian. final MultivariateGaussian.PDF pdf = MultivariateGaussian.MaximumLikelihoodEstimator.learn( vectorValues, defaultCovariance); return new MultivariateDecorrelator(pdf); }
Matrix XXt = DatasetUtil.computeOuterProductDataMatrix(dataArray);
/**
 * Verifies that the input vectors of all the given input-output pairs
 * share one dimensionality.
 *
 * @param data
 *      The input-output pairs whose input dimensionalities are checked.
 * @throws DimensionalityMismatchException
 *      If the dimensionalities are not all equal.
 */
public static void assertInputDimensionalitiesAllEqual(
    final Iterable<? extends InputOutputPair<? extends Vectorizable, ?>> data) {
    // The input dimensionality reported for the data itself is the value
    // every pair is checked against.
    final int expectedDimensionality = getInputDimensionality(data);
    assertInputDimensionalitiesAllEqual(data, expectedDimensionality);
}
@Override protected boolean initializeAlgorithm() { boolean result = super.initializeAlgorithm(); if (result) { // Map each category to a list of indices for it. final int dataSize = this.dataList.size(); final Set<CategoryType> categories = DatasetUtil.findUniqueOutputs( this.dataList); this.categoryList = new ArrayList<CategoryType>(categories); this.dataPerCategory = new LinkedHashMap<CategoryType, ArrayList<Integer>>( categories.size()); for (CategoryType category : categories) { this.dataPerCategory.put(category, new ArrayList<Integer>()); } for (int i = 0; i < dataSize; i++) { final CategoryType category = this.dataList.get(i).getOutput(); this.dataPerCategory.get(category).add(i); } } return result; }
@Override protected boolean initializeAlgorithm() { if (CollectionUtil.isEmpty(this.getData())) { // No data to learn from. return false; } // Get the dimensionality of the data. final int dimensionality = DatasetUtil.getInputDimensionality( this.getData()); // Create the categorizer we will learn and create the prototypes for // each category. this.result = new LinearMultiCategorizer<CategoryType>(); final Set<CategoryType> categories = DatasetUtil.findUniqueOutputs( this.getData()); for (CategoryType category : categories) { final LinearBinaryCategorizer prototype = new LinearBinaryCategorizer( this.getVectorFactory().createVector(dimensionality), 0.0); this.result.getPrototypes().put(category, prototype); } // The algorithm is now initialized. return true; }
new DefaultWeightedInputOutputPair<InputType, CategoryType>( example.getInput(), example.getOutput(), DatasetUtil.getWeight(example))); numExamples++; final Set<CategoryType> categories = DatasetUtil.findUniqueOutputs( this.weightedData); this.ensemble = new WeightedVotingCategorizerEnsemble<InputType, CategoryType, Evaluator<? super InputType, ? extends CategoryType>>(
final double baseVariance = DatasetUtil.computeOutputVariance(data); final int dimensionality = DatasetUtil.getInputDimensionality(data);
/** * Creates a constant evaluator that returns the mean output value of the * given dataset. * * @param data The dataset of input-output pairs to use. * @return A constant evaluator for the mean output value. */ @Override public ConstantEvaluator<Double> learn( final Collection<? extends InputOutputPair<?, Double>> data) { // Compute the mean. final double mean = DatasetUtil.computeOutputMean(data); return new ConstantEvaluator<Double>(mean); } }
/** * Creates a constant evaluator for the weighted mean output value of the * given dataset. * * @param data The dataset of input-output pairs to use. * @return A constant evaluator for the weighted mean output value. */ @Override public ConstantEvaluator<Double> learn( final Collection<? extends InputOutputPair<?, Double>> data) { // Compute the weighted mean. final double mean = DatasetUtil.computeWeightedOutputMean(data); return new ConstantEvaluator<Double>(mean); } }
/** * Learns a normalization based on a mean and full covariance matrix from * the given data. * * @param values * The values to learn the decorrelator from. * @param defaultCovariance * The default value for the covariance. Added to the diagonal of the * covariance matrix to prevent singular values. * @return * The MultivariateDecorrelator created from the multivariate mean and * variance. */ public static MultivariateDecorrelator learnFullCovariance( final Collection<? extends Vectorizable> values, final double defaultCovariance) { // Convert the values to vector form. final Collection<Vector> vectorValues = DatasetUtil.asVectorCollection(values); // Learn the maximum likelihood estimator of the Gaussian. final MultivariateGaussian.PDF pdf = MultivariateGaussian.MaximumLikelihoodEstimator.learn( vectorValues, defaultCovariance); return new MultivariateDecorrelator(pdf); }