@Test
public void classification() throws IOException {
    // Load the baseball data set from the shared test resources.
    Table moneyball = Table.read().csv("../data/baseball.csv");

    // Fit a single-tree random forest predicting Playoffs from RS, RA and OBP.
    RandomForest playoffsModel =
            new RandomForest(moneyball.smile().nominalDataset("Playoffs", "RS", "RA", "OBP"), 1);

    // Smoke test: the fitted model should render a non-null description.
    assertNotNull(playoffsModel.toString());
}
/**
 * Predicts the class of an instance without computing posterior
 * probabilities.
 *
 * @param x the instance to be classified.
 * @return the predicted class label. For MULTINOMIAL and BERNOULLI models,
 *         returns -1 if the instance does not contain any feature words.
 */
public int predict(SparseArray x) {
    // Delegate to the full predict, skipping posteriori estimation.
    return predict(x, null);
}
/**
 * Fits a decision tree on the given training data using this
 * trainer's configured attributes, size limits and split rule.
 *
 * @param x the training samples.
 * @param y the class labels.
 * @return the fitted decision tree.
 */
@Override
public DecisionTree train(double[][] x, int[] y) {
    DecisionTree tree = new DecisionTree(attributes, x, y, maxNodes, nodeSize, rule);
    return tree;
}
}
/**
 * Fits a logistic regression model on the given training data using
 * this trainer's regularization and convergence settings.
 *
 * @param x the training samples.
 * @param y the class labels.
 * @return the fitted logistic regression model.
 */
@Override
public LogisticRegression train(double[][] x, int[] y) {
    LogisticRegression model = new LogisticRegression(x, y, lambda, tol, maxIter);
    return model;
}
}
/**
 * Predicts the class label of the given sample by delegating to the
 * wrapped classifier model.
 */
@Override
int predictFromModel(double[] data) {
    int label = classifierModel.predict(data);
    return label;
}
}
/**
 * Learns a nearest-neighbor (k = 1) classifier from data of type double[].
 *
 * @param x the training samples.
 * @param y training labels in [0, c), where c is the number of classes.
 * @return the 1-NN classifier.
 */
public static KNN<double[]> learn(double[][] x, int[] y) {
    // Delegate to the general learner with a single neighbor.
    final int neighbors = 1;
    return learn(x, y, neighbors);
}
/**
 * Computes the network output error against the desired output.
 *
 * @param output the desired output.
 * @return the network output error.
 */
private double computeOutputError(double[] output) {
    // Per-unit error terms are written into the output layer's error buffer.
    return computeOutputError(output, outputLayer.error);
}
/**
 * Fits a maximum entropy classifier on the given binary feature data
 * using this trainer's regularization and convergence settings.
 *
 * @param x the training samples as sets of feature indices.
 * @param y the class labels.
 * @return the fitted maximum entropy classifier.
 */
@Override
public Maxent train(int[][] x, int[] y) {
    Maxent model = new Maxent(p, x, y, lambda, tol, maxIter);
    return model;
}
}
/**
 * Predicts the class label of the given sample by delegating to the
 * wrapped classifier model.
 */
@Override
int predictFromModel(double[] data) {
    return classifierModel
            .predict(data);
}
}
@Override public Node call() { // An array to store sample count in each class for false child node. int[] falseCount = new int[k]; return findBestSplit(n, count, falseCount, impurity, j); } }
/**
 * Fits an AdaBoost ensemble on the given training data using this
 * trainer's tree count and tree size settings.
 *
 * @param x the training samples.
 * @param y the class labels.
 * @return the fitted AdaBoost ensemble.
 */
@Override
public AdaBoost train(double[][] x, int[] y) {
    AdaBoost ensemble = new AdaBoost(attributes, x, y, ntrees, maxNodes);
    return ensemble;
}
}
/**
 * Fits a gradient boosted tree ensemble on the given training data using
 * this trainer's tree count, tree size, shrinkage and subsampling settings.
 *
 * @param x the training samples.
 * @param y the class labels.
 * @return the fitted gradient boosted tree ensemble.
 */
@Override
public GradientTreeBoost train(double[][] x, int[] y) {
    GradientTreeBoost ensemble =
            new GradientTreeBoost(attributes, x, y, ntrees, maxNodes, shrinkage, subsample);
    return ensemble;
}
}
/**
 * Fits a regularized discriminant analysis model on the given training
 * data using this trainer's priors, regularization factor and tolerance.
 *
 * @param x the training samples.
 * @param y the class labels.
 * @return the fitted RDA model.
 */
@Override
public RDA train(double[][] x, int[] y) {
    RDA model = new RDA(x, y, priori, alpha, tol);
    return model;
}
}
/**
 * Predicts the class label of an instance and also calculates a posteriori
 * probabilities. The posteriori estimate is based on the sample distribution
 * in the reached leaf node; it is not accurate when used with a single tree
 * and is intended mainly for ensemble use by RandomForest.
 */
@Override
public int predict(double[] x, double[] posteriori) {
    // Walk the tree from the root; the leaf fills in the posteriori array.
    return root.predict(x, posteriori);
}
/**
 * Fits a k-nearest-neighbor classifier on the given training data using
 * this trainer's distance measure and neighbor count.
 *
 * @param x the training samples.
 * @param y the class labels.
 * @return the fitted k-NN classifier.
 */
@Override
public KNN<T> train(T[] x, int[] y) {
    KNN<T> model = new KNN<>(x, y, distance, k);
    return model;
}
}
/**
 * Propagates the errors back through the network, from the output
 * layer toward the input layer.
 */
private void backpropagate() {
    // net[0] is the input layer, which receives no propagated error.
    for (int layer = net.length - 1; layer > 0; layer--) {
        backpropagate(net[layer], net[layer - 1]);
    }
}
/**
 * Fits a Fisher linear discriminant model on the given training data
 * using this trainer's dimensionality and tolerance settings.
 *
 * @param x the training samples.
 * @param y the class labels.
 * @return the fitted FLD model.
 */
@Override
public FLD train(double[][] x, int[] y) {
    FLD model = new FLD(x, y, L, tol);
    return model;
}
}
/** * Constructor. The activation function of output layer will be chosen * by natural pairing based on the error function and the number of * classes. * * @param error the error function. * @param numUnits the number of units in each layer. */ public NeuralNetwork(ErrorFunction error, int... numUnits) { this(error, natural(error, numUnits[numUnits.length-1]), numUnits); }
/**
 * Returns the maximum depth of the tree -- the number of nodes along
 * the longest path from the root node down to the farthest leaf node.
 *
 * @return the maximum depth of the tree.
 */
public int maxDepth() {
    return maxDepth(root);
}