/**
 * Scores every feature vector in the given RDD by delegating to {@code model}.
 *
 * @param testData RDD of feature vectors to score
 * @return the RDD produced by {@code model.predict(testData)}, unchanged
 */
@Override
public RDD<Object> predict(RDD<Vector> testData) {
  RDD<Object> predictions = model.predict(testData);
  return predictions;
}
// Pick the i-th tree and the counter map stored at the same index.
DecisionTreeModel tree = trees[i];
// NOTE(review): presumably maps a node ID to the number of examples that reached that node;
// confirm against the code that populates treeNodeIDCounts.
IntLongHashMap nodeIDCount = treeNodeIDCounts.get(i);
// Begin at this tree's top node.
org.apache.spark.mllib.tree.model.Node node = tree.topNode();
/**
 * Scores a single feature vector by delegating to {@code model}.
 *
 * @param testData feature vector to score
 * @return the value produced by {@code model.predict(testData)}
 */
@Override
public double predict(Vector testData) {
  double prediction = model.predict(testData);
  return prediction;
}
/** * @param trainPointData data to run down trees * @param model random decision forest model to count on * @return map of predictor index to the number of training examples that reached a * node whose decision is based on that feature. The index is among predictors, not all * features, since there are fewer predictors than features. That is, the index will * match the one used in the {@link RandomForestModel}. */ private static IntLongHashMap predictorExampleCounts(JavaRDD<? extends LabeledPoint> trainPointData, RandomForestModel model) { return trainPointData.mapPartitions(data -> { IntLongHashMap featureIndexCount = new IntLongHashMap(); data.forEachRemaining(datum -> { double[] featureVector = datum.features().toArray(); for (DecisionTreeModel tree : model.trees()) { org.apache.spark.mllib.tree.model.Node node = tree.topNode(); // This logic cloned from Node.predict: while (!node.isLeaf()) { Split split = node.split().get(); int featureIndex = split.feature(); // Count feature featureIndexCount.addToValue(featureIndex, 1); node = nextNode(featureVector, node, split, featureIndex); } } }); return Collections.singleton(featureIndexCount).iterator(); }).reduce(RDFUpdate::merge); }
/**
 * Scores every feature vector in the given Java RDD. The underlying Scala RDD is handed to
 * {@code model.predict}, the result is converted back to a Java RDD, and each element is
 * mapped through a {@code DoubleValueMapper}.
 *
 * @param testData Java RDD of feature vectors to score
 * @return Java RDD of predictions as {@code Double} values
 */
@Override
public JavaRDD<Double> predict(JavaRDD<Vector> testData) {
  JavaRDD<Double> predictions =
      model.predict(testData.rdd()).toJavaRDD().map(new DoubleValueMapper());
  return predictions;
}
} // closes the enclosing scope whose opening is not part of this excerpt
/**
 * Counts how many of the given points the model predicts exactly right.
 *
 * @param validationData labeled points to check
 * @param model trained decision tree used to predict each point's label
 * @return number of points whose predicted value equals the point's label
 */
private static int validatePrediction(
    List<LabeledPoint> validationData, DecisionTreeModel model) {
  int numCorrect = 0;
  for (LabeledPoint point : validationData) {
    // Keep the prediction primitive: boxing it into a Double and then using == only worked
    // through implicit unboxing and looked like a boxed-reference comparison.
    double prediction = model.predict(point.features());
    // Exact equality is kept from the original check; no tolerance is applied.
    if (prediction == point.label()) {
      numCorrect++;
    }
  }
  return numCorrect;
}
/**
 * Counts how many of the given points the model predicts exactly right.
 *
 * @param validationData labeled points to check
 * @param model trained decision tree used to predict each point's label
 * @return number of points whose predicted value equals the point's label
 */
private static int validatePrediction(
    List<LabeledPoint> validationData, DecisionTreeModel model) {
  int numCorrect = 0;
  for (LabeledPoint point : validationData) {
    // Keep the prediction primitive: boxing it into a Double and then using == only worked
    // through implicit unboxing and looked like a boxed-reference comparison.
    double prediction = model.predict(point.features());
    // Exact equality is kept from the original check; no tolerance is applied.
    if (prediction == point.label()) {
      numCorrect++;
    }
  }
  return numCorrect;
}
/**
 * Counts how many of the given points the model predicts exactly right.
 *
 * @param validationData labeled points to check
 * @param model trained decision tree used to predict each point's label
 * @return number of points whose predicted value equals the point's label
 */
private static int validatePrediction(
    List<LabeledPoint> validationData, DecisionTreeModel model) {
  int numCorrect = 0;
  for (LabeledPoint point : validationData) {
    // Keep the prediction primitive: boxing it into a Double and then using == only worked
    // through implicit unboxing and looked like a boxed-reference comparison.
    double prediction = model.predict(point.features());
    // Exact equality is kept from the original check; no tolerance is applied.
    if (prediction == point.label()) {
      numCorrect++;
    }
  }
  return numCorrect;
}
@Test public void runDTUsingStaticMethods() { List<LabeledPoint> arr = DecisionTreeSuite.generateCategoricalDataPointsAsJavaList(); JavaRDD<LabeledPoint> rdd = jsc.parallelize(arr); HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<>(); categoricalFeaturesInfo.put(1, 2); // feature 1 has 2 categories int maxDepth = 4; int numClasses = 2; int maxBins = 100; Strategy strategy = new Strategy(Algo.Classification(), Gini.instance(), maxDepth, numClasses, maxBins, categoricalFeaturesInfo); DecisionTreeModel model = DecisionTree$.MODULE$.train(rdd.rdd(), strategy); // java compatibility test JavaRDD<Double> predictions = model.predict(rdd.map(LabeledPoint::features)); int numCorrect = validatePrediction(arr, model); Assert.assertEquals(numCorrect, rdd.count()); }
@Test public void runDTUsingStaticMethods() { List<LabeledPoint> arr = DecisionTreeSuite.generateCategoricalDataPointsAsJavaList(); JavaRDD<LabeledPoint> rdd = jsc.parallelize(arr); HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<>(); categoricalFeaturesInfo.put(1, 2); // feature 1 has 2 categories int maxDepth = 4; int numClasses = 2; int maxBins = 100; Strategy strategy = new Strategy(Algo.Classification(), Gini.instance(), maxDepth, numClasses, maxBins, categoricalFeaturesInfo); DecisionTreeModel model = DecisionTree$.MODULE$.train(rdd.rdd(), strategy); // java compatibility test JavaRDD<Double> predictions = model.predict(rdd.map(LabeledPoint::features)); int numCorrect = validatePrediction(arr, model); Assert.assertEquals(numCorrect, rdd.count()); }
@Test public void runDTUsingStaticMethods() { List<LabeledPoint> arr = DecisionTreeSuite.generateCategoricalDataPointsAsJavaList(); JavaRDD<LabeledPoint> rdd = jsc.parallelize(arr); HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<>(); categoricalFeaturesInfo.put(1, 2); // feature 1 has 2 categories int maxDepth = 4; int numClasses = 2; int maxBins = 100; Strategy strategy = new Strategy(Algo.Classification(), Gini.instance(), maxDepth, numClasses, maxBins, categoricalFeaturesInfo); DecisionTreeModel model = DecisionTree$.MODULE$.train(rdd.rdd(), strategy); // java compatibility test JavaRDD<Double> predictions = model.predict(rdd.map(LabeledPoint::features)); int numCorrect = validatePrediction(arr, model); Assert.assertEquals(numCorrect, rdd.count()); }