org.apache.spark.mllib.tree.model.DecisionTreeModel Java code examples

              IntLongMap nodeIDCounts) {
boolean classificationTask = dtModel.algo().equals(Algo.Classification());
Preconditions.checkState(classificationTask == inputSchema.isClassification());
treeNodes.add(new Pair<>(dtModel.topNode(), null));

/**
 * Predicts values for an RDD of feature vectors by delegating to the wrapped model.
 *
 * @param testData feature vectors to score
 * @return the RDD produced by the wrapped model's {@code predict}
 */
@Override
public RDD<Object> predict(RDD<Vector> testData) {
 RDD<Object> predicted = model.predict(testData);
 return predicted;
}

// Look up the i-th tree together with the node-ID count map stored at the same index.
DecisionTreeModel tree = trees[i];
IntLongHashMap nodeIDCount = treeNodeIDCounts.get(i);
// Start from the tree's top node.
org.apache.spark.mllib.tree.model.Node node = tree.topNode();

/**
 * Predicts the value for a single feature vector by delegating to the wrapped model.
 *
 * @param testData feature vector to score
 * @return the value returned by the wrapped model's {@code predict}
 */
@Override
public double predict(Vector testData) {
 double predicted = model.predict(testData);
 return predicted;
}

/**
 * Runs every training example down every tree of the forest and tallies, per split
 * feature, how many times an example passed through a non-leaf node deciding on it.
 * One map is built per partition and the per-partition maps are combined with
 * {@code RDFUpdate::merge}.
 *
 * @param trainPointData data to run down trees
 * @param model random decision forest model to count on
 * @return map of predictor index to the number of training examples that reached a
 *  node whose decision is based on that feature. The index is among predictors, not all
 *  features, since there are fewer predictors than features. That is, the index will
 *  match the one used in the {@link RandomForestModel}.
 */
private static IntLongHashMap predictorExampleCounts(JavaRDD<? extends LabeledPoint> trainPointData,
                           RandomForestModel model) {
 return trainPointData.mapPartitions(partition -> {
  IntLongHashMap countsByFeature = new IntLongHashMap();
  while (partition.hasNext()) {
   LabeledPoint example = partition.next();
   double[] features = example.features().toArray();
   for (DecisionTreeModel tree : model.trees()) {
    // Same descent as Node.predict, but recording the feature of every split visited.
    for (org.apache.spark.mllib.tree.model.Node current = tree.topNode(); !current.isLeaf(); ) {
     Split decision = current.split().get();
     int splitFeature = decision.feature();
     countsByFeature.addToValue(splitFeature, 1);
     current = nextNode(features, current, decision, splitFeature);
    }
   }
  }
  // Emit exactly one map per partition so the reduce below merges partition totals.
  return Collections.singleton(countsByFeature).iterator();
 }).reduce(RDFUpdate::merge);
}

 /**
  * Predicts values for a {@code JavaRDD} of feature vectors.
  * Unwraps the input to its underlying RDD, delegates to the wrapped model, wraps the
  * result back into a {@code JavaRDD}, and maps each element through a
  * {@code DoubleValueMapper}.
  *
  * @param testData feature vectors to score
  * @return one mapped prediction per input vector
  */
 @Override
 public JavaRDD<Double> predict(JavaRDD<Vector> testData) {
  return model.predict(testData.rdd()).toJavaRDD().map(new DoubleValueMapper());
 }
}

/**
 * Counts how many validation points the model predicts correctly.
 *
 * @param validationData labeled points to score
 * @param model decision tree model used to predict each point's label
 * @return number of points whose prediction is exactly equal to the point's label
 */
private static int validatePrediction(
  List<LabeledPoint> validationData, DecisionTreeModel model) {
 int numCorrect = 0;
 for (LabeledPoint point : validationData) {
  // Keep the prediction primitive: a boxed Double compared with == would silently turn
  // into a reference comparison if the other operand ever became boxed as well.
  double prediction = model.predict(point.features());
  if (prediction == point.label()) {
   numCorrect++;
  }
 }
 return numCorrect;
}

/**
 * Counts how many validation points the model predicts correctly.
 *
 * @param validationData labeled points to score
 * @param model decision tree model used to predict each point's label
 * @return number of points whose prediction is exactly equal to the point's label
 */
private static int validatePrediction(
  List<LabeledPoint> validationData, DecisionTreeModel model) {
 int numCorrect = 0;
 for (LabeledPoint point : validationData) {
  // Keep the prediction primitive: a boxed Double compared with == would silently turn
  // into a reference comparison if the other operand ever became boxed as well.
  double prediction = model.predict(point.features());
  if (prediction == point.label()) {
   numCorrect++;
  }
 }
 return numCorrect;
}

/**
 * Counts how many validation points the model predicts correctly.
 *
 * @param validationData labeled points to score
 * @param model decision tree model used to predict each point's label
 * @return number of points whose prediction is exactly equal to the point's label
 */
private static int validatePrediction(
  List<LabeledPoint> validationData, DecisionTreeModel model) {
 int numCorrect = 0;
 for (LabeledPoint point : validationData) {
  // Keep the prediction primitive: a boxed Double compared with == would silently turn
  // into a reference comparison if the other operand ever became boxed as well.
  double prediction = model.predict(point.features());
  if (prediction == point.label()) {
   numCorrect++;
  }
 }
 return numCorrect;
}

/**
 * Trains a classification tree via {@code DecisionTree$.MODULE$.train} on the categorical
 * sample data and checks that every training point is predicted correctly.
 */
@Test
public void runDTUsingStaticMethods() {
 List<LabeledPoint> arr = DecisionTreeSuite.generateCategoricalDataPointsAsJavaList();
 JavaRDD<LabeledPoint> rdd = jsc.parallelize(arr);
 HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
 categoricalFeaturesInfo.put(1, 2); // feature 1 has 2 categories
 int maxDepth = 4;
 int numClasses = 2;
 int maxBins = 100;
 Strategy strategy = new Strategy(Algo.Classification(), Gini.instance(), maxDepth, numClasses,
  maxBins, categoricalFeaturesInfo);
 DecisionTreeModel model = DecisionTree$.MODULE$.train(rdd.rdd(), strategy);
 // java compatibility test
 // NOTE(review): presumably kept only to prove the JavaRDD overload compiles from Java;
 // the resulting RDD is never evaluated here.
 JavaRDD<Double> predictions = model.predict(rdd.map(LabeledPoint::features));
 int numCorrect = validatePrediction(arr, model);
 // JUnit's contract is (expected, actual): every point is expected to be correct.
 Assert.assertEquals(rdd.count(), numCorrect);
}

/**
 * Trains a classification tree via {@code DecisionTree$.MODULE$.train} on the categorical
 * sample data and checks that every training point is predicted correctly.
 */
@Test
public void runDTUsingStaticMethods() {
 List<LabeledPoint> arr = DecisionTreeSuite.generateCategoricalDataPointsAsJavaList();
 JavaRDD<LabeledPoint> rdd = jsc.parallelize(arr);
 HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
 categoricalFeaturesInfo.put(1, 2); // feature 1 has 2 categories
 int maxDepth = 4;
 int numClasses = 2;
 int maxBins = 100;
 Strategy strategy = new Strategy(Algo.Classification(), Gini.instance(), maxDepth, numClasses,
  maxBins, categoricalFeaturesInfo);
 DecisionTreeModel model = DecisionTree$.MODULE$.train(rdd.rdd(), strategy);
 // java compatibility test
 // NOTE(review): presumably kept only to prove the JavaRDD overload compiles from Java;
 // the resulting RDD is never evaluated here.
 JavaRDD<Double> predictions = model.predict(rdd.map(LabeledPoint::features));
 int numCorrect = validatePrediction(arr, model);
 // JUnit's contract is (expected, actual): every point is expected to be correct.
 Assert.assertEquals(rdd.count(), numCorrect);
}

/**
 * Trains a classification tree via {@code DecisionTree$.MODULE$.train} on the categorical
 * sample data and checks that every training point is predicted correctly.
 */
@Test
public void runDTUsingStaticMethods() {
 List<LabeledPoint> arr = DecisionTreeSuite.generateCategoricalDataPointsAsJavaList();
 JavaRDD<LabeledPoint> rdd = jsc.parallelize(arr);
 HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
 categoricalFeaturesInfo.put(1, 2); // feature 1 has 2 categories
 int maxDepth = 4;
 int numClasses = 2;
 int maxBins = 100;
 Strategy strategy = new Strategy(Algo.Classification(), Gini.instance(), maxDepth, numClasses,
  maxBins, categoricalFeaturesInfo);
 DecisionTreeModel model = DecisionTree$.MODULE$.train(rdd.rdd(), strategy);
 // java compatibility test
 // NOTE(review): presumably kept only to prove the JavaRDD overload compiles from Java;
 // the resulting RDD is never evaluated here.
 JavaRDD<Double> predictions = model.predict(rdd.map(LabeledPoint::features));
 int numCorrect = validatePrediction(arr, model);
 // JUnit's contract is (expected, actual): every point is expected to be correct.
 Assert.assertEquals(rdd.count(), numCorrect);
}

How to use DecisionTreeModel in org.apache.spark.mllib.tree.model

Best Java code snippets using org.apache.spark.mllib.tree.model.DecisionTreeModel (Showing top 12 results out of 315)

How to use DecisionTreeModel in org.apache.spark.mllib.tree.model