// Evaluate the trained model over the MNIST test iterator, accumulating stats batch by batch.
// NOTE(review): this snippet is truncated — the while-loop's closing brace is not visible,
// so log.info(eval.stats()) and mnistTest.reset() appear to execute INSIDE the loop.
// Logging full stats per batch and resetting the iterator mid-loop look unintended
// (reset() inside the loop would restart iteration); confirm placement against full source.
Evaluation eval = new Evaluation(outputNum);
while(mnistTest.hasNext()){
    DataSet ds = mnistTest.next();
    // Forward pass in inference mode (train = false).
    INDArray output = model.output(ds.getFeatures(), false);
    // Accumulate confusion-matrix counts for this batch.
    eval.eval(ds.getLabels(), output);
    log.info(eval.stats());
    mnistTest.reset();
// Run accuracy/F1 (Evaluation) and multi-class ROC (ROCMultiClass) in a single pass over the test data.
IEvaluation[] evals = net.doEvaluation(test, new Evaluation(), new ROCMultiClass());
long endEval = System.currentTimeMillis();
// NOTE(review): fragment — the StringBuilder receiver of this .append(...) chain is outside
// the visible snippet, as are 'e' (Evaluation), 'r' (ROCMultiClass), 'start', 'end',
// 'startEval', 'iterAfter' and 'itersBefore'. Presumably e = evals[0], r = evals[1] — confirm.
        .append(" subsetMiniBatches ").append(iterAfter - itersBefore) //Note: "end of epoch" effect - may be smaller than subset size
        .append(" trainMS ").append(end - start).append(" evalMS ").append(endEval - startEval)
        .append(" accuracy ").append(e.accuracy()).append(" f1 ").append(e.f1())
        .append(" AvgAUC ").append(r.calculateAverageAUC()).append(" AvgAUPRC ").append(r.calculateAverageAUCPR()).append("\n");
private static void evaluatePerformance(MultiLayerNetwork net, int testStartIdx, int nExamples, String outputDirectory) throws Exception { //Assuming here that the full test data set doesn't fit in memory -> load 10 examples at a time Map<Integer, String> labelMap = new HashMap<>(); labelMap.put(0, "circle"); labelMap.put(1, "square"); labelMap.put(2, "arc"); labelMap.put(3, "line"); Evaluation evaluation = new Evaluation(labelMap); DataSetIterator testData = getDataSetIterator(outputDirectory, testStartIdx, nExamples, 1000); while(testData.hasNext()) { DataSet dsTest = testData.next(); INDArray predicted = net.output(dsTest.getFeatures(), false); INDArray actual = dsTest.getLabels(); evaluation.evalTimeSeries(actual, predicted); } System.out.println(evaluation.stats()); }
// Top-5 accuracy evaluation on the TinyImageNet test set, then persist the report.
Evaluation e = new Evaluation(TinyImageNetDataSetIterator.getLabels(false), 5); //Set up for top 5 accuracy
net.doEvaluation(test, e);
log.info(e.stats());
// NOTE(review): output filename "evaulation.txt" is misspelled ("evaluation");
// left unchanged here since downstream tooling may already reference the existing name.
FileUtils.writeStringToFile(new File(saveDir, "evaulation.txt"), e.stats(), StandardCharsets.UTF_8);
// Log the accumulated evaluation report; the boolean argument selects an alternate
// stats format — presumably confusion-matrix / warning verbosity; confirm against
// the Evaluation.stats(boolean) Javadoc for this DL4J version.
log.info(eval.stats(true));
/**
 * Computes the F1 score for the supplied examples against their true labels.
 * Interpret it as a 0-to-1 "fraction correct" style measure: higher is better.
 *
 * @param examples the examples to classify (one example per row)
 * @param labels   the ground-truth labels
 * @return the F1 score (harmonic mean of precision and recall) over all examples
 */
@Override
public double f1Score(INDArray examples, INDArray labels) {
    Evaluation scorer = new Evaluation();
    // Score predicted probabilities against the true labels.
    scorer.eval(labels, labelProbabilities(examples));
    return scorer.f1();
}
/**
 * Creates a listener that runs evaluation on {@code iterator} every
 * {@code frequency} iterations, triggered at iteration end, using a
 * fresh {@link Evaluation} instance.
 *
 * @param iterator  data set to evaluate on
 * @param frequency evaluate once every this-many iterations
 */
public EvaluativeListener(@NonNull DataSetIterator iterator, int frequency) {
    // Delegate to the full constructor with the default invocation point and evaluator.
    this(iterator, frequency, InvocationType.ITERATION_END, new Evaluation());
}
// Evaluate with record metadata attached, so each misclassified prediction can be
// traced back to its originating test record.
Evaluation eval = new Evaluation(numberOfClasses);
eval.eval(testData.getLabels(), output, testMetaData); //Note we are passing in the test set metadata here
List<Prediction> predictionErrors = eval.getPredictionErrors();
System.out.println("\n\n+++++ Prediction Errors +++++");
for(Prediction p : predictionErrors){
    // NOTE(review): snippet is truncated — printing the FULL eval.stats() once per
    // prediction error looks unintended ('p' itself is never used here); likely the
    // loop should print 'p' and stats() belongs after it. Confirm against full source.
    System.out.println(eval.stats());
@Override public String evaluate(FederatedDataSet federatedDataSet) { //evaluate the model on the test set DataSet testData = (DataSet) federatedDataSet.getNativeDataSet(); double score = model.score(testData); Evaluation eval = new Evaluation(numClasses); INDArray output = model.output(testData.getFeatureMatrix()); eval.eval(testData.getLabels(), output); return "Score: " + score; }
// Builds the human-readable stats report: confusion listing, warnings, then summary scores.
StringBuilder builder = new StringBuilder().append("\n");
StringBuilder warnings = new StringBuilder();
List<Integer> classes = confusion().getClasses();
// NOTE(review): fragment — the nested loops declaring 'clazz'/'clazz2' (and the
// 'actual'/'predicted' declarations) that drive this confusion-matrix listing are
// outside the visible snippet; brace balance here reflects that truncation.
actual = resolveLabelForClass(clazz);
int count = confusion().getCount(clazz, clazz2);
if (count != 0) {
    predicted = resolveLabelForClass(clazz2);
    builder.append(String.format("Examples labeled as %s classified by model as %s: %d times%n", actual, predicted, count));
// Append precision/recall warnings for classes with degenerate prediction counts.
warningHelper(warnings, falsePositivesWarningClasses, "precision");
warningHelper(warnings, falseNegativesWarningClasses, "recall");
int nClasses = confusion().getClasses().size();
// Fixed 4-decimal formatting for all score lines.
DecimalFormat df = new DecimalFormat("0.0000");
double acc = accuracy();
double precisionMacro = precision(EvaluationAveraging.Macro);
double recallMacro = recall(EvaluationAveraging.Macro);
double f1Macro = f1(EvaluationAveraging.Macro);
builder.append("\n==========================Scores========================================");
builder.append("\n # of classes: ").append(nClasses);
builder.append("\n Accuracy: ").append(format(df, acc));
// Top-N accuracy line is only meaningful when topN > 1 was configured.
if (topN > 1) {
    double topNAcc = topNAccuracy();
    builder.append("\n Top ").append(topN).append(" Accuracy: ").append(format(df, topNAcc));
/**
 * Collects statistics on the real outcomes vs the guesses.
 * This is for logistic outcome matrices.
 * <p>
 * Note that an IllegalArgumentException is thrown if the two passed in
 * matrices aren't the same length.
 *
 * @param realOutcomes the real outcomes (labels - usually binary)
 * @param guesses      the guesses/prediction (usually a probability vector)
 */
public void eval(INDArray realOutcomes, INDArray guesses) {
    // Delegate to the metadata-aware overload with no record metadata;
    // the cast disambiguates which overload is targeted by the null argument.
    eval(realOutcomes, guesses, (List<Serializable>) null);
}
/**
 * Top-N accuracy of the predictions so far. For top N = 1 (the default) this
 * is equivalent to {@link #accuracy()}.
 *
 * @return top-N accuracy, or 0.0 when no top-N records have been accumulated
 */
public double topNAccuracy() {
    if (topN <= 1) {
        return accuracy();
    }
    // Guard against division by zero before any evaluation has been run.
    return topNTotalCount == 0 ? 0.0 : (double) topNCorrectCount / topNTotalCount;
}
/**
 * Calculates the macro-averaged F1 score across all classes.
 * <p>
 * Per class: F1 = 2*TP / (2*TP + FP + FN), i.e. the harmonic mean of
 * precision and recall (TP = true positives, FP = false positives,
 * FN = false negatives).
 *
 * @return the macro-average F1 score over the current accumulated guesses
 */
public double f1() {
    // Delegate to the averaging-aware overload with macro averaging.
    return f1(EvaluationAveraging.Macro);
}
// Top-5 accuracy evaluation of the Spark-trained network over the test paths.
Evaluation evaluation = new Evaluation(TinyImageNetDataSetIterator.getLabels(false), 5); //Set up for top 5 accuracy
// doEvaluation returns an IEvaluation[]; a single Evaluation was passed in, so take index 0.
evaluation = (Evaluation) sparkNet.doEvaluation(pathsTest, loader, evaluation)[0];
log.info("Evaluation statistics: {}", evaluation.stats());
// Persist the stats so the results outlive the Spark job.
SparkUtils.writeStringToFile(evalPath, evaluation.stats(), sc);
// Log the accumulated evaluation report string.
log.info(eval.stats());
/**
 * Returns the F1 score for the given examples against their true labels.
 * The score lies on a 0-to-1 scale; the higher the value, the more the
 * classifier got right.
 *
 * @param examples the examples to classify (one example in each row)
 * @param labels   the true labels
 * @return the F1 score over all supplied examples
 */
@Override
public double f1Score(INDArray examples, INDArray labels) {
    // Obtain class probabilities first, then score them in a fresh evaluator.
    INDArray predicted = labelProbabilities(examples);
    Evaluation evaluation = new Evaluation();
    evaluation.eval(labels, predicted);
    return evaluation.f1();
}
/**
 * Creates a listener that runs evaluation on {@code iterator} every
 * {@code frequency} iterations, triggered at iteration end, using a
 * fresh {@link Evaluation} instance.
 *
 * @param iterator  multi-data-set iterator to evaluate on
 * @param frequency evaluate once every this-many iterations
 */
public EvaluativeListener(@NonNull MultiDataSetIterator iterator, int frequency) {
    // Delegate to the full constructor with the default invocation point and evaluator.
    this(iterator, frequency, InvocationType.ITERATION_END, new Evaluation());
}
/**
 * Evaluates the given true labels against the output produced by running
 * {@code input} through the supplied {@link ComputationGraph}.
 *
 * @param trueLabels the labels to use
 * @param input      the input to feed the network for evaluation
 * @param network    the network used to generate the output
 */
public void eval(INDArray trueLabels, INDArray input, ComputationGraph network) {
    // output(false, input): inference mode (train = false); only the graph's
    // first output array is scored here.
    eval(trueLabels, network.output(false, input)[0]);
}
/**
 * Scores the graph as the classification accuracy obtained by evaluating it
 * over the entire iterator.
 *
 * @param graph    network to evaluate
 * @param iterator test data to evaluate on
 * @return accuracy of the network's predictions
 */
@Override
public double score(ComputationGraph graph, MultiDataSetIterator iterator) {
    return graph.evaluate(iterator).accuracy();
}
/**
 * Scores the graph as the F1 score obtained by evaluating it over the
 * entire iterator.
 *
 * @param graph    network to evaluate
 * @param iterator test data to evaluate on
 * @return F1 score of the network's predictions
 */
@Override
public double score(ComputationGraph graph, DataSetIterator iterator) {
    return graph.evaluate(iterator).f1();
}