// Evaluate the trained model over the MNIST test iterator, accumulating stats batch by batch.
// NOTE(review): this snippet is truncated — the while-loop's closing brace is not visible,
// so log.info(eval.stats()) and mnistTest.reset() appear to execute INSIDE the loop.
// Logging full stats per batch and resetting the iterator mid-loop look unintended
// (reset() inside the loop would restart iteration); confirm placement against full source.
Evaluation eval = new Evaluation(outputNum);
while(mnistTest.hasNext()){
    DataSet ds = mnistTest.next();
    // Forward pass in inference mode (train = false).
    INDArray output = model.output(ds.getFeatures(), false);
    // Accumulate confusion-matrix counts for this batch.
    eval.eval(ds.getLabels(), output);
    log.info(eval.stats());
    mnistTest.reset();
// Run accuracy/F1 (Evaluation) and multi-class ROC (ROCMultiClass) in a single pass over the test data.
IEvaluation[] evals = net.doEvaluation(test, new Evaluation(), new ROCMultiClass());
long endEval = System.currentTimeMillis();
// NOTE(review): fragment — the StringBuilder receiver of this .append(...) chain is outside
// the visible snippet, as are 'e' (Evaluation), 'r' (ROCMultiClass), 'start', 'end',
// 'startEval', 'iterAfter' and 'itersBefore'. Presumably e = evals[0], r = evals[1] — confirm.
        .append(" subsetMiniBatches ").append(iterAfter - itersBefore) //Note: "end of epoch" effect - may be smaller than subset size
        .append(" trainMS ").append(end - start).append(" evalMS ").append(endEval - startEval)
        .append(" accuracy ").append(e.accuracy()).append(" f1 ").append(e.f1())
        .append(" AvgAUC ").append(r.calculateAverageAUC()).append(" AvgAUPRC ").append(r.calculateAverageAUCPR()).append("\n");
private static void evaluatePerformance(MultiLayerNetwork net, int testStartIdx, int nExamples, String outputDirectory) throws Exception { //Assuming here that the full test data set doesn't fit in memory -> load 10 examples at a time Map<Integer, String> labelMap = new HashMap<>(); labelMap.put(0, "circle"); labelMap.put(1, "square"); labelMap.put(2, "arc"); labelMap.put(3, "line"); Evaluation evaluation = new Evaluation(labelMap); DataSetIterator testData = getDataSetIterator(outputDirectory, testStartIdx, nExamples, 1000); while(testData.hasNext()) { DataSet dsTest = testData.next(); INDArray predicted = net.output(dsTest.getFeatures(), false); INDArray actual = dsTest.getLabels(); evaluation.evalTimeSeries(actual, predicted); } System.out.println(evaluation.stats()); }
// Top-5 accuracy evaluation on the TinyImageNet test set, then persist the report.
Evaluation e = new Evaluation(TinyImageNetDataSetIterator.getLabels(false), 5); //Set up for top 5 accuracy
net.doEvaluation(test, e);
log.info(e.stats());
// NOTE(review): output filename "evaulation.txt" is misspelled ("evaluation");
// left unchanged here since downstream tooling may already reference the existing name.
FileUtils.writeStringToFile(new File(saveDir, "evaulation.txt"), e.stats(), StandardCharsets.UTF_8);
// Log the accumulated evaluation report; the boolean argument selects an alternate
// stats format — presumably confusion-matrix / warning verbosity; confirm against
// the Evaluation.stats(boolean) Javadoc for this DL4J version.
log.info(eval.stats(true));
/**
 * Computes the F1 score for the supplied examples against their true labels.
 * Interpret it as a 0-to-1 "fraction correct" style measure: higher is better.
 *
 * @param examples the examples to classify (one example per row)
 * @param labels   the ground-truth labels
 * @return the F1 score (harmonic mean of precision and recall) over all examples
 */
@Override
public double f1Score(INDArray examples, INDArray labels) {
    Evaluation scorer = new Evaluation();
    // Score predicted probabilities against the true labels.
    scorer.eval(labels, labelProbabilities(examples));
    return scorer.f1();
}
/**
 * Creates a listener that runs evaluation on {@code iterator} every
 * {@code frequency} iterations, triggered at iteration end, using a
 * fresh {@link Evaluation} instance.
 *
 * @param iterator  data set to evaluate on
 * @param frequency evaluate once every this-many iterations
 */
public EvaluativeListener(@NonNull DataSetIterator iterator, int frequency) {
    // Delegate to the full constructor with the default invocation point and evaluator.
    this(iterator, frequency, InvocationType.ITERATION_END, new Evaluation());
}
// Evaluate with record metadata attached, so each misclassified prediction can be
// traced back to its originating test record.
Evaluation eval = new Evaluation(numberOfClasses);
eval.eval(testData.getLabels(), output, testMetaData); //Note we are passing in the test set metadata here
List<Prediction> predictionErrors = eval.getPredictionErrors();
System.out.println("\n\n+++++ Prediction Errors +++++");
for(Prediction p : predictionErrors){
    // NOTE(review): snippet is truncated — printing the FULL eval.stats() once per
    // prediction error looks unintended ('p' itself is never used here); likely the
    // loop should print 'p' and stats() belongs after it. Confirm against full source.
    System.out.println(eval.stats());
@Override public String evaluate(FederatedDataSet federatedDataSet) { //evaluate the model on the test set DataSet testData = (DataSet) federatedDataSet.getNativeDataSet(); double score = model.score(testData); Evaluation eval = new Evaluation(numClasses); INDArray output = model.output(testData.getFeatureMatrix()); eval.eval(testData.getLabels(), output); return "Score: " + score; }
// Builds the human-readable stats report: confusion listing, warnings, then summary scores.
StringBuilder builder = new StringBuilder().append("\n");
StringBuilder warnings = new StringBuilder();
List<Integer> classes = confusion().getClasses();
// NOTE(review): fragment — the nested loops declaring 'clazz'/'clazz2' (and the
// 'actual'/'predicted' declarations) that drive this confusion-matrix listing are
// outside the visible snippet; brace balance here reflects that truncation.
actual = resolveLabelForClass(clazz);
int count = confusion().getCount(clazz, clazz2);
if (count != 0) {
    predicted = resolveLabelForClass(clazz2);
    builder.append(String.format("Examples labeled as %s classified by model as %s: %d times%n", actual, predicted, count));
// Append precision/recall warnings for classes with degenerate prediction counts.
warningHelper(warnings, falsePositivesWarningClasses, "precision");
warningHelper(warnings, falseNegativesWarningClasses, "recall");
int nClasses = confusion().getClasses().size();
// Fixed 4-decimal formatting for all score lines.
DecimalFormat df = new DecimalFormat("0.0000");
double acc = accuracy();
double precisionMacro = precision(EvaluationAveraging.Macro);
double recallMacro = recall(EvaluationAveraging.Macro);
double f1Macro = f1(EvaluationAveraging.Macro);
builder.append("\n==========================Scores========================================");
builder.append("\n # of classes: ").append(nClasses);
builder.append("\n Accuracy: ").append(format(df, acc));
// Top-N accuracy line is only meaningful when topN > 1 was configured.
if (topN > 1) {
    double topNAcc = topNAccuracy();
    builder.append("\n Top ").append(topN).append(" Accuracy: ").append(format(df, topNAcc));
/**
 * Collects statistics on the real outcomes vs the guesses.
 * This is for logistic outcome matrices.
 * <p>
 * Note that an IllegalArgumentException is thrown if the two passed in
 * matrices aren't the same length.
 *
 * @param realOutcomes the real outcomes (labels - usually binary)
 * @param guesses      the guesses/prediction (usually a probability vector)
 */
public void eval(INDArray realOutcomes, INDArray guesses) {
    // Delegate to the metadata-aware overload with no record metadata;
    // the cast disambiguates which overload is targeted by the null argument.
    eval(realOutcomes, guesses, (List<Serializable>) null);
}
/**
 * Top-N accuracy of the predictions so far. For top N = 1 (the default) this
 * is equivalent to {@link #accuracy()}.
 *
 * @return top-N accuracy, or 0.0 when no top-N records have been accumulated
 */
public double topNAccuracy() {
    if (topN <= 1) {
        return accuracy();
    }
    // Guard against division by zero before any evaluation has been run.
    return topNTotalCount == 0 ? 0.0 : (double) topNCorrectCount / topNTotalCount;
}
/**
 * Calculates the macro-averaged F1 score across all classes.
 * <p>
 * Per class: F1 = 2*TP / (2*TP + FP + FN), i.e. the harmonic mean of
 * precision and recall (TP = true positives, FP = false positives,
 * FN = false negatives).
 *
 * @return the macro-average F1 score over the current accumulated guesses
 */
public double f1() {
    // Delegate to the averaging-aware overload with macro averaging.
    return f1(EvaluationAveraging.Macro);
}
// Top-5 accuracy evaluation of the Spark-trained network over the test paths.
Evaluation evaluation = new Evaluation(TinyImageNetDataSetIterator.getLabels(false), 5); //Set up for top 5 accuracy
// doEvaluation returns an IEvaluation[]; a single Evaluation was passed in, so take index 0.
evaluation = (Evaluation) sparkNet.doEvaluation(pathsTest, loader, evaluation)[0];
log.info("Evaluation statistics: {}", evaluation.stats());
// Persist the stats so the results outlive the Spark job.
SparkUtils.writeStringToFile(evalPath, evaluation.stats(), sc);
// Log the accumulated evaluation report string.
log.info(eval.stats());
/**
 * Returns the F1 score for the given examples against their true labels.
 * The score lies on a 0-to-1 scale; the higher the value, the more the
 * classifier got right.
 *
 * @param examples the examples to classify (one example in each row)
 * @param labels   the true labels
 * @return the F1 score over all supplied examples
 */
@Override
public double f1Score(INDArray examples, INDArray labels) {
    // Obtain class probabilities first, then score them in a fresh evaluator.
    INDArray predicted = labelProbabilities(examples);
    Evaluation evaluation = new Evaluation();
    evaluation.eval(labels, predicted);
    return evaluation.f1();
}
/**
 * Creates a listener that runs evaluation on {@code iterator} every
 * {@code frequency} iterations, triggered at iteration end, using a
 * fresh {@link Evaluation} instance.
 *
 * @param iterator  multi-data-set iterator to evaluate on
 * @param frequency evaluate once every this-many iterations
 */
public EvaluativeListener(@NonNull MultiDataSetIterator iterator, int frequency) {
    // Delegate to the full constructor with the default invocation point and evaluator.
    this(iterator, frequency, InvocationType.ITERATION_END, new Evaluation());
}
/**
 * Evaluates the given true labels against the output produced by running
 * {@code input} through the supplied {@link ComputationGraph}.
 *
 * @param trueLabels the labels to use
 * @param input      the input to feed the network for evaluation
 * @param network    the network used to generate the output
 */
public void eval(INDArray trueLabels, INDArray input, ComputationGraph network) {
    // output(false, input): inference mode (train = false); only the graph's
    // first output array is scored here.
    eval(trueLabels, network.output(false, input)[0]);
}
/**
 * Scores the graph as the classification accuracy obtained by evaluating it
 * over the entire iterator.
 *
 * @param graph    network to evaluate
 * @param iterator test data to evaluate on
 * @return accuracy of the network's predictions
 */
@Override
public double score(ComputationGraph graph, MultiDataSetIterator iterator) {
    return graph.evaluate(iterator).accuracy();
}
/**
 * Scores the graph as the F1 score obtained by evaluating it over the
 * entire iterator.
 *
 * @param graph    network to evaluate
 * @param iterator test data to evaluate on
 * @return F1 score of the network's predictions
 */
@Override
public double score(ComputationGraph graph, DataSetIterator iterator) {
    return graph.evaluate(iterator).f1();
}