    /**
     * Returns the gradient and the delta (error signal) for the given pre-output activations.
     *
     * @param preOut pre-output (pre-activation) values for this layer
     * @return pair of (gradient, delta)
     */
    private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut) {
        ILossFunction lossFunction = layerConf().getLossFn();
        INDArray labels2d = getLabels2d();
        INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

        Gradient gradient = new DefaultGradient();

        INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);

        //Weight gradient: equivalent to weightGradView.assign(input.transpose().mmul(delta))
        Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0);
        //Bias gradient: column sums of delta; biasGradView is initialized/zeroed first in the sum op
        delta.sum(biasGradView, 0);

        gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);

        return new Pair<>(gradient, delta);
    }
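    /*
     * Shape sketch for the gemm call above (illustrative, not part of the original code):
     * for a dense layer with a minibatch of m examples, input has shape [m, nIn] and
     * delta has shape [m, nOut], so
     *
     *     Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0)
     *
     * computes input^T * delta = [nIn, m] x [m, nOut] = [nIn, nOut], matching the shape
     * of the weight gradient view. delta.sum(biasGradView, 0) reduces over the minibatch
     * dimension, yielding the [1, nOut] bias gradient.
     */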
    /**
     * Compute the score after labels and input have been set.
     *
     * @param fullNetworkL1 L1 regularization term for the entire network
     * @param fullNetworkL2 L2 regularization term for the entire network
     * @param training      whether the score should be calculated at train or test time (this affects things like
     *                      application of dropout, etc.)
     * @return score (loss function value)
     */
    @Override
    public double computeScore(double fullNetworkL1, double fullNetworkL2, boolean training) {
        if (input == null || labels == null)
            throw new IllegalStateException("Cannot calculate score without input and labels " + layerId());
        this.fullNetworkL1 = fullNetworkL1;
        this.fullNetworkL2 = fullNetworkL2;
        INDArray preOut = preOutput2d(training);

        ILossFunction lossFunction = layerConf().getLossFn();

        //Sum of per-example losses (average == false), plus regularization, averaged over the minibatch
        double score = lossFunction.computeScore(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray, false);
        score += fullNetworkL1 + fullNetworkL2;
        score /= getInputMiniBatchSize();

        this.score = score;

        return score;
    }
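    /*
     * Usage sketch (hypothetical variable names; a minimal sequence assuming direct use of
     * this layer rather than going through the enclosing network):
     *
     *     layer.setInput(features);
     *     layer.setLabels(labels);
     *     double loss = layer.computeScore(fullNetworkL1, fullNetworkL2, true);
     *
     * fullNetworkL1 and fullNetworkL2 are normally supplied by the enclosing network,
     * which accumulates the regularization terms across all layers.
     */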
    /**
     * Compute the score for each example individually, after labels and input have been set.
     *
     * @param fullNetworkL1 L1 regularization term for the entire network (or 0.0 to not include regularization)
     * @param fullNetworkL2 L2 regularization term for the entire network (or 0.0 to not include regularization)
     * @return a column INDArray of shape [numExamples, 1], where entry i is the score of the ith example
     */
    @Override
    public INDArray computeScoreForExamples(double fullNetworkL1, double fullNetworkL2) {
        if (input == null || labels == null)
            throw new IllegalStateException("Cannot calculate score without input and labels " + layerId());
        INDArray preOut = preOutput2d(false);

        ILossFunction lossFunction = layerConf().getLossFn();
        INDArray scoreArray =
                lossFunction.computeScoreArray(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray);
        double l1l2 = fullNetworkL1 + fullNetworkL2;
        if (l1l2 != 0.0) {
            scoreArray.addi(l1l2);
        }
        return scoreArray;
    }
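    /*
     * Note on semantics (derived from the two methods above): computeScore adds the
     * regularization terms once and then divides by the minibatch size, whereas
     * computeScoreForExamples adds fullNetworkL1 + fullNetworkL2 to every example's
     * score. Consequently, scoreArray.meanNumber() will generally differ from the
     * value returned by computeScore(...) whenever the regularization terms are nonzero.
     */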