protected double regularGradientAndValue() {
  int totalLen = data.length;
  List<Integer> docIDs = new ArrayList<>(totalLen);
  for (int m = 0; m < totalLen; m++)
    docIDs.add(m);
  return multiThreadGradient(docIDs, false);
}
docIDs.add(item);
double prob = multiThreadGradient(docIDs, true); // the log prob of the sequence given the model, which is the negation of value at this point
docIDs.add(item);
multiThreadGradient(docIDs, true);
docIDs.add(item);
double prob = multiThreadGradient(docIDs, false); // the log prob of the sequence given the model, which is the negation of value at this point
prob = multiThreadGradient(docIDs, true);
prob = multiThreadGradient(docIDs, false);
/**
 * Performs stochastic gradient update based
 * on samples indexed by batch, but does not apply regularization.
 *
 * @param x - unscaled weights
 * @param batch - indices of which samples to compute function over
 */
@Override
public void calculateStochasticGradient(double[] x, int[] batch) {
  if (derivative == null) {
    derivative = new double[domainDimension()];
  }
  // int[][] wis = getWeightIndices();
  // was: double[][] weights = to2D(x, 1.0); // but 1.0 should be the same as omitting 2nd parameter....
  to2D(x, weights);
  setWeights(weights);

  // iterate over all the documents
  List<Integer> docIDs = new ArrayList<Integer>(batch.length);
  for (int m = 0; m < batch.length; m++)
    docIDs.add(batch[m]);
  multiThreadGradient(docIDs, true);

  int index = 0;
  for (int i = 0; i < E.length; i++) {
    for (int j = 0; j < E[i].length; j++) {
      // real gradient should be empirical-expected;
      // but since we minimize -L(\theta), the gradient is -(empirical-expected)
      derivative[index++] = (E[i][j] - Ehat[i][j]);
    }
  }
}
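// A hypothetical, self-contained sketch (not part of the original source): it only
// illustrates the flattening convention used by calculateStochasticGradient above,
// where the 2D model-expected counts E[i][j] and empirical counts Ehat[i][j] are
// written row-major into a flat derivative array as (E - Ehat), i.e. the gradient
// of -L(\theta). The class name and all numbers below are made up for illustration.
import java.util.Arrays;

public class GradientFlattenSketch {
  public static void main(String[] args) {
    double[][] E    = { {0.5, 0.25}, {0.75, 0.5} };  // toy model-expected feature counts
    double[][] Ehat = { {1.0, 0.0},  {0.0,  1.0} };  // toy empirical feature counts

    // Total number of entries across the (possibly ragged) 2D arrays.
    int dim = 0;
    for (double[] row : E) {
      dim += row.length;
    }
    double[] derivative = new double[dim];

    // Same indexing scheme as above: row-major flattening of (expected - empirical).
    int index = 0;
    for (int i = 0; i < E.length; i++) {
      for (int j = 0; j < E[i].length; j++) {
        derivative[index++] = E[i][j] - Ehat[i][j];
      }
    }

    System.out.println(Arrays.toString(derivative)); // prints [-0.5, 0.25, 0.75, -0.5]
  }
}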