/**
 * Creates the objective function used to train on the supplied documents.
 *
 * The window size, class index, label indices, feature-to-clique map and the
 * prior / background-symbol / sigma / multi-thread settings are taken from
 * this object's own fields and its {@code flags}.
 *
 * @param data   the encoded training documents
 * @param labels the label sequence for each document
 * @return a newly constructed {@code CRFLogConditionalObjectiveFunction}
 */
protected CRFLogConditionalObjectiveFunction getObjectiveFunction(int[][][][] data, int[][] labels) {
  // NOTE(review): the literal null is presumably the feature-value array
  // (i.e. no real-valued features) -- confirm against the constructor overload.
  CRFLogConditionalObjectiveFunction objective = new CRFLogConditionalObjectiveFunction(
      data,
      labels,
      windowSize,
      classIndex,
      labelIndices,
      map,
      flags.priorType,
      flags.backgroundSymbol,
      flags.sigma,
      null,
      flags.multiThreadGrad);
  return objective;
}
@Override public void calculateStochastic(double[] x, double [] v, int[] batch) { to2D(x, weights); setWeights(weights); double batchScale = ((double) batch.length)/((double) this.dataDimension()); docIDs.add(item); double prob = multiThreadGradient(docIDs, false); // the log prob of the sequence given the model, which is the negation of value at this point applyPrior(x, batchScale);
public void calculateStochasticGradient(double[] x, int[] batch) { if (derivative == null) { derivative = new double[domainDimension()]; to2D(x, weights); setWeights(weights); docIDs.add(item); multiThreadGradient(docIDs, true);
/**
 * Converts a 2D weight array to its 1D form by delegating to the
 * two-argument {@code to1D}, passing {@code domainDimension()} as the
 * second argument.
 *
 * @param weights the 2D weights to convert
 * @return the result of {@code to1D(weights, domainDimension())}
 */
public double[] to1D(double[][] weights) {
  final int flatLength = domainDimension();
  return to1D(weights, flatLength);
}
/** * Computes value of function for specified value of x (scaled by xScale) * only over samples indexed by batch. * NOTE: This function does not do regularization (regularization is done by the minimizer). * * @param x - unscaled weights * @param xScale - how much to scale x by when performing calculations * @param batch - indices of which samples to compute function over * @return value of function at specified x (scaled by xScale) for samples */ @Override public double valueAt(double[] x, double xScale, int[] batch) { double prob = 0.0; // the log prob of the sequence given the model, which is the negation of value at this point // int[][] wis = getWeightIndices(); to2D(x, xScale, weights); setWeights(weights); // iterate over all the documents for (int ind : batch) { prob += valueForADoc(ind); } if (Double.isNaN(prob)) { // shouldn't be the case throw new RuntimeException("Got NaN for prob in CRFLogConditionalObjectiveFunction.calculate()"); } value = -prob; return value; }
to2D(x, weights); setWeights(weights); clear2D(E); double prob = regularGradientAndValue(); // the log prob of the sequence given the model, which is the negation of value at this point applyPrior(x, 1.0);
/**
 * Stores the training data and configuration, allocates the count and weight
 * arrays, optionally fills the empirical counts, and computes the domain
 * dimension.
 *
 * @param data             the encoded training documents
 * @param labels           the label sequence for each document
 * @param window           the window size
 * @param classIndex       index of class labels; its size becomes {@code numClasses}
 * @param labelIndices     per-clique-size label indices
 * @param map              for each feature, the position in {@code labelIndices} it uses
 * @param priorType        name of the prior, resolved through {@code getPriorType}
 * @param backgroundSymbol the background symbol
 * @param sigma            the sigma value stored for the prior
 * @param featureVal       feature values (stored as given)
 * @param multiThreadGrad  the multi-thread gradient setting
 * @param calcEmpirical    whether to fill {@code Ehat} via {@code empiricalCounts} now
 */
CRFLogConditionalObjectiveFunction(int[][][][] data, int[][] labels, int window, Index<String> classIndex, List<Index<CRFLabel>> labelIndices, int[] map, String priorType, String backgroundSymbol, double sigma, double[][][][] featureVal, int multiThreadGrad, boolean calcEmpirical) {
  // Plain configuration and data references.
  this.window = window;
  this.classIndex = classIndex;
  this.numClasses = classIndex.size();
  this.labelIndices = labelIndices;
  this.map = map;
  this.data = data;
  this.featureVal = featureVal;
  this.labels = labels;
  this.prior = getPriorType(priorType);
  this.backgroundSymbol = backgroundSymbol;
  this.sigma = sigma;
  this.multiThreadGrad = multiThreadGrad;

  // Working arrays, allocated only after the fields above are in place.
  Ehat = empty2D();
  E = empty2D();
  weights = empty2D();
  if (calcEmpirical) {
    empiricalCounts(Ehat);
  }

  // The domain dimension is the total number of labels over all features.
  int totalDimension = 0;
  for (int f = 0; f < map.length; f++) {
    totalDimension += labelIndices.get(map[f]).size();
  }
  domainDimension = totalDimension;
}
public double calculateStochasticUpdate(double[] x, double xScale, int[] batch, double gScale) { to2D(x, xScale, weights); setWeights(weights); docIDs.add(item); double prob = multiThreadGradient(docIDs, true); // the log prob of the sequence given the model, which is the negation of value at this point
count++; func.setFeatureGrouping(fg); initialWeights = func.initial(); } else { try { if (func.gradientCheck()) { log.info("gradient check passed"); } else {
/**
 * Scales the given 1D weights by {@code wScale} and converts them to 2D form
 * using this object's {@code labelIndices} and {@code map}.
 *
 * Beware: the scaling is written back into {@code weights1D}, so the
 * caller's array is modified in place.
 *
 * @param weights1D the 1D weights; every element is multiplied by {@code wScale}
 * @param wScale    the scale factor
 * @return the 2D weights built from the scaled array
 */
public double[][] to2D(double[] weights1D, double wScale) {
  final int count = weights1D.length;
  for (int idx = 0; idx < count; idx++) {
    weights1D[idx] *= wScale;
  }
  return to2D(weights1D, this.labelIndices, this.map);
}
public void calculateStochasticGradientOnly(double[] x, int[] batch) { double[][] weights = to2D(x); double batchScale = ((double) batch.length)/((double) this.dataDimension()); double[][] E = empty2D();
@Override public Pair<Integer, Double> process(Pair<Integer, List<Integer>> threadIDAndDocIndices) { int tID = threadIDAndDocIndices.first(); if (tID < 0 || tID >= multiThreadGrad) throw new IllegalArgumentException("threadID must be with in range 0 <= tID < multiThreadGrad(="+multiThreadGrad+")"); List<Integer> docIDs = threadIDAndDocIndices.second(); double[][] partE; // initialized below double[][] partEhat = null; // initialized below if (multiThreadGrad == 1) { partE = E; if (calculateEmpirical) partEhat = Ehat; } else { partE = parallelE[tID]; // TODO: if we put this on the heap, this clearing will be unnecessary clear2D(partE); if (calculateEmpirical) { partEhat = parallelEhat[tID]; clear2D(partEhat); } } double probSum = 0; for (int docIndex: docIDs) { if (calculateEmpirical) probSum += expectedAndEmpiricalCountsAndValueForADoc(partE, partEhat, docIndex); else probSum += expectedCountsAndValueForADoc(partE, docIndex); } return new Pair<>(tID, probSum); }
CRFLogConditionalObjectiveFunction func = new CRFLogConditionalObjectiveFunction( data, labels, featureIndex, windowSize, classIndex, labelIndices, map, flags.backgroundSymbol, flags.sigma); func.crfType = flags.crfType; initialWeights = func.initial(); } else { try { this.weights = func.to2D(weights);
@Override public double[] initial() { return initial(rand); } public double[] initial(boolean useRandomSeed) {
parallelE = new double[multiThreadGrad][][]; for (int i=0; i<multiThreadGrad; i++) parallelE[i] = empty2D(); parallelEhat = new double[multiThreadGrad][][]; for (int i=0; i<multiThreadGrad; i++) parallelEhat[i] = empty2D(); objective += result.second(); if (multiThreadGrad > 1) { combine2DArr(E, parallelE[tID]); if (calculateEmpirical) combine2DArr(Ehat, parallelEhat[tID]);
public double[] initial(Random randGen) { double[] initial = new double[domainDimension()]; for (int i = 0; i < initial.length; i++) { initial[i] = randGen.nextDouble() + smallConst; // initial[i] = generator.nextDouble() * largeConst; // initial[i] = -1+2*(i); // initial[i] = (i == 0 ? 1 : 0); } return initial; }
/**
 * Stores the training data and configuration in this object's fields and then
 * runs {@code empiricalCounts(data, labels)}.
 *
 * @param data             the encoded training documents
 * @param labels           the label sequence for each document
 * @param featureIndex     the feature index (stored as given)
 * @param window           the window size
 * @param classIndex       index of class labels; its size becomes {@code numClasses}
 * @param labelIndices     the label indices (stored as given)
 * @param map              the feature map (stored as given)
 * @param prior            the prior, as an int code
 * @param backgroundSymbol the background symbol
 * @param sigma            the sigma value stored for the prior
 */
CRFLogConditionalObjectiveFunction(int[][][][] data, int[][] labels, Index featureIndex, int window, Index classIndex, Index[] labelIndices, int[] map, int prior, String backgroundSymbol, double sigma) {
  this.featureIndex = featureIndex;
  this.window = window;
  this.classIndex = classIndex;
  this.numClasses = classIndex.size();
  this.labelIndices = labelIndices;
  this.map = map;
  this.data = data;
  this.labels = labels;
  this.prior = prior;
  this.backgroundSymbol = backgroundSymbol;
  this.sigma = sigma;
  // Runs last, after every field above has been assigned.
  // NOTE(review): presumably empiricalCounts reads some of those fields -- confirm.
  empiricalCounts(data, labels);
}
private void empiricalCounts(int[][][][] data, int[][] labels) { Ehat = empty2D();
/**
 * Calculates both value and partial derivatives at the point x, and saves them internally.
 * <p>
 * This override first resets {@code Ehat} with {@code clear2D} and then
 * delegates the actual computation to {@code super.calculate(x)}.
 *
 * @param x the point at which to evaluate
 */
@Override
public void calculate(double[] x) {
  // Reset Ehat before the superclass computation runs.
  clear2D(Ehat);
  super.calculate(x);
}
}