@Override public DoubleMatrix gradient(DoubleMatrix matrix) { DoubleMatrix newInstance = newInstance(matrix); if (matrix.isSparse()) { // if we have a sparse matrix, it is more efficient to loop over the // sparse column vectors int[] columnIndices = matrix.columnIndices(); for (int col : columnIndices) { newInstance.setColumnVector(col, gradient(matrix.getColumnVector(col))); } } else { // on dense matrices we can be faster by directly looping over the items for (int i = 0; i < matrix.getRowCount(); i++) { for (int j = 0; j < matrix.getColumnCount(); j++) { newInstance.set(i, j, gradient(matrix.get(i, j))); } } } return newInstance; }
@Override public DoubleMatrix apply(DoubleMatrix matrix) { DoubleMatrix newInstance = newInstance(matrix); if (matrix.isSparse()) { // if we have a sparse matrix, it is more efficient to loop over the // sparse row vectors int[] rows = matrix.rowIndices(); for (int row : rows) { DoubleVector rowVector = matrix.getRowVector(row); if (rowVector.getLength() > 0) { DoubleVector apply = apply(rowVector); newInstance.setRowVector(row, apply); } } } else { // on dense matrices we can be faster by directly looping over the items for (int i = 0; i < matrix.getRowCount(); i++) { for (int j = 0; j < matrix.getColumnCount(); j++) { newInstance.set(i, j, apply(matrix.get(i, j))); } } } return newInstance; }
/**
 * Computes the squared-error loss between the expected outcome {@code y} and
 * the {@code hypothesis}: the sum of squared element differences, normalized
 * by the number of rows (observations).
 *
 * @param y the expected outcome matrix.
 * @param hypothesis the predicted outcome matrix, same dimensions as y.
 * @return the accumulated squared error divided by the row count.
 */
@Override
public double calculateLoss(DoubleMatrix y, DoubleMatrix hypothesis) {
  double squaredErrorSum = 0d;
  // column-outer iteration kept so the floating point accumulation order
  // stays stable
  for (int col = 0; col < y.getColumnCount(); col++) {
    for (int row = 0; row < y.getRowCount(); row++) {
      double error = y.get(row, col) - hypothesis.get(row, col);
      squaredErrorSum += error * error;
    }
  }
  return squaredErrorSum / y.getRowCount();
}
public ConditionalLikelihoodCostFunction(DoubleMatrix features, DoubleMatrix outcome) { this.features = features; this.outcome = outcome; this.m = outcome.getRowCount(); this.classes = outcome.getColumnCount() == 1 ? 2 : outcome.getColumnCount(); }
public static double calculateRegularization(DoubleMatrix[] thetas, final int m, NetworkConfiguration conf) { double regularization = 0d; // only calculate the regularization term if lambda is not 0 if (conf.lambda != 0d) { for (DoubleMatrix theta : thetas) { regularization += (theta.slice(0, theta.getRowCount(), 1, theta.getColumnCount())).pow(2).sum(); } regularization = (conf.lambda / (2.0d * m)) * regularization; } return regularization; }
/**
 * Binarizes the given activations in place: each entry becomes 1 when it
 * exceeds a freshly drawn uniform random number, else 0.
 *
 * @param r the random source; exactly one draw is consumed per element.
 * @param hiddenActivations the activation matrix, mutated in place.
 * @return the same (mutated) matrix instance for chaining.
 */
static DoubleMatrix binarize(Random r, DoubleMatrix hiddenActivations) {
  final int rows = hiddenActivations.getRowCount();
  final int columns = hiddenActivations.getColumnCount();
  // row-major iteration kept so the random draw order stays stable for a
  // given seed
  for (int row = 0; row < rows; row++) {
    for (int col = 0; col < columns; col++) {
      double activation = hiddenActivations.get(row, col);
      hiddenActivations.set(row, col, activation > r.nextDouble() ? 1d : 0d);
    }
  }
  return hiddenActivations;
}
.deepCopy(); DoubleMatrix emissionProbabilityMatrix = this.emissionProbabilityMatrix .deepCopy(); DoubleVector hiddenPriorProbability = this.hiddenPriorProbability .deepCopy(); hiddenPriorProbability = alpha.getRowVector(0).multiply( beta.getRowVector(0)); final double modelLikelihood = estimateLikelihood(alpha); .iterateNonZero(); while (iterateNonZero.hasNext()) { temp += alpha.get(t, i) * emissionProbabilityMatrix.get(j, iterateNonZero.next() .getIndex()) * beta.get(t + 1, j); transitionProbabilityMatrix.set(i, j, transitionProbabilityMatrix.get(i, j) * temp / modelLikelihood); DoubleVectorElement next = iterateNonZero.next(); if (next.getIndex() == j) { temp += alpha.get(t, i) * beta.get(t, i); emissionProbabilityMatrix.set(i, j, temp / modelLikelihood); .subtract(transitionProbabilityMatrix).pow(2).sum() + this.emissionProbabilityMatrix.subtract(emissionProbabilityMatrix) .pow(2).sum() + this.getHiddenPriorProbability().subtract(hiddenPriorProbability)
unfoldParameters)[0].transpose(); positiveHiddenProbs.setColumnVector(0, DenseDoubleVector.ones(positiveHiddenProbs.getRowCount())); DoubleMatrix positiveAssociations = multiply(data, positiveHiddenProbs, true, false); negativeData.setColumnVector(0, DenseDoubleVector.ones(negativeData.getRowCount())); DoubleMatrix negativeHiddenProbs = activationFunction.apply(multiply( negativeData, theta, false, false)); negativeHiddenProbs.setColumnVector(0, DenseDoubleVector.ones(negativeHiddenProbs.getRowCount())); DoubleMatrix negativeAssociations = multiply(negativeData, negativeHiddenProbs, true, false); double j = data.subtract(negativeData).pow(2).sum(); DoubleMatrix thetaGradient = positiveAssociations.subtract( negativeAssociations).divide(data.getRowCount()); DoubleVector bias = thetaGradient.getColumnVector(0); thetaGradient = thetaGradient.subtract(thetaGradient.multiply(lambda / data.getRowCount())); thetaGradient.setColumnVector(0, bias); .multiply(-1).transpose()));
DoubleMatrix emissionProbabilityMatrix, DoubleVector hiddenPriorProbability, DoubleVector[] features) { final int numHiddenStates = beta.getColumnCount(); beta.setRowVector(features.length - 1, DenseDoubleVector.ones(numHiddenStates)); while (iterateNonZero.hasNext()) { DoubleVectorElement next = iterateNonZero.next(); sum += beta.get(t + 1, j) * transitionProbabilityMatrix.get(i, j) * emissionProbabilityMatrix.get(j, next.getIndex()); beta.set(t, i, sum);
public static void calculateGradients(DoubleMatrix[] thetas, DoubleMatrix[] thetaGradients, DoubleMatrix[] ax, DoubleMatrix[] deltaX, final int m, NetworkConfiguration conf) { // calculate the gradients of the weights for (int i = 0; i < thetaGradients.length; i++) { DoubleMatrix gradDXA = multiply(deltaX[i + 1], ax[i], true, false, conf); if (m != 1) { thetaGradients[i] = gradDXA.divide(m); } else { thetaGradients[i] = gradDXA; } if (conf.lambda != 0d) { thetaGradients[i] = thetaGradients[i].add((thetas[i] .multiply(conf.lambda / m))); // subtract the regularized bias DoubleVector regBias = thetas[i] .slice(0, thetas[i].getRowCount(), 0, 1).multiply(conf.lambda / m) .getColumnVector(0); thetaGradients[i].setColumnVector(0, regBias); } } }
DoubleMatrix emissionProbabilityMatrix, DoubleVector hiddenPriorProbability, DoubleVector[] features) { final int numHiddenStates = alpha.getColumnCount(); for (int i = 0; i < numHiddenStates; i++) { double emissionSum = 0d; while (firstFeatures.hasNext()) { emissionSum += emissionProbabilityMatrix.get(i, firstFeatures.next() .getIndex()); alpha.set(0, i, hiddenPriorProbability.get(i) * emissionSum); double sum = 0.0d; for (int j = 0; j < numHiddenStates; j++) { sum += alpha.get(t - 1, j) * transitionProbabilityMatrix.get(j, i); emissionSum += emissionProbabilityMatrix.get(i, featureIterator .next().getIndex()); alpha.set(t, i, sum * emissionSum);
DoubleVector rowVector = probabilityMatrix.getRowVector(row); + probabilityMatrix.getColumnCount() - 1); while (iterateNonZero.hasNext()) { DoubleVectorElement next = iterateNonZero.next(); double currentWordCount = next.getValue(); double logProbability = FastMath.log(currentWordCount) - normalizer; probabilityMatrix.set(row, next.getIndex(), logProbability);
/**
 * Applies the function row-wise over the given matrix and returns the result
 * in a fresh matrix; rows whose transformation yields an empty vector are
 * left untouched (i.e. stay at the new instance's defaults).
 *
 * @param matrix the matrix to transform.
 * @return a new matrix holding the transformed rows.
 */
@Override
public DoubleMatrix apply(DoubleMatrix matrix) {
  DoubleMatrix result = newInstance(matrix);
  final int rows = matrix.getRowCount();
  for (int row = 0; row < rows; row++) {
    DoubleVector transformed = apply(matrix.getRowVector(row));
    // skip empty results so the target row keeps its default content
    if (transformed.getLength() != 0) {
      result.setRowVector(row, transformed);
    }
  }
  return result;
}
DenseDoubleMatrix gradient = new DenseDoubleMatrix(theta.getRowCount(), theta.getColumnCount()); DoubleVector rowVector = features.getRowVector(row); double[] logProbabilities = new double[classes]; DoubleVectorElement next = iterateNonZero.next(); for (int i = 0; i < classes; i++) { logProbabilities[i] += theta.get(i, next.getIndex()); gradient.set(i, next.getIndex(), gradient.get(i, next.getIndex()) + prob); if (correctPrediction(i, outcome.getRowVector(row))) { gradient.set(i, next.getIndex(), gradient.get(i, next.getIndex()) - 1d); if (correctPrediction(i, outcome.getRowVector(row))) { cost -= Math.log(prob);
int count = (int) transitionProbabilities.get(array[i], array[i + 1]); transitionProbabilities.set(array[i], array[i + 1], ++count); final int[] rowEntries = transitionProbabilities.rowIndices(); for (int rowIndex : rowEntries) { DoubleVector rowVector = transitionProbabilities.getRowVector(rowIndex); double sum = rowVector.sum(); Iterator<DoubleVectorElement> iterateNonZero = rowVector.iterateNonZero(); double probability = FastMath.log(columnElement.getValue()) - FastMath.log(sum); transitionProbabilities.set(rowIndex, columnIndex, probability);
/**
 * Scales every element of a matrix from the interval [fromMin, fromMax] into
 * the interval [toMin, toMax].
 *
 * @param input the input matrix.
 * @param fromMin the lower bound of the input interval.
 * @param fromMax the upper bound of the input interval.
 * @param toMin the lower bound of the target interval.
 * @param toMax the upper bound of the target interval.
 * @return a new dense matrix with the scaled values; the input is untouched.
 */
public static DoubleMatrix minMaxScale(DoubleMatrix input, double fromMin,
    double fromMax, double toMin, double toMax) {
  final int rows = input.getRowCount();
  final int columns = input.getColumnCount();
  DoubleMatrix scaled = new DenseDoubleMatrix(rows, columns);
  for (int row = 0; row < rows; row++) {
    for (int col = 0; col < columns; col++) {
      // delegate the per-element scaling to the scalar overload
      scaled.set(row, col,
          minMaxScale(input.get(row, col), fromMin, fromMax, toMin, toMax));
    }
  }
  return scaled;
}
/**
 * Initializes every weight with a value drawn uniformly from (-eInit, eInit),
 * giving a zero-mean initialization.
 *
 * @param rnd the random data source; one draw is consumed per weight.
 * @param eInit the symmetric bound of the uniform interval.
 */
private void setWeightsUniformly(RandomDataImpl rnd, double eInit) {
  final int columns = weights.getColumnCount();
  final int rows = weights.getRowCount();
  // column-outer iteration kept so the draw order (and thus the result for a
  // fixed seed) stays stable
  for (int col = 0; col < columns; col++) {
    for (int row = 0; row < rows; row++) {
      weights.set(row, col, rnd.nextUniform(-eInit, eInit));
    }
  }
}
/**
 * Creates a new matrix with the given vector as the first column and the
 * other matrix filling the remaining columns. This is usually used in machine
 * learning algorithms that add a bias on the zero-index column.
 *
 * @param first the new first column.
 * @param otherMatrix the other matrix copied starting at the second column.
 */
public DenseDoubleMatrix(DenseDoubleVector first, DoubleMatrix otherMatrix) {
  this(otherMatrix.getRowCount(), otherMatrix.getColumnCount() + 1);
  // copy the bias vector into column zero of the column-major backing array
  System.arraycopy(first.toArray(), 0, matrix, 0, first.getDimension());
  // BUG FIX: the original accumulated a running offset, which misaligns every
  // column after a gap when the source is sparse (columnIndices() skips empty
  // columns). Deriving the offset from the column index places each column in
  // its correct slot regardless of gaps.
  for (int col : otherMatrix.columnIndices()) {
    double[] columnValues = otherMatrix.getColumnVector(col).toArray();
    System.arraycopy(columnValues, 0, matrix, (col + 1) * columnValues.length,
        columnValues.length);
  }
}
/**
 * Row-copies the given matrix to this sparse implementation.
 *
 * @param mat the matrix to copy.
 */
public SparseDoubleRowMatrix(DoubleMatrix mat) {
  this(mat.getRowCount(), mat.getColumnCount());
  // BUG FIX: iterate over the row count, not the column count - the original
  // used numColumns, which drops rows (or reads past the end) whenever the
  // source matrix is not square
  for (int row = 0; row < mat.getRowCount(); row++) {
    setRowVector(row, mat.getRowVector(row));
  }
}