/**
 * Scales the wrapped vector by {@code scalar}.
 *
 * @param scalar the factor to scale by.
 * @return the scaled vector produced by the wrapped instance.
 */
@Override
public DoubleVector multiply(double scalar) {
  // plain delegation to the backing vector
  DoubleVector scaled = vector.multiply(scalar);
  return scaled;
}
/**
 * Element-wise multiplication of the wrapped vector with the given vector.
 *
 * BUG FIX: the parameter {@code vector} shadowed the wrapped field of the
 * same name, so the method returned the argument multiplied by itself
 * (argument squared) instead of field times argument. Qualifying with
 * {@code this} restores the intended delegation (matching the scalar
 * overload, which scales the wrapped field).
 *
 * @param vector the vector to multiply with.
 * @return the element-wise product of the wrapped vector and the argument.
 */
@Override
public DoubleVector multiply(DoubleVector vector) {
  return this.vector.multiply(vector);
}
/**
 * Scales the backing vector by {@code scalar}.
 *
 * @param scalar the factor to scale by.
 * @return the scaled result from the backing vector.
 */
@Override
public DoubleVector multiply(double scalar) {
  // forward to the main vector, which does the actual arithmetic
  return this.mainVector.multiply(scalar);
}
/**
 * Element-wise multiplication of the backing vector with the given vector.
 *
 * BUG FIX: the original body was {@code vector.multiply(vector)} — the
 * parameter multiplied by itself — which ignores the wrapped state entirely.
 * The sibling overloads delegate to {@code this.mainVector}, so this one is
 * made consistent with them. NOTE(review): field name {@code mainVector} is
 * inferred from the adjacent overloads — confirm against the enclosing class.
 *
 * @param vector the vector to multiply with.
 * @return the element-wise product of the backing vector and the argument.
 */
@Override
public DoubleVector multiply(DoubleVector vector) {
  return this.mainVector.multiply(vector);
}
/**
 * Multiplies the wrapped vector by a scalar factor.
 *
 * @param scalar the factor to scale by.
 * @return the scaled vector.
 */
@Override
public DoubleVector multiply(double scalar) {
  // the wrapped vector performs the actual scaling
  final DoubleVector result = vector.multiply(scalar);
  return result;
}
/**
 * Element-wise product of the backing vector and the given vector.
 *
 * @param vector the vector to multiply with.
 * @return the element-wise product computed by the backing vector.
 */
@Override
public DoubleVector multiply(DoubleVector vector) {
  // delegate to the main vector; the argument intentionally does not shadow it
  DoubleVector product = this.mainVector.multiply(vector);
  return product;
}
/**
 * Gradient for a single training example: feature * (h(x) - y), where the
 * residual is taken from the first component of the difference vector.
 *
 * @param feature the feature vector x of the example.
 * @param y the observed outcome.
 * @param hypothesis the predicted outcome h(x).
 * @return the feature vector scaled by the scalar residual.
 */
@Override
public DoubleVector calculateGradient(DoubleVector feature, DoubleVector y,
    DoubleVector hypothesis) {
  // scalar residual h(x) - y of the (single-dimensional) outcome
  double residual = hypothesis.subtract(y).get(0);
  return feature.multiply(residual);
}
}
/**
 * Binary cross-entropy loss, summed over all components:
 * sum( -y * log(h) - (1 - y) * log(1 - h) ).
 *
 * @param y the observed outcomes (expected in {0, 1}).
 * @param hypothesis the predicted probabilities h in (0, 1).
 * @return the total cross-entropy over all components.
 */
@Override
public double calculateLoss(DoubleVector y, DoubleVector hypothesis) {
  DoubleVector logHypothesis = MathUtils.logVector(hypothesis);
  DoubleVector logOneMinusHypothesis = MathUtils.logVector(hypothesis
      .subtractFrom(1d));
  // -y * log(h): penalty for the positive class
  DoubleVector positiveClassPenalty = y.multiply(-1d).multiply(logHypothesis);
  // (1 - y) * log(1 - h): (to-be-subtracted) term for the negative class
  DoubleVector negativeClassTerm = y.subtractFrom(1.0d).multiply(
      logOneMinusHypothesis);
  return positiveClassPenalty.subtract(negativeClassTerm).sum();
}
/**
 * Hinge loss for a single example: max(0, 1 - y * h), based on the first
 * component of the element-wise product of outcome and hypothesis.
 *
 * @param y the observed outcome (expected in {-1, +1}).
 * @param hypothesis the raw prediction.
 * @return zero when the margin is at least one, otherwise the margin deficit.
 */
@Override
public double calculateLoss(DoubleVector y, DoubleVector hypothesis) {
  // margin = y * h on the (single) output dimension
  double margin = y.multiply(hypothesis).get(0);
  return FastMath.max(0, 1 - margin);
}
public void computeMomentum() { // compute momentum if (lastTheta != null && momentum != 0d) { // we add momentum as the parameter "m" multiplied by the // difference of both theta vectors theta = theta.add((lastTheta.subtract(theta)).multiply(momentum)); } }
/**
 * Gradient for one example: the feature vector scaled by the scalar residual
 * (h(x) - y), read from the first component.
 *
 * @param feature the feature vector x.
 * @param y the observed outcome.
 * @param hypothesis the prediction h(x).
 * @return feature * (h(x) - y).
 */
@Override
public DoubleVector calculateGradient(DoubleVector feature, DoubleVector y,
    DoubleVector hypothesis) {
  final double error = hypothesis.subtract(y).get(0);
  return feature.multiply(error);
}
/**
 * Computes the per-example gradient feature * (h(x) - y), taking the residual
 * from the first component of the difference vector.
 *
 * @param feature the feature vector x.
 * @param y the observed outcome.
 * @param hypothesis the prediction h(x).
 * @return the scaled feature vector.
 */
@Override
public DoubleVector calculateGradient(DoubleVector feature, DoubleVector y,
    DoubleVector hypothesis) {
  DoubleVector difference = hypothesis.subtract(y);
  return feature.multiply(difference.get(0));
}
/**
 * Per-example gradient: scales the feature vector by the scalar prediction
 * error (h(x) - y), read from component zero.
 *
 * @param feature the feature vector x.
 * @param y the observed outcome.
 * @param hypothesis the prediction h(x).
 * @return feature * (h(x) - y).
 */
@Override
public DoubleVector calculateGradient(DoubleVector feature, DoubleVector y,
    DoubleVector hypothesis) {
  double predictionError = hypothesis.subtract(y).get(0);
  DoubleVector gradient = feature.multiply(predictionError);
  return gradient;
}
/**
 * Log loss: sum over all components of y * log(h).
 *
 * NOTE(review): the sum is not negated here — presumably the caller negates
 * or minimizes accordingly; confirm against the call sites.
 *
 * @param y the observed outcomes.
 * @param hypothesis the predicted probabilities.
 * @return sum(y * log(h)).
 */
@Override
public double calculateLoss(DoubleVector y, DoubleVector hypothesis) {
  DoubleVector logHypothesis = MathUtils.logVector(hypothesis);
  return y.multiply(logHypothesis).sum();
}
/**
 * Adam-style gradient update: maintains exponential moving averages of the
 * gradient (first moment) and of the squared gradient (second moment),
 * derives a bias-corrected step size, and returns the rescaled gradient
 * together with the unchanged cost.
 *
 * Mutates the {@code movingAvg} and {@code squaredGradient} fields on every
 * call; the passed {@code learningRate} and {@code theta} are not used here.
 *
 * @param theta the current parameters (unused by this rule).
 * @param gradient the raw gradient of the current iteration.
 * @param learningRate unused; the step size comes from {@code alpha}.
 * @param iteration the 1-based iteration counter used for bias correction.
 * @param cost the current cost, passed through unchanged.
 * @return the cost and the Adam-rescaled gradient.
 */
@Override
public CostGradientTuple updateGradient(DoubleVector theta,
    DoubleVector gradient, double learningRate, long iteration, double cost) {
  if (movingAvg == null) {
    // initialize same types with zeros
    movingAvg = gradient.deepCopy().multiply(0);
    squaredGradient = gradient.deepCopy().multiply(0);
  }
  // first moment: m = beta1 * m + (1 - beta1) * g
  DoubleVector oneMinusBeta1Grad = gradient.multiply(1d - movingAvgDecay);
  movingAvg = movingAvg.multiply(movingAvgDecay).add(oneMinusBeta1Grad);
  // second moment: v = beta2 * v + (1 - beta2) * g^2
  DoubleVector oneMinusBeta2GradSquared = gradient.pow(2d).multiply(
      1 - squaredDecay);
  squaredGradient = squaredGradient.multiply(squaredDecay).add(
      oneMinusBeta2GradSquared);
  // bias-corrected step size: alpha * sqrt(1 - beta2^t) / (1 - beta1^t)
  double beta1t = FastMath.pow(movingAvgDecay, iteration);
  double beta2t = FastMath.pow(squaredDecay, iteration);
  double alphat = alpha * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
  // guard against a NaN / zero step (e.g. when 1 - beta1^t underflows)
  if (Double.isNaN(alphat) || alphat == 0.0) {
    alphat = EPS;
  }
  // final update direction: alphat * m / (sqrt(v) + eps)
  DoubleVector sqrtV = squaredGradient.sqrt().add(eps);
  gradient = movingAvg.multiply(alphat).divide(sqrtV);
  return new CostGradientTuple(cost, gradient);
}
/**
 * Turns the wrapped minimizable cost function into its negation by flipping
 * the sign of both the cost and the gradient.
 *
 * @param input the point at which to evaluate.
 * @return the negated cost and negated gradient of the wrapped function.
 */
@Override
public CostGradientTuple evaluateCost(DoubleVector input) {
  CostGradientTuple tuple = minableCostFunction.evaluateCost(input);
  double negatedCost = -tuple.getCost();
  DoubleVector negatedGradient = tuple.getGradient().multiply(-1);
  return new CostGradientTuple(negatedCost, negatedGradient);
}
/**
 * Simplistic gradient descent step without regularization: lets
 * {@link #updateGradient} transform the raw gradient, then moves theta
 * against it, scaled by the learning rate.
 *
 * @param theta the current parameter vector.
 * @param gradient the raw gradient at theta.
 * @param learningRate the step size.
 * @param iteration the current iteration counter.
 * @param cost the current cost.
 * @return the (possibly adjusted) cost and the updated weights.
 */
@Override
public CostWeightTuple computeNewWeights(DoubleVector theta,
    DoubleVector gradient, double learningRate, long iteration, double cost) {
  CostGradientTuple updated = updateGradient(theta, gradient, learningRate,
      iteration, cost);
  // theta_new = theta - learningRate * gradient
  DoubleVector step = updated.getGradient().multiply(learningRate);
  DoubleVector newWeights = theta.subtract(step);
  return new CostWeightTuple(updated.getCost(), newWeights);
}
@Override public CostGradientTuple updateGradient(DoubleVector weights, DoubleVector gradient, double learningRate, long iteration, double cost) { if (l2 != 0d) { DoubleVector powered = weights.pow(2d); DoubleVector regGrad = weights.multiply(l2); // assume bias is on the first dimension powered.set(0, 0); regGrad.set(0, 0); cost += l2 * powered.sum() / 2d; gradient = gradient.add(regGrad); } return new CostGradientTuple(cost, gradient); } }
/**
 * Logistic-regression cost function: evaluates the sigmoid hypothesis
 * h = sigmoid(x * theta), computes the averaged loss via ERROR_FUNCTION and
 * the gradient x^T * (h - y) / m, optionally adding an L2 penalty whose bias
 * component (first dimension) is excluded.
 *
 * @param theta the parameter vector to evaluate.
 * @return the (regularized) cost j and gradient at theta.
 */
@Override
public CostGradientTuple evaluateCost(DoubleVector theta) {
  // h = sigmoid(x * theta), wrapped as a single-row matrix
  DoubleVector activation = SIGMOID.get().apply(x.multiplyVectorRow(theta));
  DenseDoubleMatrix hypo = new DenseDoubleMatrix(Arrays.asList(activation));
  double error = ERROR_FUNCTION.calculateLoss(y, hypo);
  // residual h - y, used for the gradient below
  DoubleMatrix loss = hypo.subtract(y);
  // average the loss over all m training examples
  double j = error / m;
  // gradient = x^T * (h - y) / m
  DoubleVector gradient = xTransposed.multiplyVectorRow(loss.getRowVector(0))
      .divide(m);
  if (lambda != 0d) {
    DoubleVector reg = theta.multiply(lambda / m);
    // don't regularize the bias
    reg.set(0, 0d);
    gradient = gradient.add(reg);
    // NOTE(review): the penalty here is lambda * theta^2 / m; many texts use
    // /(2m), and it includes the bias term — confirm this is intentional
    j += lambda * theta.pow(2).sum() / m;
  }
  return new CostGradientTuple(j, gradient);
}
}