/**
 * Forwards the call to {@code sqrt()} on the wrapped {@code mainVector} and
 * hands back whatever that call produces.
 *
 * @return the vector returned by {@code mainVector.sqrt()}.
 */
@Override
public DoubleVector sqrt() {
  final DoubleVector rooted = this.mainVector.sqrt();
  return rooted;
}
/**
 * Forwards the call to {@code sqrt()} on the wrapped {@code vector} and hands
 * back whatever that call produces.
 *
 * @return the vector returned by {@code vector.sqrt()}.
 */
@Override
public DoubleVector sqrt() {
  final DoubleVector rooted = vector.sqrt();
  return rooted;
}
/**
 * Delegating implementation: the result is obtained by invoking
 * {@code sqrt()} on the backing {@code vector} field.
 *
 * @return the vector returned by {@code vector.sqrt()}.
 */
@Override
public DoubleVector sqrt() {
  final DoubleVector delegateResult = vector.sqrt();
  return delegateResult;
}
@Override public CostGradientTuple updateGradient(DoubleVector theta, DoubleVector gradient, double learningRate, long iteration, double cost) { if (movingAvg == null) { // initialize same types with zeros movingAvg = gradient.deepCopy().multiply(0); squaredGradient = gradient.deepCopy().multiply(0); } DoubleVector oneMinusBeta1Grad = gradient.multiply(1d - movingAvgDecay); movingAvg = movingAvg.multiply(movingAvgDecay).add(oneMinusBeta1Grad); DoubleVector oneMinusBeta2GradSquared = gradient.pow(2d).multiply( 1 - squaredDecay); squaredGradient = squaredGradient.multiply(squaredDecay).add( oneMinusBeta2GradSquared); double beta1t = FastMath.pow(movingAvgDecay, iteration); double beta2t = FastMath.pow(squaredDecay, iteration); double alphat = alpha * FastMath.sqrt(1 - beta2t) / (1 - beta1t); if (Double.isNaN(alphat) || alphat == 0.0) { alphat = EPS; } DoubleVector sqrtV = squaredGradient.sqrt().add(eps); gradient = movingAvg.multiply(alphat).divide(sqrtV); return new CostGradientTuple(cost, gradient); }
stdVector = stdVector.divide(numSamples).sqrt()