/**
 * Applies the current scaling vector to both the gradient and the Hessian.
 */
protected void applyHessianScaling() {
    // element-wise divide: gradient[i] /= hessianScaling[i]
    CommonOps_DDRM.elementDiv(gradient, hessianScaling);
    // divide row i and column i of the Hessian by hessianScaling[i]
    hessian.divideRowsCols(hessianScaling);
}
/**
 * Sets scaling to the sqrt() of the diagonal elements in the Hessian matrix.
 */
protected void computeHessianScaling() {
    // copy the Hessian's diagonal into the scaling vector,
    // then let the overloaded helper convert it in place
    hessian.extractDiagonals(hessianScaling);
    computeHessianScaling(hessianScaling);
}
/** * Adjusts the Hessian's diagonal elements value and computes the next step * * @param lambda (Input) tuning * @param gradient (Input) gradient * @param step (Output) step * @return true if solver could compute the next step */ protected boolean computeStep( double lambda, DMatrixRMaj gradient , DMatrixRMaj step ) { final double mixture = config.mixture; for (int i = 0; i < diagOrig.numRows; i++) { double v = min(config.diagonal_max, max(config.diagonal_min,diagOrig.data[i])); diagStep.data[i] = v + lambda*(mixture + (1.0-mixture)*v); } hessian.setDiagonals( diagStep ); if( !hessian.initializeSolver()) { return false; } // In the book formulation it solves something like (B + lambda*I)*p = -g // but we don't want to modify g, so we apply the negative to the step instead if( hessian.solve(gradient,step) ) { CommonOps_DDRM.scale(-1, step); return true; } else { return false; } }
protected boolean solveGaussNewtonPoint(DMatrixRMaj pointGN ) { if( !owner.hessian.initializeSolver() ) { return false; } // using direction instead of gradient "should" have better scaling if( !owner.hessian.solve(direction, pointGN) ) { return false; } CommonOps_DDRM.scale(owner.gradientNorm,pointGN); return true; }
@Test public void solve() { DMatrixRMaj M = RandomMatrices_DDRM.symmetricPosDef(10,rand); DMatrixRMaj v = RandomMatrices_DDRM.rectangle(10,1,rand); DMatrixRMaj origv = v.copy(); DMatrixRMaj expected = v.createLike(); CommonOps_DDRM.solve(M,v,expected); DMatrixRMaj found = v.createLike(); alg.init(M.numCols); setHessian(alg,M); assertTrue(alg.initializeSolver()); assertTrue(alg.solve(v,found)); // make sure it didn't modify the input assertTrue(MatrixFeatures_DDRM.isIdentical(origv,origv,UtilEjml.TEST_F64)); // check the solution assertTrue(MatrixFeatures_DDRM.isIdentical(expected,found,UtilEjml.TEST_F64)); // run it again, if nothing was modified it should produce the same solution assertTrue(alg.initializeSolver()); assertTrue(alg.solve(v,found)); assertTrue(MatrixFeatures_DDRM.isIdentical(expected,found,UtilEjml.TEST_F64)); }
/**
 * Computes the Cauchy step length: |g|^2 / (g' B g).
 *
 * @return the step length along the steepest-descent direction
 */
protected double solveCauchyStepLength() {
    double denominator = hessian.innerVectorHessian(gradient);
    double numerator = gradientNorm*gradientNorm;
    return numerator/denominator;
}
/**
 * Assigns a random vector to the Hessian's diagonal and verifies it can be
 * read back unchanged through extractDiagonals().
 */
@Test
public void setDiagonals() {
    DMatrixRMaj M = RandomMatrices_DDRM.rectangle(6, 6, rand);
    setHessian(alg, M);

    DMatrixRMaj v = RandomMatrices_DDRM.rectangle(6, 1, rand);
    alg.setDiagonals(v);

    // start 'found' with garbage so the extract has to overwrite every element
    DMatrixRMaj found = RandomMatrices_DDRM.rectangle(6, 1, rand);
    alg.extractDiagonals(found);

    for (int row = 0; row < M.numRows; row++) {
        assertEquals(found.get(row), v.get(row), UtilEjml.TEST_F64);
    }
}
@Test public void divideRowsCols() { DMatrixRMaj M = RandomMatrices_DDRM.symmetricPosDef(10,rand); DMatrixRMaj scale = RandomMatrices_DDRM.rectangle(10,1,0,1,rand); DMatrixRMaj expected = M.copy(); CommonOps_DDRM.divideRows(scale.data,expected); CommonOps_DDRM.divideCols(expected,scale.data); setHessian(alg,M); alg.divideRowsCols(scale); // Not a great unit test since it doesn't check the off diagonal elements DMatrixRMaj found = RandomMatrices_DDRM.rectangle(10,1,rand); alg.extractDiagonals(found); for (int i = 0; i < M.numRows; i++) { assertEquals(expected.get(i,i),found.get(i), UtilEjml.TEST_F64); } }
public void initialize(double initial[] , int numberOfParameters ) { x.reshape(numberOfParameters,1); x_next.reshape(numberOfParameters,1); p.reshape(numberOfParameters,1); gradient.reshape(numberOfParameters,1); // initialize scaling to 1, which is no scaling hessianScaling.reshape(numberOfParameters,1); Arrays.fill(hessianScaling.data,0,numberOfParameters,1); hessian.init(numberOfParameters); System.arraycopy(initial,0,x.data,0,numberOfParameters); sameStateAsCost = true; totalFullSteps = 0; totalSelectSteps = 0; }
/**
 * Cauchy step length along steepest descent: |g|^2 / (g' B g).
 *
 * @return length of the Cauchy step
 */
protected double solveCauchyStepLength() {
    // quadratic form g' B g evaluated by the Hessian implementation
    double gBg = hessian.innerVectorHessian(gradient);
    return (gradientNorm*gradientNorm)/gBg;
}
protected boolean solveGaussNewtonPoint(DMatrixRMaj pointGN ) { if( !owner.hessian.initializeSolver() ) { return false; } // using direction instead of gradient "should" have better scaling if( !owner.hessian.solve(direction, pointGN) ) { return false; } CommonOps_DDRM.scale(owner.gradientNorm,pointGN); return true; }
public void initialize(double initial[] , int numberOfParameters ) { x.reshape(numberOfParameters,1); x_next.reshape(numberOfParameters,1); p.reshape(numberOfParameters,1); gradient.reshape(numberOfParameters,1); // initialize scaling to 1, which is no scaling hessianScaling.reshape(numberOfParameters,1); Arrays.fill(hessianScaling.data,0,numberOfParameters,1); hessian.init(numberOfParameters); System.arraycopy(initial,0,x.data,0,numberOfParameters); sameStateAsCost = true; totalFullSteps = 0; totalSelectSteps = 0; }
/** * Adjusts the Hessian's diagonal elements value and computes the next step * * @param lambda (Input) tuning * @param gradient (Input) gradient * @param step (Output) step * @return true if solver could compute the next step */ protected boolean computeStep( double lambda, DMatrixRMaj gradient , DMatrixRMaj step ) { final double mixture = config.mixture; for (int i = 0; i < diagOrig.numRows; i++) { double v = min(config.diagonal_max, max(config.diagonal_min,diagOrig.data[i])); diagStep.data[i] = v + lambda*(mixture + (1.0-mixture)*v); } hessian.setDiagonals( diagStep ); if( !hessian.initializeSolver()) { return false; } // In the book formulation it solves something like (B + lambda*I)*p = -g // but we don't want to modify g, so we apply the negative to the step instead if( hessian.solve(gradient,step) ) { CommonOps_DDRM.scale(-1, step); return true; } else { return false; } }
/**
 * Computes predicted reduction for step 'p'
 *
 * @param p Change in state or the step
 * @return predicted reduction in quadratic model
 */
public double computePredictedReduction( DMatrixRMaj p ) {
    double linearTerm = CommonOps_DDRM.dot(gradient, p);
    double quadraticTerm = hessian.innerVectorHessian(p);
    // predicted reduction = -g'p - 0.5*p'Bp
    return -linearTerm - 0.5*quadraticTerm;
}
/**
 * Sets scaling to the sqrt() of the diagonal elements in the Hessian matrix.
 */
protected void computeHessianScaling() {
    // grab the diagonal first; the single-argument overload then
    // transforms the extracted values into the final scaling factors
    hessian.extractDiagonals(hessianScaling);

    computeHessianScaling(hessianScaling);
}
/**
 * After init() both the BFGS Hessian approximation and its inverse should be identity.
 */
@Test
public void initialize() {
    alg.init(8);

    HessianBFGS_DDRM bfgs = (HessianBFGS_DDRM) alg;
    assertTrue(MatrixFeatures_DDRM.isIdentity(bfgs.hessian, UtilEjml.TEST_F64));
    assertTrue(MatrixFeatures_DDRM.isIdentity(bfgs.hessianInverse, UtilEjml.TEST_F64));
}
/**
 * Apply scaling to gradient and Hessian.
 */
protected void applyHessianScaling() {
    // gradient is divided element-wise by the scaling vector,
    // and the Hessian has its rows and columns divided by the same values
    CommonOps_DDRM.elementDiv(gradient, hessianScaling);

    hessian.divideRowsCols(hessianScaling);
}
/**
 * Computes predicted reduction for step 'p'
 *
 * @param p Change in state or the step
 * @return predicted reduction in quadratic model
 */
public double computePredictedReduction( DMatrixRMaj p ) {
    // -g'p - 0.5*p'Bp from the quadratic model of the cost function
    return -(CommonOps_DDRM.dot(gradient, p) + 0.5*hessian.innerVectorHessian(p));
}
/**
 * Computes the gradient and Hessian at the current state and checks convergence.
 * (Method name kept for interface compatibility.)
 *
 * @return true if it has converged or false if it has not
 */
protected boolean updateDerivates() {
    functionGradientHessian(x, true, gradient, hessian);

    if (config.hessianScaling) {
        computeHessianScaling();
        applyHessianScaling();
    }

    // snapshot the (possibly scaled) diagonal for later damping
    hessian.extractDiagonals(diagOrig);

    boolean converged = checkConvergenceGTest(gradient);
    if (converged) {
        if (verbose != null)
            verbose.println("Converged g-test");
        return true;
    }

    mode = Mode.DETERMINE_STEP;
    return false;
}
@Override public void initializeUpdate() { // use the direction instead of gradient for reduced overflow/underflow issues CommonOps_DDRM.divide(owner.gradient,owner.gradientNorm,direction); gBg = owner.hessian.innerVectorHessian(direction); if(UtilEjml.isUncountable(gBg)) throw new OptimizationException("Uncountable. gBg="+gBg); }