/*
 * @file levenberg_marquardt_solver.h
 *
 * Levenberg-Marquardt algorithm, an efficient non-linear optimization solver used for regression analysis ("least squares" problems).
 * It basically combines the Gauss-Newton method and gradient descent, using an approximation to compute the Hessian matrix.
 * The code is based on "W.Press, S.Teukolsky, W.Vetterling, B.Flannery. Numerical Recipes in Fortran 77, 1992."
 *
 * @date Sep 27, 2019
 * @author andreika, (c) 2019
 */

#pragma once

#include "global.h"
#include "matrix_helper.h"

#ifndef LM_USE_CACHE
#define LM_USE_CACHE
#endif /* LM_USE_CACHE */

// Provides all the values needed by the Levenberg-Marquardt solver.
template <int numParams>
class LMSFunction {
public:
    // Get the total number of data points
    virtual int getNumPoints() const = 0;

    // Gives the implementation a chance to clamp/adjust a candidate parameter vector
    virtual void justifyParams(double_t *params) const = 0;

    /// Returns the y value of the function for the given point [i] and vector of parameters
    /// (in the cached mode this returns the value computed by the last calculateAllPoints() call)
    virtual double_t getEstimatedValueAtPoint(int i, const double_t *params) const {
#ifdef LM_USE_CACHE
        return points[0][i];
#else
        return calcEstimatedValuesAtPoint(i, params);
#endif
    }

    /// Returns the y value of the function for the given point [i] and vector of parameters
    virtual double_t calcEstimatedValuesAtPoint(int i, const double_t *params) const = 0;

    /// Returns the residual (error delta) of the function, i.e. (dataPoints[i] - estimatedPoint(i))
    virtual double_t getResidual(int i, const double_t *params) const = 0;

    // Calculate the sum of the squares of the residuals
    virtual double_t calcMerit(double_t *params) const {
        double_t res = 0;
        for (int i = 0; i < getNumPoints(); i++) {
            double_t r = getResidual(i, params);
            res += r * r;
        }
        return res;
    }

    /// Returns the partial derivative of the function with respect to parameter pIndex at point [i].
    /// Can be overridden if an analytical representation of the gradient is available.
    virtual double_t getPartialDerivative(int i, const double_t *params, int pIndex) const {
#ifdef LM_USE_CACHE
        int p = 1 + pIndex * 2;
        double_t dplusResult = points[p][i];
        double_t dminusResult = points[p + 1][i];
#else
        // we need to alter the parameter 'pIndex' around its neighborhood, so we make a working copy
        double_t tmpParams[numParams];
        for (int k = 0; k < numParams; k++)
            tmpParams[k] = params[k];

        tmpParams[pIndex] = params[pIndex] + delta;
        double_t dplusResult = getEstimatedValueAtPoint(i, tmpParams);

        tmpParams[pIndex] = params[pIndex] - delta;
        double_t dminusResult = getEstimatedValueAtPoint(i, tmpParams);
#endif
        return (dplusResult - dminusResult) / (delta * 2.0);
    }

    void calculateAllPoints(const double_t *params) {
#ifdef LM_USE_CACHE
        for (int i = 0; i < numPointGroups; i++) {
            points[i].resize(getNumPoints());
        }
        // calculate displaced points for partial derivatives
        for (int pIndex = 0; pIndex < numParams; pIndex++) {
            double_t tmpParams[numParams];
            for (int k = 0; k < numParams; k++)
                tmpParams[k] = params[k];
            // 2 points for each derivative: +delta and -delta
            for (int plusMinus = 0; plusMinus < 2; plusMinus++) {
                tmpParams[pIndex] = params[pIndex] + (plusMinus == 0 ? delta : -delta);
                int p = 1 + pIndex * 2 + plusMinus;
                for (int i = 0; i < getNumPoints(); i++) {
                    points[p][i] = calcEstimatedValuesAtPoint(i, tmpParams);
                }
            }
        }
        // the undisplaced points depend only on 'params', so they are cached once, outside of the loop above
        for (int i = 0; i < getNumPoints(); i++) {
            points[0][i] = calcEstimatedValuesAtPoint(i, params);
        }
#endif
    }

private:
#ifdef LM_USE_CACHE
    static const int numPointGroups = numParams * 2 + 1;
    std::vector<double_t> points[numPointGroups];
#endif

protected:
    // some magic value for the differentiation step to calculate a partial derivative
    double_t delta = 1.0e-6;
};
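
/**
 * Example implementation (a minimal sketch): a concrete LMSFunction for fitting the
 * straight line y = p0 + p1 * x to a set of sample points. The class name and members
 * below are hypothetical and only illustrate which virtual methods an implementation
 * is expected to provide.
 *
 * @code
 * class LineFitFunction : public LMSFunction<2> {
 * public:
 *     LineFitFunction(const std::vector<double_t> &x, const std::vector<double_t> &y) : xs(x), ys(y) {}
 *
 *     int getNumPoints() const override {
 *         return (int)xs.size();
 *     }
 *
 *     // this simple model needs no constraints on its parameters
 *     void justifyParams(double_t *) const override {}
 *
 *     // the model itself: y = p0 + p1 * x
 *     double_t calcEstimatedValuesAtPoint(int i, const double_t *params) const override {
 *         return params[0] + params[1] * xs[i];
 *     }
 *
 *     // measured value minus estimated value
 *     double_t getResidual(int i, const double_t *params) const override {
 *         return ys[i] - getEstimatedValueAtPoint(i, params);
 *     }
 *
 * private:
 *     std::vector<double_t> xs, ys;
 * };
 * @endcode
 */
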
template <int numParams>
class LevenbergMarquardtSolver {
public:
    // ctor
    LevenbergMarquardtSolver(LMSFunction<numParams> *func, double_t *parameters) {
        this->func = func;
        this->parameters = parameters;
    }

    // lambda - the initial damping ("magic") coefficient
    // minDelta - exit if the progress of an iteration is too small (loop exit condition #1)
    // maxIterations - exit if there are too many iterations (loop exit condition #2)
    int solve(double_t lambda_ = 0.001, double_t minDelta = 1e-15, int maxIterations = 100) {
        this->lambda = lambda_;
        iterationCount = 0;

        func->calculateAllPoints(parameters);
        double_t delta = 0;
        do {
            double_t merit = func->calcMerit(parameters);

            calcGradient();
            calcHessian();
            bool isSolved = calcNewParameters();
            // the Hessian could not be inverted, nothing more we can do
            if (!isSolved) {
                return -iterationCount;
            }

            func->calculateAllPoints(newParameters);
            double_t newMerit = func->calcMerit(newParameters);

            // if we don't like the new parameters
            if (newMerit >= merit) {
                // decrease the step
                lambda *= lambdaMultiplier;
                // re-cache the points for the still-current parameters
                func->calculateAllPoints(parameters);
            }
            // if we're good, accept them
            else {
                // update via copy
                memcpy(parameters, newParameters, sizeof(newParameters));
                // let's increase the step even more
                lambda /= lambdaMultiplier;
            }

            // find out if we progressed enough in this iteration
            delta = fabs(newMerit - merit);
#ifdef LMS_DEBUG
            // note: this debug output assumes at least 4 parameters
            printf("[%d] (%Lg,%Lg,%Lg,%Lg) l=%Lg merit %Lg->%Lg, dm=%Lg\r\n", iterationCount,
                (long double)parameters[0], (long double)parameters[1], (long double)parameters[2], (long double)parameters[3],
                (long double)lambda, (long double)merit, (long double)newMerit, (long double)(newMerit - merit));
#endif
            iterationCount++;
        } while (delta > minDelta && iterationCount < maxIterations);

        return iterationCount;
    }

    double_t *getParameters() const {
        return parameters;
    }

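    /*
     * A note on the math behind one iteration: with J the matrix of partial derivatives of
     * the model (one row per data point, one column per parameter) and r the residual vector,
     * calcHessian() builds the Gauss-Newton approximation H = J^T * J with its diagonal
     * scaled by (1 + lambda), calcGradient() builds g = J^T * r, and calcNewParameters()
     * takes the step
     *     newParameters = parameters + H^-1 * g
     * A large lambda biases the step towards plain gradient descent, a small lambda towards
     * the pure Gauss-Newton step.
     */
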
protected:
    // Find the parameter increments by solving the "Hessian x increment = gradient" equation
    bool calcNewParameters() {
        // get H^-1 matrix (inverse Hessian)
        double_t hinv[numParams][numParams];
        bool ret = MatrixHelper<double_t, numParams>::inverseMatrix(hinv, hessian);
        if (!ret)
            return false;

        for (int row = 0; row < numParams; row++) {
            double_t increment = 0;
            for (int col = 0; col < numParams; col++) {
                increment += hinv[row][col] * gradient[col];
            }
            newParameters[row] = parameters[row] + increment;
        }
        func->justifyParams(newParameters);
        return true;
    }

    /// Calculate the Hessian matrix (2nd derivative) approximation
    void calcHessian() {
        for (int row = 0; row < numParams; row++) {
            for (int col = 0; col < numParams; col++) {
                double_t res = 0;
                for (int i = 0; i < func->getNumPoints(); i++) {
                    res += func->getPartialDerivative(i, parameters, row) * func->getPartialDerivative(i, parameters, col);
                }
                hessian[row][col] = (row == col) ? res * (lambda + 1.0) : res;
            }
        }
    }

    // Calculate the 1st derivatives of the residual function
    void calcGradient() {
        for (int row = 0; row < numParams; row++) {
            double_t res = 0;
            for (int i = 0; i < func->getNumPoints(); i++) {
                res += func->getResidual(i, parameters) * func->getPartialDerivative(i, parameters, row);
            }
            gradient[row] = res;
        }
    }

private:
    // optimization function
    LMSFunction<numParams> *func;

    // Current (accepted) parameters vector
    double_t *parameters; // [numParams]

    // Incremented (next step) parameters vector
    double_t newParameters[numParams];

    // Hessian matrix
    double_t hessian[numParams][numParams];

    // Gradient vector
    double_t gradient[numParams];

    // some magic number
    const double_t lambdaMultiplier = 10.0;

    // coeff used to adapt the descent step (and speed)
    double_t lambda;

    // Total number of iterations
    int iterationCount;
};
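
/**
 * Usage sketch (LineFitFunction refers to the hypothetical example class shown above;
 * the sample data and the initial guess are made up for illustration):
 *
 * @code
 * std::vector<double_t> xs = { 0.0, 1.0, 2.0, 3.0 };
 * std::vector<double_t> ys = { 1.1, 2.9, 5.2, 6.8 };
 * LineFitFunction func(xs, ys);
 *
 * double_t params[2] = { 0.0, 1.0 };   // initial guess for p0 and p1
 * LevenbergMarquardtSolver<2> solver(&func, params);
 *
 * int iterations = solver.solve();     // defaults: lambda = 0.001, minDelta = 1e-15, maxIterations = 100
 * // a positive return value is the number of iterations performed, and params[] holds the fit;
 * // a non-positive return value means the Hessian could not be inverted.
 * @endcode
 */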