2019-10-11 11:14:56 -07:00
|
|
|
|
/*
|
2019-10-05 07:54:26 -07:00
|
|
|
|
* @file levenberg_marquardt_solver.h
|
|
|
|
|
*
|
|
|
|
|
* Levenberg-Marquardt Algorithm, an efficient non-linear optimization solver used for regression analysis ("least squares problem").
|
|
|
|
|
* It basically combines Gauss-Newton method and gradient descent, but using an approximation for computing a Hessian matrix!
|
|
|
|
|
* The code is based on "W.Press, S.Teukolsky, W.Vetterling, B.Flannery. Numerical Recipes in Fortran 77, 1992."
|
|
|
|
|
*
|
|
|
|
|
* @date Sep 27, 2019
|
|
|
|
|
* @author andreika, (c) 2019
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#include "global.h"
|
|
|
|
|
|
|
|
|
|
#include "matrix_helper.h"
|
|
|
|
|
|
2019-10-11 11:14:56 -07:00
|
|
|
|
#ifndef LM_USE_CACHE
|
|
|
|
|
#define LM_USE_CACHE
|
|
|
|
|
#endif /* LM_USE_CACHE */
|
|
|
|
|
|
|
|
|
|
// Used to get all the values for the Levenberg-Marquardt solver.
|
2019-10-05 07:54:26 -07:00
|
|
|
|
// Used to get all the values for the Levenberg-Marquardt solver.
// Derive from this class and implement the pure virtual methods to describe a concrete
// regression problem with 'numParams' free parameters.
template <int numParams>
class LMSFunction {
public:
	// A base class with virtual methods needs a virtual destructor so that deleting
	// a derived instance through an LMSFunction pointer is well-defined.
	virtual ~LMSFunction() = default;

	// Get the total number of data points
	virtual int getNumPoints() const = 0;

	// Clamp/normalize the parameter vector into the implementation's valid range.
	virtual void justifyParams(double_t *params) const = 0;

	/// Returns the y value of the function for the given x and vector of parameters.
	/// NOTE: with LM_USE_CACHE enabled this returns the value precomputed by
	/// calculateAllPoints() and ignores 'params' — the caller must have called
	/// calculateAllPoints(params) beforehand, otherwise the result is stale.
	virtual double_t getEstimatedValueAtPoint(int i, const double_t *params) const {
#ifdef LM_USE_CACHE
		return points[0][i];
#else
		return calcEstimatedValuesAtPoint(i, params);
#endif
	}

	/// Returns the y value of the function for the given x and vector of parameters (always computed, never cached)
	virtual double_t calcEstimatedValuesAtPoint(int i, const double_t *params) const = 0;

	/// Returns the residual (error delta) of the function (return (dataPoints[i] - estimatedPoint(i)))
	virtual double_t getResidual(int i, const double_t *params) const = 0;

	// Calculate the sum of the squares of the residuals (the "merit" being minimized)
	virtual double_t calcMerit(double_t *params) const {
		double_t res = 0;
		// hoisted: getNumPoints() is a virtual call, no need to repeat it every iteration
		const int numPoints = getNumPoints();
		for (int i = 0; i < numPoints; i++) {
			double_t r = getResidual(i, params);
			res += r * r;
		}
		return res;
	}

	/// Return the partial derivate of the function with respect to parameter pIndex at point [i],
	/// using a symmetric (central) difference quotient.
	/// Can be overridden if analytical gradient function's representation is available
	virtual double_t getPartialDerivative(int i, const double_t *params, int pIndex) const {
#ifdef LM_USE_CACHE
		// displaced estimates were precomputed by calculateAllPoints():
		// group (1 + pIndex * 2) holds the +delta values, the next group the -delta values
		int p = 1 + pIndex * 2;
		double_t dplusResult = points[p][i];
		double_t dminusResult = points[p + 1][i];
#else
		// we need to alter parameters around the neighborhood of 'pIndex', so we make a working copy
		double_t tmpParams[numParams];
		for (int k = 0; k < numParams; k++)
			tmpParams[k] = params[k];

		tmpParams[pIndex] = params[pIndex] + delta;
		double_t dplusResult = getEstimatedValueAtPoint(i, tmpParams);

		tmpParams[pIndex] = params[pIndex] - delta;
		double_t dminusResult = getEstimatedValueAtPoint(i, tmpParams);
#endif
		return (dplusResult - dminusResult) / (delta * 2.0);
	}

	// Precompute all estimated values (and +/-delta displaced values for the derivatives)
	// for the given parameter vector. No-op when LM_USE_CACHE is disabled.
	void calculateAllPoints(const double_t *params) {
#ifdef LM_USE_CACHE
		const int numPoints = getNumPoints();
		for (int i = 0; i < numPointGroups; i++) {
			points[i].resize(numPoints);
		}
		// calculate displaced points for partial derivatives
		for (int pIndex = 0; pIndex < numParams; pIndex++) {
			double_t tmpParams[numParams];
			for (int k = 0; k < numParams; k++)
				tmpParams[k] = params[k];
			// 2 points for each derivative: +delta and -delta
			for (int plusMinus = 0; plusMinus < 2; plusMinus++) {
				tmpParams[pIndex] = params[pIndex] + (plusMinus == 0 ? delta : -delta);
				int p = 1 + pIndex * 2 + plusMinus;
				for (int i = 0; i < numPoints; i++) {
					points[p][i] = calcEstimatedValuesAtPoint(i, tmpParams);
				}
			}
		}
		// group 0 holds the undisplaced estimates
		for (int i = 0; i < numPoints; i++) {
			points[0][i] = calcEstimatedValuesAtPoint(i, params);
		}
#endif
	}

private:
#ifdef LM_USE_CACHE
	// group 0 = estimates at 'params'; groups 1..2*numParams = the +/-delta displaced estimates
	static const int numPointGroups = numParams * 2 + 1;
	std::vector<double_t> points[numPointGroups];
#endif

protected:
	// some magic value for the differentiation step to calculate a partial derivative
	double_t delta = 1.0e-6;
};
|
|
|
|
|
|
|
|
|
|
template<int numParams>
|
|
|
|
|
class LevenbergMarquardtSolver {
|
|
|
|
|
public:
|
|
|
|
|
// ctor
|
2019-10-23 07:45:12 -07:00
|
|
|
|
LevenbergMarquardtSolver(LMSFunction<numParams> *func, double_t *parameters) {
|
2019-10-05 07:54:26 -07:00
|
|
|
|
this->func = func;
|
|
|
|
|
this->parameters = parameters;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// lambda - magic coef.
|
|
|
|
|
// maxIterations - if too many iterations (loop exit condition #1)
|
|
|
|
|
// minDelta - if the progress on iteration is too small (loop exit condition #2)
|
2019-10-23 07:45:12 -07:00
|
|
|
|
int solve(double_t lambda_ = 0.001, double_t minDelta = 1e-15, int maxIterations = 100) {
|
2019-10-05 07:54:26 -07:00
|
|
|
|
this->lambda = lambda_;
|
|
|
|
|
|
|
|
|
|
iterationCount = 0;
|
|
|
|
|
|
2019-10-23 07:45:12 -07:00
|
|
|
|
func->calculateAllPoints(parameters);
|
|
|
|
|
double_t delta = 0;
|
2019-10-05 07:54:26 -07:00
|
|
|
|
do {
|
2019-10-23 07:45:12 -07:00
|
|
|
|
double_t merit = func->calcMerit(parameters);
|
2019-10-11 11:14:56 -07:00
|
|
|
|
|
2019-10-05 07:54:26 -07:00
|
|
|
|
calcGradient();
|
|
|
|
|
calcHessian();
|
|
|
|
|
bool isSolved = calcNewParameters();
|
2019-10-23 07:45:12 -07:00
|
|
|
|
func->calculateAllPoints(newParameters);
|
|
|
|
|
double_t newMerit = func->calcMerit(newParameters);
|
2019-10-05 07:54:26 -07:00
|
|
|
|
if (!isSolved) {
|
2019-10-09 10:11:18 -07:00
|
|
|
|
return -iterationCount;
|
2019-10-05 07:54:26 -07:00
|
|
|
|
}
|
|
|
|
|
// if we don't like the new parameters
|
|
|
|
|
if (newMerit >= merit) {
|
|
|
|
|
// decrease the step
|
|
|
|
|
lambda *= lambdaMultiplier;
|
|
|
|
|
}
|
|
|
|
|
// if we're good, accept them
|
|
|
|
|
else {
|
|
|
|
|
// update via copy
|
|
|
|
|
memcpy(parameters, newParameters, sizeof(newParameters));
|
|
|
|
|
// let's increase the step even more
|
|
|
|
|
lambda /= lambdaMultiplier;
|
|
|
|
|
}
|
|
|
|
|
// find out if we progressed enough in this iteration
|
|
|
|
|
delta = fabs(newMerit - merit);
|
|
|
|
|
#ifdef LMS_DEBUG
|
2019-10-23 07:45:12 -07:00
|
|
|
|
printf("[%d] (%Lg,%Lg,%Lg,%Lg) l=%Lg merit %Lg->%Lg, dm=%Lg\r\n", iterationCount, (long double)parameters[0], (long double)parameters[1], (long double)parameters[2], (long double)parameters[3],
|
|
|
|
|
(long double)lambda, (long double)merit, (long double)newMerit, (long double)(newMerit - merit));
|
2019-10-05 07:54:26 -07:00
|
|
|
|
#endif
|
|
|
|
|
iterationCount++;
|
|
|
|
|
} while (delta > minDelta && iterationCount < maxIterations);
|
|
|
|
|
return iterationCount;
|
|
|
|
|
}
|
|
|
|
|
|
2019-10-23 07:45:12 -07:00
|
|
|
|
double_t *getParameters() const {
|
2019-10-05 07:54:26 -07:00
|
|
|
|
return parameters;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
// Find the parameter increments by solving the Hessian x Gradient equation
|
|
|
|
|
bool calcNewParameters() {
|
|
|
|
|
// get H^-1 matrix (inverse Hessian)
|
2019-10-23 07:45:12 -07:00
|
|
|
|
double_t hinv[numParams][numParams];
|
|
|
|
|
bool ret = MatrixHelper<double_t, numParams>::inverseMatrix(hinv, hessian);
|
2019-10-05 07:54:26 -07:00
|
|
|
|
if (!ret)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
for (int row = 0; row < numParams; row++) {
|
2019-10-23 07:45:12 -07:00
|
|
|
|
double_t increment = 0;
|
2019-10-05 07:54:26 -07:00
|
|
|
|
for (int col = 0; col < numParams; col++) {
|
|
|
|
|
increment += hinv[row][col] * gradient[col];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
newParameters[row] = parameters[row] + increment;
|
|
|
|
|
}
|
|
|
|
|
func->justifyParams(newParameters);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Calculate the Hessian matrix (2nd derivative) approximation
|
|
|
|
|
void calcHessian() {
|
|
|
|
|
for (int row = 0; row < numParams; row++) {
|
|
|
|
|
for (int col = 0; col < numParams; col++) {
|
2019-10-23 07:45:12 -07:00
|
|
|
|
double_t res = 0;
|
2019-10-05 07:54:26 -07:00
|
|
|
|
for (int i = 0; i < func->getNumPoints(); i++) {
|
|
|
|
|
res += func->getPartialDerivative(i, parameters, row) * func->getPartialDerivative(i, parameters, col);
|
|
|
|
|
}
|
|
|
|
|
hessian[row][col] = (row == col) ? res * (lambda + 1.0) : res;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Calculate the 1st derivatives of the residual func
|
|
|
|
|
void calcGradient() {
|
|
|
|
|
for (int row = 0; row < numParams; row++) {
|
2019-10-23 07:45:12 -07:00
|
|
|
|
double_t res = 0;
|
2019-10-05 07:54:26 -07:00
|
|
|
|
for (int i = 0; i < func->getNumPoints(); i++) {
|
|
|
|
|
res += func->getResidual(i, parameters) * func->getPartialDerivative(i, parameters, row);
|
|
|
|
|
}
|
|
|
|
|
gradient[row] = res;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
// optimization function
|
|
|
|
|
LMSFunction<numParams> *func;
|
|
|
|
|
|
|
|
|
|
// Current (accepted) parameters vector
|
2019-10-23 07:45:12 -07:00
|
|
|
|
double_t *parameters; // [numParams]
|
2019-10-05 07:54:26 -07:00
|
|
|
|
|
|
|
|
|
// Incremented (next step) parameters vector
|
2019-10-23 07:45:12 -07:00
|
|
|
|
double_t newParameters[numParams];
|
2019-10-05 07:54:26 -07:00
|
|
|
|
|
|
|
|
|
// Hessian matrix
|
2019-10-23 07:45:12 -07:00
|
|
|
|
double_t hessian[numParams][numParams];
|
2019-10-05 07:54:26 -07:00
|
|
|
|
// Gradients vector
|
2019-10-23 07:45:12 -07:00
|
|
|
|
double_t gradient[numParams];
|
2019-10-05 07:54:26 -07:00
|
|
|
|
|
|
|
|
|
// some magic number
|
2019-10-23 07:45:12 -07:00
|
|
|
|
const double_t lambdaMultiplier = 10.0;
|
2019-10-05 07:54:26 -07:00
|
|
|
|
|
|
|
|
|
// coeff used to adapt the descent step (and speed)
|
2019-10-23 07:45:12 -07:00
|
|
|
|
double_t lambda;
|
2019-10-05 07:54:26 -07:00
|
|
|
|
|
|
|
|
|
// Total number of iterations
|
|
|
|
|
int iterationCount;
|
|
|
|
|
};
|