# Source code for numkit.fitting

# numkit --- data fitting
# Copyright (c) 2010 Oliver Beckstein <>
# Released under the "Modified BSD Licence" (see COPYING).

import numpy
import logging
logger = logging.getLogger("numkit.fitting")

def Pearson_r(x, y):
    """Pearson's r (correlation coefficient).

      Pearson(x,y) --> correlation coefficient

    *x* and *y* are arrays of same length.

    Historical note -- Naive implementation of Pearson's r ::

      Ex = scipy.stats.mean(x)
      Ey = scipy.stats.mean(y)
      covxy = numpy.sum((x-Ex)*(y-Ey))
      r = covxy/math.sqrt(numpy.sum((x-Ex)**2)*numpy.sum((y-Ey)**2))
    """
    # corrcoef returns the symmetric 2x2 correlation matrix of (x, y);
    # an off-diagonal element is r(x, y).
    correlation_matrix = numpy.corrcoef(x, y)
    return correlation_matrix[1, 0]
def linfit(x, y, dy=None):
    """Fit a straight line y = a + bx to the data in *x* and *y*.

    Errors on y should be provided in dy in order to assess the
    goodness of the fit and derive errors on the parameters.

      linfit(x,y[,dy]) --> result_dict

    Fit y = a + bx to the data in x and y by analytically minimizing
    chi^2. dy holds the standard deviations of the individual y_i. If
    dy is not given, they are assumed to be constant (note that in this
    case Q is set to 1 and it is meaningless and chi2 is normalised to
    unit standard deviation on all points!).

    Returns the parameters a and b, their uncertainties sigma_a and
    sigma_b, and their correlation coefficient r_ab; it also returns
    the chi-squared statistic and the goodness-of-fit probability Q
    (that the fit would have chi^2 this large or larger; Q < 10^-2
    indicates that the model is bad --- Q is the probability that a
    value of chi-square as _poor_ as the calculated statistic chi2
    should occur by chance.)

    :Returns: result_dict with keys

       intercept, sigma_intercept    a +/- sigma_a
       slope, sigma_slope            b +/- sigma_b
       parameter_correlation         correlation coefficient r_ab
                                     between a and b
       chi_square                    chi^2 test statistic
       Q                             goodness-of-fit probability

    Based on 'Numerical Recipes in C', Ch 15.2.
    """
    if dy is None:
        dy = []
    import scipy.stats

    npts, mpts = len(x), len(y)
    if npts != mpts:
        raise ValueError("lengths of x and y must match: {0!s} != {1!s}".format(npts, mpts))

    # An object without len() (or an empty sequence) means: no errors
    # supplied --> unweighted fit with unit standard deviations.
    try:
        weighted = len(dy) > 0
    except TypeError:
        weighted = False
    if not weighted:
        dy = numpy.ones((npts), numpy.float64)

    x, y, dy = numpy.asarray(x), numpy.asarray(y), numpy.asarray(dy)

    variance = dy * dy
    S = numpy.sum(1 / variance)
    Sx = numpy.sum(x / variance)
    Sy = numpy.sum(y / variance)
    Sxx = numpy.sum(x * x / variance)   # kept from the NR formulation; not used below
    Sxy = numpy.sum(x * y / variance)   # kept from the NR formulation; not used below

    # Shifted variable t (NR eq. 15.2.15) reduces round-off error in
    # the normal equations.
    t = (x - Sx / S) / dy
    Stt = numpy.sum(t * t)

    slope = numpy.sum(t * y / dy) / Stt
    intercept = (Sy - Sx * slope) / S
    sigma_a = numpy.sqrt((1 + (Sx * Sx) / (S * Stt)) / S)
    sigma_b = numpy.sqrt(1 / Stt)
    cov_ab = -Sx / (S * Stt)
    r_ab = cov_ab / (sigma_a * sigma_b)

    chi2 = numpy.sum(((y - intercept - slope * x) / dy) ** 2)
    if not weighted:
        # No errors given: rescale parameter errors by the scatter of
        # the data around the fit; Q is then meaningless.
        sigmadata = numpy.sqrt(chi2 / (npts - 2))
        sigma_a *= sigmadata
        sigma_b *= sigmadata
        Q = 1.0
    else:
        Q = scipy.stats.distributions.chi2.sf(chi2, npts - 2)

    return {"intercept": intercept, "slope": slope,
            "sigma_intercept": sigma_a, "sigma_slope": sigma_b,
            "parameter_correlation": r_ab,
            "chi_square": chi2,
            "Q": Q}
class FitFunc(object):
    """Fit a function f to data (x,y) using the method of least squares.

    The function is fitted when the object is created, using
    :func:`scipy.optimize.leastsq`. One must derive from the base class
    :class:`FitFunc` and override the :meth:`FitFunc.f_factory` (including
    the definition of an appropriate local :func:`fitfunc` function) and
    :meth:`FitFunc.initial_values` appropriately. See the examples for a
    linear fit :class:`FitLin`, a 1-parameter exponential fit
    :class:`FitExp`, or a 3-parameter double exponential fit
    :class:`FitExp2`.

    The object provides two attributes

     :attr:`FitFunc.parameters`
            list of parameters of the fit
     :attr:`FitFunc.message`
            message from :func:`scipy.optimize.leastsq`

    After a successful fit, the fitted function can be applied to any data
    (a 1D-numpy array) with :meth:`FitFunc.fit`.
    """

    def __init__(self, x, y, parameters=None):
        """Perform the least-squares fit of f to (*x*, *y*) on creation.

        :Arguments:
           x, y
              data sequences of equal length
           parameters
              optional initial guesses for the fit parameters; if not
              given, :meth:`initial_values` is used

        Sets :attr:`parameters` and :attr:`message`.
        """
        import scipy.optimize
        _x = numpy.asarray(x)
        _y = numpy.asarray(y)
        p0 = self._get_initial_values(parameters)
        fitfunc = self.f_factory()
        def errfunc(p, x, y):
            return fitfunc(p, x) - y     # residuals
        p, msg = scipy.optimize.leastsq(errfunc, p0[:], args=(_x, _y))
        try:
            p[0]
            self.parameters = p
        except (TypeError, IndexError,):
            # TypeError for int p, IndexError for numpy scalar (new scipy):
            # normalize a single fitted parameter to a one-element list
            self.parameters = [p]
        self.message = msg

    def f_factory(self):
        """Stub for fit function factory, which returns the fit function.
        Override for derived classes.
        """
        def fitfunc(p, x):
            # return f(p,x); should be a numpy ufunc
            raise NotImplementedError("base class must be extended for each fit function")
        return fitfunc

    def _get_initial_values(self, parameters=None):
        """Return initial parameter guesses as a numpy array.

        A user-supplied *parameters* sequence overrides
        :meth:`initial_values`; its length must be compatible with the
        number of fit parameters.

        :Raises: :exc:`ValueError` if *parameters* has the wrong length.
        """
        p0 = numpy.asarray(self.initial_values())
        if parameters is not None:
            try:
                p0[:] = parameters
            except ValueError:
                raise ValueError("Wrong number of custom initial values {0!r}, should be something like {1!r}".format(parameters, p0))
        return p0

    def initial_values(self):
        """List of initial guesses for all parameters p[]"""
        # return [1.0, 2.0, 0.5]
        raise NotImplementedError("base class must be extended for each fit function")

    def fit(self, x):
        """Applies the fit to all *x* values"""
        fitfunc = self.f_factory()
        return fitfunc(self.parameters, numpy.asarray(x))
class FitExp(FitFunc):
    """y = f(x) = exp(-p[0]*x)"""

    def f_factory(self):
        # Single-rate exponential decay with unit amplitude.
        def fitfunc(p, x):
            return numpy.exp(-p[0] * x)   # exp(-B*x)
        return fitfunc

    def initial_values(self):
        # start from a decay rate of 1
        return [1.0]

    def __repr__(self):
        return "<FitExp {0}>".format(self.parameters)
class FitExp2(FitFunc):
    """y = f(x) = p[0]*exp(-p[1]*x) + (1-p[0])*exp(-p[2]*x)"""

    def f_factory(self):
        # Two exponential components whose amplitudes p[0] and 1-p[0]
        # sum to one.
        def fitfunc(p, x):
            component1 = p[0] * numpy.exp(-p[1] * x)
            component2 = (1 - p[0]) * numpy.exp(-p[2] * x)
            return component1 + component2
        return fitfunc

    def initial_values(self):
        # equal amplitudes, widely separated initial rates
        return [0.5, 0.1, 1e-4]

    def __repr__(self):
        return "<FitExp2{0}>".format(self.parameters)
class FitLin(FitFunc):
    """y = f(x) = p[0]*x + p[1]"""

    def f_factory(self):
        # Straight line: slope p[0], intercept p[1].
        def fitfunc(p, x):
            return p[0] * x + p[1]
        return fitfunc

    def initial_values(self):
        # unit slope through the origin
        return [1.0, 0.0]

    def __repr__(self):
        return "<FitLin{0}>".format(self.parameters)
class FitGauss(FitFunc):
    r"""y = f(x) = p[2] * 1/sqrt(2*pi*p[1]**2) * exp(-(x-p[0])**2/(2*p[1]**2))

    Fits an un-normalized gaussian (height scaled with parameter p[2]).

    * p[0] == mean $\mu$
    * p[1] == standard deviation $\sigma$
    * p[2] == scale $a$
    """

    def f_factory(self):
        def fitfunc(p, x):
            mu, sigma, scale = p[0], p[1], p[2]
            # normalization of the underlying gaussian density
            norm = 1.0 / (sigma * numpy.sqrt(2 * numpy.pi))
            return scale * norm * numpy.exp(-(x - mu) ** 2 / (2 * sigma ** 2))
        return fitfunc

    def initial_values(self):
        # NOTE(review): the default scale of 0.0 makes the initial model
        # identically zero, which may stall the optimizer -- confirm
        # this starting point is intended.
        return [0.0, 1.0, 0.0]

    def __repr__(self):
        return "<FitGauss{0}>".format(self.parameters)