# SPDX-FileCopyrightText: 2014-2020 Martin Roelfs
#
# SPDX-License-Identifier: MIT
from collections import OrderedDict
from collections.abc import Sequence
import sympy
import numpy as np
from symfit.core.argument import Variable
from .support import key2str
from .minimizers import (
BFGS, SLSQP, LBFGSB, BaseMinimizer, GradientMinimizer, HessianMinimizer,
ConstrainedMinimizer, MINPACK, ChainedMinimizer, BasinHopping
)
from .objectives import (
LeastSquares, BaseObjective, MinimizeModel, VectorLeastSquares,
LogLikelihood, HessianObjectiveJacApprox
)
from .models import BaseModel, Model, BaseNumericalModel, CallableModel
import inspect
[docs]class TakesData(object):
"""
An base class for everything that takes data. Most importantly, it takes care
of linking the provided data to variables. The allowed variables are extracted
from the model.
"""
[docs] def __init__(self, model, *ordered_data, absolute_sigma=None, **named_data):
"""
:param model: (dict of) sympy expression or ``Model`` object.
:param bool absolute_sigma: True by default. If the sigma is only used
for relative weights in your problem, you could consider setting it to
False, but if your sigma are measurement errors, keep it at True.
Note that curve_fit has this set to False by default, which is wrong in
experimental science.
:param ordered_data: data for dependent, independent and sigma variables. Assigned in
the following order: independent vars are assigned first, then dependent
vars, then sigma's in dependent vars. Within each group they are assigned in
alphabetical order.
:param named_data: assign dependent, independent and sigma variables data by name.
Standard deviation can be provided to any variable. They have to be prefixed
with sigma\_. For example, let x be a Variable. Then sigma_x will give the
stdev in x.
"""
if isinstance(model, BaseModel):
self.model = model
else:
self.model = Model(model)
# Handle ordered_data and named_data according to the allowed names.
signature = self._make_signature()
try:
bound_arguments = signature.bind(*ordered_data, **named_data)
except TypeError as err:
for var in self.model.vars:
if var.name.startswith(Variable._argument_name):
raise type(err)(str(err) + '. Some of your Variable\'s are unnamed. That might be the cause of this Error: make sure you use e.g. x = Variable(\'x\')')
elif isinstance(var, sympy.Derivative):
# Include a very strong warning with this error.
raise RuntimeWarning(
'The model contains derivatives in its definition. '
'Are you sure you don\'t mean to use `symfit.ODEModel`?'
)
else:
raise err
# Include default values in bound_argument object
for param in signature.parameters.values():
if param.name not in bound_arguments.arguments:
bound_arguments.arguments[param.name] = param.default
original_data = bound_arguments.arguments # ordereddict of the data
self.data = original_data.copy()
for var in self.model.vars:
# Identify data by their Variable, not their variable names.
# But anything that is not a part of model should not be thrown away
if var.name in self.data:
self.data[var] = self.data.pop(var.name)
# Change the type to array if no array operations are supported.
# We don't want to break duck-typing, hence the try-except.
for var, dataset in self.data.items():
try:
dataset**2
except TypeError:
if dataset is not None:
self.data[var] = np.array(dataset)
self.sigmas_provided = any(value is not None for value in self.sigma_data.values())
# Replace sigmas that are constant by an array of that constant
for var, sigma in zip(self.dependent_data, self.sigma_data):
try:
iter(self.data[sigma])
except TypeError: # not iterable
if self.data[var] is not None and self.data[sigma] is None:
self.data[sigma] = np.ones(self.data[var].shape)
elif self.data[var] is not None:
self.data[sigma] *= np.ones(self.data[var].shape)
# If user gives a preference, use that. Otherwise, use True if at least one sigma is
# given, False if no sigma is given.
if absolute_sigma is not None:
self.absolute_sigma = absolute_sigma
else:
for sigma in self.sigma_data:
# Check if the user provided sigmas in the original data.
# If so, interpret sigmas as measurement errors
if original_data[sigma.name] is not None:
self.absolute_sigma = True
break
else:
self.absolute_sigma = False
def _make_signature(self):
"""
Make a :class:`inspect.Signature` object corresponding to
``self.model``.
:return: :class:`inspect.Signature` object corresponding to
``self.model``.
"""
parameters = self._make_parameters(self.model)
parameters = sorted(parameters, key=lambda p: p.default is None)
return inspect.Signature(parameters=parameters)
@staticmethod
def _make_parameters(model, none_allowed=None):
"""
Based on a model, return the inspect.Parameter objects
needed to satisfy all the variables of this model.
:param model: instance of model
:param none_allowed: If provided, this has to be a sequence of
:class:`symfit.core.argument.Variable` whose values are set to
``None`` by default. If not provided, this will be set to sigma
variables only.
:return: list of :class:`inspect.Parameter` corresponding to all the
external variables of the model.
"""
if none_allowed is None:
none_allowed = model.sigmas.values()
parameters = [
inspect.Parameter(
var.name,
kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
default=None if var in none_allowed else inspect.Parameter.empty
)
for var in model.vars
]
return parameters
@property
def dependent_data(self):
"""
Read-only Property
:return: Data belonging to each dependent variable as a dict with
variable names as key, data as value.
:rtype: collections.OrderedDict
"""
return OrderedDict((var, self.data[var])
for var in self.model.dependent_vars if var in self.data)
@property
def independent_data(self):
"""
Read-only Property
:return: Data belonging to each independent variable as a dict with
variable names as key, data as value.
:rtype: collections.OrderedDict
"""
return OrderedDict((var, self.data[var]) for var in self.model.independent_vars)
@property
def sigma_data(self):
"""
Read-only Property
:return: Data belonging to each sigma variable as a dict with
variable names as key, data as value.
:rtype: collections.OrderedDict
"""
sigmas = self.model.sigmas
return OrderedDict((sigmas[var], self.data[sigmas[var]])
for var in self.model.dependent_vars if sigmas[var] in self.data)
@property
def data_shapes(self):
"""
Returns the shape of the data. In most cases this will be the same for
all variables of the same type, if not this raises an Exception.
Ignores variables which are set to None by design so we know that those
None variables can be assumed to have the same shape as the other in
calculations where this is needed, such as the covariance matrix.
:return: Tuple of all independent var shapes, dependent var shapes.
"""
independent_shapes = []
for var, data in self.independent_data.items():
if data is not None:
independent_shapes.append(data.shape)
dependent_shapes = []
for var, data in self.dependent_data.items():
if data is not None:
dependent_shapes.append(data.shape)
return list(set(independent_shapes)), list(set(dependent_shapes))
@property
def initial_guesses(self):
"""
:return: Initial guesses for every parameter.
"""
return np.array([param.value for param in self.model.params])
[docs]class HasCovarianceMatrix(TakesData):
"""
Mixin class for calculating the covariance matrix for any model that has a
well-defined Jacobian :math:`J`. The covariance is then approximated as
:math:`J^T W J`, where W contains the weights of each data point.
Supports vector valued models, but is unable to estimate covariances for
those, just variances. Therefore, take the result with a grain of salt for
vector models.
"""
def _covariance_matrix(self, best_fit_params, objective):
# Helper function for self.covariance_matrix.
try:
hess = objective.eval_hessian(**key2str(best_fit_params))
except AttributeError:
# Some models do not have an eval_hessian, in which case we give up
return None
else:
if hess is None:
return hess
try:
# The squeezing to a matrix is required for MinimizeModel objectives
hess_inv = np.linalg.inv(np.atleast_2d(np.squeeze(hess)))
except np.linalg.LinAlgError:
return None
if isinstance(objective, LeastSquares):
# Calculate the covariance for a least squares method.
# https://www8.cs.umu.se/kurser/5DA001/HT07/lectures/lsq-handouts.pdf
# Residual sum of squares
rss = 2 * objective(**key2str(best_fit_params))
# Degrees of freedom
raw_dof = np.sum([np.product(shape) for shape in self.data_shapes[1]])
dof = raw_dof - len(self.model.params)
if self.absolute_sigma:
# When interpreting as measurement error, we do not rescale.
s2 = 1
else:
s2 = rss / dof
cov_mat = s2 * hess_inv
return cov_mat
else:
# The inverse hessian is the covariance matrix for Loglikelihood and
# also for objectives in general.
return hess_inv
[docs] def covariance_matrix(self, best_fit_params):
"""
Given best fit parameters, this function finds the covariance matrix.
This matrix gives the (co)variance in the parameters.
:param best_fit_params: ``dict`` of best fit parameters as given by .best_fit_params()
:return: covariance matrix.
"""
cov_matrix = self._covariance_matrix(best_fit_params,
objective=self.objective)
if cov_matrix is None:
# If the covariance matrix could not be computed we try again by
# approximating the hessian with the jacobian.
# VectorLeastSquares should be turned into a LeastSquares for
# cov matrix calculation
if self.objective.__class__ is VectorLeastSquares:
base = LeastSquares
else:
base = self.objective.__class__
class HessApproximation(base, HessianObjectiveJacApprox):
"""
Class which impersonates ``base``, but which returns zeros
for the models Hessian. This will effectively result in the
calculation of the approximate Hessian by calculating
outer(J.T, J) when calling ``base.eval_hessian``.
"""
objective = HessApproximation(self.objective.model,
self.objective.data)
cov_matrix = self._covariance_matrix(best_fit_params,
objective=objective)
return cov_matrix
[docs]class Fit(HasCovarianceMatrix):
"""
Your one stop fitting solution! Based on the nature of the input, this
object will attempt to select the right fitting type for your problem.
If you need very specific control over how the problem is solved, you can
pass it the minimizer or objective function you would like to use.
Example usage::
a, b = parameters('a, b')
x, y = variables('x, y')
model = {y: a * x + b}
# Fit will use its default settings
fit = Fit(model, x=xdata, y=ydata)
fit_result = fit.execute()
# Use Nelder-Mead instead
fit = Fit(model, x=xdata, y=ydata, minimizer=NelderMead)
fit_result = fit.execute()
# Use Nelder-Mead to get close, and BFGS to polish it off
fit = Fit(model, x=xdata, y=ydata, minimizer=[NelderMead, BFGS])
fit_result = fit.execute(minimizer_kwargs=[dict(xatol=0.1), {}])
"""
[docs] def __init__(self, model, *ordered_data, objective=None, minimizer=None,
constraints=None, absolute_sigma=None, **named_data):
"""
:param model: (dict of) sympy expression(s) or ``Model`` object.
:param constraints: iterable of ``Relation`` objects to be used as
constraints.
:param bool absolute_sigma: True by default. If the sigma is only used
for relative weights in your problem, you could consider setting it
to False, but if your sigma are measurement errors, keep it at True.
Note that curve_fit has this set to False by default, which is
wrong in experimental science.
:param objective: Have Fit use your specified objective. Can be one of
the predefined `symfit` objectives or any callable which accepts fit
parameters and returns a scalar.
:param minimizer: Have Fit use your specified
:class:`symfit.core.minimizers.BaseMinimizer`. Can be a
:class:`~collections.abc.Sequence` of :class:`symfit.core.minimizers.BaseMinimizer`.
:param ordered_data: data for dependent, independent and sigma
variables. Assigned in the following order: independent vars are
assigned first, then dependent vars, then sigma's in dependent
vars. Within each group they are assigned in alphabetical order.
:param named_data: assign dependent, independent and sigma variables
data by name.
"""
# Should be a list of Constraint objects
constraints = [] if constraints is None else constraints
# Initiate self.model as an instance of BaseModel if it isn't already
if isinstance(model, BaseModel):
self.model = model
else:
self.model = Model(model)
self.constraints = self._init_constraints(constraints=constraints,
model=self.model)
# Bind as much as possible the provided arguments.
signature = self._make_signature()
bound_arguments = signature.bind_partial(*ordered_data, **named_data)
# Select objective function to use. Has to be done before calling
# super.__init__
self.objective = self._determine_objective(
self.model, objective=objective,
minimizer=minimizer, bound_arguments=bound_arguments
)
super(Fit, self).__init__(self.model, absolute_sigma=absolute_sigma,
**bound_arguments.arguments)
# Update the data belonging to the constraints. We do this by checking
# for the presence of data with the same name as one of the independent
# variables of the constraint. If present, we start addressing them by
# their Variable instead.
for constraint in self.constraints:
for var in constraint.vars:
if var.name in self.data:
self.data[var] = self.data.pop(var.name)
# Initialise the objective with data if it's not initialised already
if not isinstance(self.objective, BaseObjective):
self.objective = self.objective(self.model, self.data)
# Select the minimizer on the basis of the provided information.
if minimizer is None:
minimizer = self._determine_minimizer()
# Initialise the minimizer
if isinstance(minimizer, Sequence):
minimizers = [self._init_minimizer(mini) for mini in minimizer]
self.minimizer = self._init_minimizer(ChainedMinimizer, minimizers=minimizers)
else:
self.minimizer = self._init_minimizer(minimizer)
def _make_signature(self):
parameters = self._make_parameters(self.model)
# Extend the signature with the variables to the constraint. Since
# constraints will be turned into MinimizeModel objectives, they only
# need independent variables to be provided.
for constraint in self.constraints:
none_allowed = constraint.dependent_vars + list(constraint.sigmas.values())
parameters.extend(
self._make_parameters(
constraint, none_allowed=none_allowed
)
)
# Make unique while preserving order, and sort by default value so
# sigma variables end last
unique_parameters = []
for par in parameters:
if par not in unique_parameters:
unique_parameters.append(par)
parameters = sorted(unique_parameters, key=lambda p: p.default is None)
return inspect.Signature(parameters=parameters)
def _determine_minimizer(self):
"""
Determine the most suitable minimizer by the presence of bounds or
constraints.
:return: a subclass of `BaseMinimizer`.
"""
if self.constraints:
return SLSQP
elif any([bound is not None for pair in self.model.bounds for bound in pair]):
# If any bound is set
return LBFGSB
else:
return BFGS
@staticmethod
def _determine_objective(model, objective, minimizer, bound_arguments):
"""
Determine the most suitable objective on the basis of the problem at
hand. This could modify ``bound_arguments`` in place accordingly if
required!
:param model: :class:`symfit.core.models.BaseModel` under consideration.
:param objective: objective provided to :class:`symfit.core.fit.Fit` by
the user, or ``None``.
:param minimizer: :class:`~symfit.core.minimizers.BaseMinimizer`
provided by the user, or ``None``
:param bound_arguments: Instance of :class:`inspect.BoundArguments`.
:return: a subclass of `BaseObjective`.
"""
if objective is None:
if minimizer is MINPACK:
# MINPACK is considered a special snowflake, as its API has to
# be considered separately and has its own non standard
# objective function.
objective = VectorLeastSquares
elif (len(model) == 1 and len(model.independent_vars) == 0 and
model.dependent_vars[0].name not in bound_arguments.arguments):
objective = MinimizeModel
else:
objective = LeastSquares
# Check if the data is compatible with the objective
if (objective is LogLikelihood or objective is MinimizeModel or
isinstance(objective, (MinimizeModel, LogLikelihood))):
# Set dependent vars and corresponding sigmas to None.
for var in model.dependent_vars + list(model.sigmas.values()):
if var.name not in bound_arguments.arguments:
bound_arguments.arguments[var.name] = None
else:
raise TypeError(
'A value was provided for `{}`, however for {} '
'fitting the dependent variable cannot have a value '
'assigned to it.'.format(var.name, objective)
)
return objective
def _init_minimizer(self, minimizer, **minimizer_options):
"""
Takes a :class:`~symfit.core.minimizers.BaseMinimizer` and instantiates
it, passing the jacobian and constraints as appropriate for the
minimizer.
:param minimizer: :class:`~symfit.core.minimizers.BaseMinimizer` to
instantiate.
:param **minimizer_options: Further options to be passed to the
minimizer on instantiation.
:returns: instance of :class:`~symfit.core.minimizers.BaseMinimizer`.
"""
if isinstance(minimizer, BaseMinimizer):
return minimizer
if issubclass(minimizer, BasinHopping):
minimizer_options['local_minimizer'] = self._init_minimizer(
self._determine_minimizer()
)
if issubclass(minimizer, GradientMinimizer):
# If an analytical version of the Jacobian exists we should use
# that, otherwise we let the minimizer estimate it itself.
# Hence the check of jacobian_model, as this is the
# py function version of the analytical jacobian.
if hasattr(self.model, 'eval_jacobian') and hasattr(self.objective, 'eval_jacobian'):
minimizer_options['jacobian'] = self.objective.eval_jacobian
if issubclass(minimizer, HessianMinimizer):
# If an analytical version of the Hessian exists we should use
# that, otherwise we let the minimizer estimate it itself.
# Hence the check of hessian_model, as this is the
# py function version of the analytical hessian.
if hasattr(self.model, 'eval_hessian') and hasattr(self.objective, 'eval_hessian'):
minimizer_options['hessian'] = self.objective.eval_hessian
if issubclass(minimizer, ConstrainedMinimizer):
# set the constraints as MinimizeModel. The dependent vars of the
# constraint are set to None since their value is irrelevant.
constraint_objectives = []
for constraint in self.constraints:
data = self.data # No copy, share state
constraint_objectives.append(MinimizeModel(constraint, data))
minimizer_options['constraints'] = constraint_objectives
return minimizer(self.objective, self.model.params, **minimizer_options)
def _init_constraints(self, constraints, model):
"""
Takes the user provided constraints and converts them to a list of
``type(model)`` objects, which are extended to also have the
parameters of ``model``.
:param constraints: iterable of :class:`~sympy.core.relational.Relation`
objects.
:return: list of :class:`~symfit.core.models.BaseModel` objects. The
exact type will depend on the type of ``model``.
"""
con_models = []
for constraint in constraints:
if hasattr(constraint, 'constraint_type'):
con_models.append(constraint)
else:
if isinstance(model, BaseNumericalModel):
# Numerical models need to be provided with a connectivity
# mapping, so we cannot use the type of model. Instead,
# use the bare minimum for an analytical model for the
# constraint. ToDo: once GradientNumericalModel etc are
# introduced, pick the corresponding analytical model for
# the constraint.
con_models.append(
CallableModel.as_constraint(constraint, model)
)
else:
con_models.append(
model.__class__.as_constraint(constraint, model)
)
return con_models
[docs] def execute(self, **minimize_options):
"""
Execute the fit.
:param minimize_options: keyword arguments to be passed to the specified
minimizer.
:return: FitResults instance
"""
minimizer_ans = self.minimizer.execute(**minimize_options)
minimizer_ans.covariance_matrix = self.covariance_matrix(
dict(zip(self.model.params, minimizer_ans._popt))
)
# Overwrite the DummyModel with the current model
minimizer_ans.model = self.model
minimizer_ans.minimizer = self.minimizer
return minimizer_ans