29 #ifndef CDPL_MATH_MLRMODEL_HPP
30 #define CDPL_MATH_MLRMODEL_HPP
78 template <
typename T =
double>
92 chiSquare(0), Q(0), r(0), stdDeviation(0) {}
121 template <
typename V>
138 template <
typename V>
185 template <
typename V>
199 template <
typename V>
306 template <
typename T>
309 if (num_points == xMatrix.getSize1() && num_vars == xMatrix.getSize2())
312 xMatrix.resize(num_points, num_vars,
true,
ValueType());
316 template <
typename T>
320 xMatrix.resize(0, 0,
false);
323 template <
typename T>
324 template <
typename V>
327 SizeType x_mtx_size1 = xMatrix.getSize1();
328 SizeType x_mtx_size2 = xMatrix.getSize2();
329 SizeType x_vars_size = x_vars().getSize();
330 SizeType y_vals_size = yValues.getSize();
332 resizeDataSet(std::max(i + 1, std::max(x_mtx_size1, y_vals_size)), std::max(x_vars_size, x_mtx_size2));
334 for (
SizeType j = 0; j < x_vars_size; j++)
335 xMatrix(i, j) = x_vars()(j);
337 if (x_vars_size < x_mtx_size2)
338 for (
SizeType j = x_vars_size; j < x_mtx_size2; j++)
344 template <
typename T>
345 template <
typename V>
349 SizeType x_mtx_size2 = xMatrix.getSize2();
350 SizeType x_vars_size = x_vars().getSize();
352 resizeDataSet(i + 1, std::max(x_mtx_size2, x_vars_size));
354 for (
SizeType j = 0; j < x_vars_size; j++)
355 xMatrix(i, j) = x_vars()(j);
357 if (x_vars_size < x_mtx_size2)
358 for (
SizeType j = x_vars_size; j < x_mtx_size2; j++)
364 template <
typename T>
371 template <
typename T>
378 template <
typename T>
385 template <
typename T>
392 template <
typename T>
400 if (
m == 0 || n == 0)
403 if (n !=
SizeType(yValues.getSize()))
404 resizeDataSet(std::max(
SizeType(yValues.getSize()), n),
m);
406 svdU.resize(n,
m,
false);
407 svdV.resize(
m,
m,
false);
408 svdW.resize(
m,
false);
427 coefficients.resize(
m,
false);
432 template <
typename T>
433 template <
typename V>
438 throw Base::CalculationFailed(
"MLRModel: number of regression coefficients does not match number of independent variables");
443 template <
typename T>
444 template <
typename V>
448 return calcYValue(x);
451 template <
typename T>
458 template <
typename T>
465 template <
typename T>
472 template <
typename T>
479 template <
typename T>
486 template <
typename T>
497 if (
m !=
SizeType(coefficients.getSize()))
498 throw Base::CalculationFailed(
"MLRModel: number of independent variables does not match number of regression coefficients");
500 if (n !=
SizeType(yValues.getSize()))
501 throw Base::CalculationFailed(
"MLRModel: number of dependent variables does not match number of vectors with independent variables");
503 calcYValues.resize(n);
510 mean_data_y += data_y;
511 mean_calc_y += calc_y;
512 chiSquare += y_diff * y_diff;
514 calcYValues(i) = calc_y;
526 ValueType yt = calcYValues(i) - mean_calc_y;
Definition of exception classes.
Definition of matrix data types.
Implementation of matrix singular value decomposition and associated operations.
Provides miscellaneous special mathematical functions.
Definition of type traits.
Definition of vector data types.
Thrown to indicate that some requested calculation has failed.
Definition: Base/Exceptions.hpp:230
Performs Multiple Linear Regression [WLIREG] on a set of data points $(y_i, \vec{X}_i)$.
Definition: MLRModel.hpp:80
ValueType operator()(const VectorExpression< V > &x_vars) const
Predicts the value of the dependent variable for a vector of independent variables given by x_vars.
ValueType getCorrelationCoefficient() const
Returns the correlation coefficient $r$.
Definition: MLRModel.hpp:474
MLRModel()
Constructs and initializes a regression model with an empty data set.
Definition: MLRModel.hpp:91
const VectorType & getCoefficients() const
Returns a read-only vector containing the estimated regression coefficients which were calculated by...
Definition: MLRModel.hpp:453
void resizeDataSet(SizeType num_points, SizeType num_vars)
Resizes the data set to hold num_points data points with num_vars independent variables.
Definition: MLRModel.hpp:307
ValueType getGoodnessOfFit() const
Returns the goodness of fit $Q$.
Definition: MLRModel.hpp:467
CommonType< typename Vector< T >::SizeType, typename Matrix< T >::SizeType >::Type SizeType
Definition: MLRModel.hpp:83
void buildModel()
Performs linear least squares regression modeling of the set of currently stored data points $(y_i, \vec{X}_i)$.
Definition: MLRModel.hpp:393
ValueType getStandardDeviation() const
Returns the standard deviation of the residuals.
Definition: MLRModel.hpp:481
void setXYData(SizeType i, const VectorExpression< V > &x_vars, ValueType y)
Sets the i-th data point of the data set.
Definition: MLRModel.hpp:325
Vector< T > VectorType
Definition: MLRModel.hpp:86
void calcStatistics()
Calculates various statistical parameters describing the built regression model.
Definition: MLRModel.hpp:487
void clearDataSet()
Clears the data set.
Definition: MLRModel.hpp:317
VectorType & getYValues()
Returns a vector containing the dependent variables of the currently stored data points $(y_i, \vec{X}_i)$.
Definition: MLRModel.hpp:380
T ValueType
Definition: MLRModel.hpp:84
ValueType calcYValue(const VectorExpression< V > &x_vars) const
Predicts the value of the dependent variable for a vector of independent variables given by x_vars.
ValueType getChiSquare() const
Returns the sum of squared residuals $\chi^2$.
Definition: MLRModel.hpp:460
MatrixType & getXMatrix()
Returns a matrix where each row represents the vector with independent variables of the currently st...
Definition: MLRModel.hpp:366
Matrix< T > MatrixType
Definition: MLRModel.hpp:85
void addXYData(const VectorExpression< V > &x_vars, ValueType y)
Adds a new data point to the current data set.
Definition: MLRModel.hpp:346
Definition: Matrix.hpp:280
A::size_type SizeType
Definition: Matrix.hpp:288
Definition: Expression.hpp:54
Definition: Vector.hpp:258
constexpr unsigned int T
Specifies Hydrogen (Tritium).
Definition: AtomType.hpp:67
constexpr unsigned int Q
A generic type that covers any element except hydrogen and carbon.
Definition: AtomType.hpp:647
constexpr unsigned int r
Specifies that the stereocenter has r configuration.
Definition: CIPDescriptor.hpp:76
constexpr unsigned int m
Specifies that the stereocenter has m configuration.
Definition: CIPDescriptor.hpp:116
VectorInnerProduct< E1, E2 >::ResultType innerProd(const VectorExpression< E1 > &e1, const VectorExpression< E2 > &e2)
Definition: VectorExpression.hpp:504
MatrixRow< M > row(MatrixExpression< M > &e, typename MatrixRow< M >::SizeType i)
Definition: MatrixProxy.hpp:716
void svSubstitute(const MatrixExpression< U > &u, const VectorExpression< W > &w, const MatrixExpression< V > &v, const VectorExpression< B > &b, VectorExpression< X > &x)
Solves $A \cdot X = B$ for a vector $X$ where $A$ is given by its Singular Value Decomposition [WSVD].
Definition: SVDecomposition.hpp:466
bool svDecompose(MatrixExpression< A > &a, VectorExpression< W > &w, MatrixExpression< V > &v, std::size_t max_iter=0)
Computes the Singular Value Decomposition $A = U W V^T$ [WSVD] of an $M \times N$-dimensional matrix a.
Definition: SVDecomposition.hpp:70
T gammaQ(const T &a, const T &x)
Computes the incomplete gamma function (see [NRIC] for details).
CDPL_PHARM_API const Base::LookupKey TOLERANCE
The namespace of the Chemical Data Processing Library.
Definition: CommonType.hpp:41
Definition: TypeTraits.hpp:171