最小二乘法C++实现
·
C++实现最小二乘法实现,包含一元线性回归、多元线性回归、多项式拟合
一、最小二乘法C++实现
1、头文件定义
// least_squares.h
#ifndef LEAST_SQUARES_H
#define LEAST_SQUARES_H
#include <vector>
#include <cmath>
#include <iostream>
#include <algorithm>
#include <numeric>
namespace LeastSquares {
// 一元线性回归结果
struct LinearResult {
double slope; // 斜率
double intercept; // 截距
double r_squared; // 决定系数R²
double std_error; // 标准误差
double slope_std; // 斜率标准误差
double intercept_std; // 截距标准误差
double confidence_95; // 95%置信区间宽度
void print() const;
};
// 多项式拟合结果
struct PolynomialResult {
std::vector<double> coefficients; // 多项式系数[a0, a1, ..., an]
double r_squared; // 决定系数R²
double mse; // 均方误差
double aic; // AIC信息准则
double bic; // BIC信息准则
void print() const;
double evaluate(double x) const;
};
// 多元线性回归结果
struct MultipleLinearResult {
std::vector<double> coefficients; // 回归系数
double r_squared;
double adjusted_r_squared;
double f_statistic;
double p_value;
std::vector<double> std_errors; // 系数标准误差
std::vector<double> t_values; // t统计量
void print() const;
};
// 回归诊断统计
struct DiagnosticStats {
std::vector<double> residuals; // 残差
std::vector<double> fitted_values; // 拟合值
double durbin_watson; // DW检验统计量
double jarque_bera; // JB检验统计量
double white_test; // White检验统计量
double vif; // 方差膨胀因子
void print() const;
};
// 一元线性回归
class LinearRegression {
public:
LinearRegression(const std::vector<double>& x, const std::vector<double>& y);
LinearResult fit();
double predict(double x) const;
DiagnosticStats diagnose() const;
private:
std::vector<double> x_data;
std::vector<double> y_data;
LinearResult result;
// 计算函数
double calculate_mean(const std::vector<double>& data) const;
double calculate_variance(const std::vector<double>& data) const;
double calculate_covariance() const;
double calculate_correlation() const;
};
// 多项式回归
class PolynomialRegression {
public:
PolynomialRegression(const std::vector<double>& x,
const std::vector<double>& y,
int degree = 2);
PolynomialResult fit();
double predict(double x) const;
int find_best_degree(int max_degree = 10);
private:
std::vector<double> x_data;
std::vector<double> y_data;
int degree;
PolynomialResult result;
// 构建范德蒙矩阵
std::vector<std::vector<double>> build_vandermonde() const;
// 计算条件数
double condition_number(const std::vector<std::vector<double>>& matrix) const;
// 使用SVD求解
std::vector<double> solve_svd(const std::vector<std::vector<double>>& A,
const std::vector<double>& b) const;
};
// 多元线性回归
class MultipleLinearRegression {
public:
MultipleLinearRegression(const std::vector<std::vector<double>>& X,
const std::vector<double>& y);
MultipleLinearResult fit();
double predict(const std::vector<double>& x) const;
private:
std::vector<std::vector<double>> X_data; // 设计矩阵
std::vector<double> y_data;
MultipleLinearResult result;
int n_samples;
int n_features;
// 矩阵运算
std::vector<std::vector<double>> transpose(
const std::vector<std::vector<double>>& matrix) const;
std::vector<std::vector<double>> multiply(
const std::vector<std::vector<double>>& A,
const std::vector<std::vector<double>>& B) const;
std::vector<double> multiply(
const std::vector<std::vector<double>>& A,
const std::vector<double>& b) const;
// 使用Cholesky分解求解
std::vector<double> solve_cholesky(
const std::vector<std::vector<double>>& A,
const std::vector<double>& b) const;
};
// 模型评估
class ModelEvaluator {
public:
static double calculate_rsquared(const std::vector<double>& y_true,
const std::vector<double>& y_pred);
static double calculate_mse(const std::vector<double>& y_true,
const std::vector<double>& y_pred);
static double calculate_mae(const std::vector<double>& y_true,
const std::vector<double>& y_pred);
static double calculate_rmse(const std::vector<double>& y_true,
const std::vector<double>& y_pred);
static double calculate_aic(int n_params, double mse, int n_samples);
static double calculate_bic(int n_params, double mse, int n_samples);
// 交叉验证
static double cross_validate(const std::vector<double>& x,
const std::vector<double>& y,
int k_folds = 5, int degree = 1);
};
} // namespace LeastSquares
#endif // LEAST_SQUARES_H
二、一元线性回归实现
// least_squares.cpp
#include "least_squares.h"
#include <stdexcept>
#include <iomanip>
namespace LeastSquares {
// ================== 一元线性回归 ==================
LinearRegression::LinearRegression(const std::vector<double>& x,
const std::vector<double>& y)
: x_data(x), y_data(y) {
if (x.size() != y.size() || x.size() < 2) {
throw std::invalid_argument("数据长度必须相同且至少2个点");
}
}
double LinearRegression::calculate_mean(const std::vector<double>& data) const {
return std::accumulate(data.begin(), data.end(), 0.0) / data.size();
}
double LinearRegression::calculate_variance(const std::vector<double>& data) const {
double mean = calculate_mean(data);
double sum = 0.0;
for (double val : data) {
double diff = val - mean;
sum += diff * diff;
}
return sum / (data.size() - 1);
}
double LinearRegression::calculate_covariance() const {
double x_mean = calculate_mean(x_data);
double y_mean = calculate_mean(y_data);
double sum = 0.0;
for (size_t i = 0; i < x_data.size(); ++i) {
sum += (x_data[i] - x_mean) * (y_data[i] - y_mean);
}
return sum / (x_data.size() - 1);
}
double LinearRegression::calculate_correlation() const {
double cov = calculate_covariance();
double x_var = calculate_variance(x_data);
double y_var = calculate_variance(y_data);
if (x_var == 0 || y_var == 0) return 0.0;
return cov / sqrt(x_var * y_var);
}
LinearResult LinearRegression::fit() {
double x_mean = calculate_mean(x_data);
double y_mean = calculate_mean(y_data);
// 计算斜率和截距
double sum_xy = 0.0;
double sum_xx = 0.0;
double sum_yy = 0.0;
for (size_t i = 0; i < x_data.size(); ++i) {
double x_diff = x_data[i] - x_mean;
double y_diff = y_data[i] - y_mean;
sum_xy += x_diff * y_diff;
sum_xx += x_diff * x_diff;
sum_yy += y_diff * y_diff;
}
// 斜率 = Σ((xi - x̄)(yi - ȳ)) / Σ((xi - x̄)²)
result.slope = sum_xy / sum_xx;
// 截距 = ȳ - 斜率 * x̄
result.intercept = y_mean - result.slope * x_mean;
// 计算R²
result.r_squared = (sum_xx > 0) ? (sum_xy * sum_xy) / (sum_xx * sum_yy) : 0.0;
// 计算标准误差
double sse = 0.0; // 残差平方和
std::vector<double> fitted;
for (size_t i = 0; i < x_data.size(); ++i) {
double y_pred = result.intercept + result.slope * x_data[i];
fitted.push_back(y_pred);
double residual = y_data[i] - y_pred;
sse += residual * residual;
}
int n = x_data.size();
int df = n - 2; // 自由度
// 标准误差
result.std_error = sqrt(sse / df);
// 斜率的标准误差
result.slope_std = result.std_error / sqrt(sum_xx);
// 截距的标准误差
double x_squared_sum = 0.0;
for (double x : x_data) {
x_squared_sum += x * x;
}
result.intercept_std = result.std_error * sqrt(x_squared_sum / (n * sum_xx));
// 95%置信区间宽度
// 使用t分布临界值(近似,实际应查表)
double t_critical = 1.96; // n足够大时的近似
result.confidence_95 = t_critical * result.std_error;
return result;
}
double LinearRegression::predict(double x) const {
return result.intercept + result.slope * x;
}
DiagnosticStats LinearRegression::diagnose() const {
DiagnosticStats stats;
int n = x_data.size();
// 计算残差和拟合值
for (size_t i = 0; i < n; ++i) {
double y_pred = result.intercept + result.slope * x_data[i];
stats.fitted_values.push_back(y_pred);
stats.residuals.push_back(y_data[i] - y_pred);
}
// Durbin-Watson检验(自相关)
double dw_numerator = 0.0;
double dw_denominator = 0.0;
for (int i = 1; i < n; ++i) {
double diff = stats.residuals[i] - stats.residuals[i-1];
dw_numerator += diff * diff;
}
for (int i = 0; i < n; ++i) {
dw_denominator += stats.residuals[i] * stats.residuals[i];
}
stats.durbin_watson = (dw_denominator > 0) ? dw_numerator / dw_denominator : 0.0;
// Jarque-Bera检验(正态性)
double mean_residual = calculate_mean(stats.residuals);
double skewness = 0.0;
double kurtosis = 0.0;
for (double r : stats.residuals) {
double z = (r - mean_residual) / result.std_error;
skewness += z * z * z;
kurtosis += z * z * z * z;
}
skewness /= n;
kurtosis /= n;
stats.jarque_bera = n * (skewness * skewness / 6.0 +
(kurtosis - 3.0) * (kurtosis - 3.0) / 24.0);
return stats;
}
void LinearResult::print() const {
std::cout << "=== 一元线性回归结果 ===\n";
std::cout << "模型: y = " << std::fixed << std::setprecision(4)
<< slope << " * x + " << intercept << "\n";
std::cout << "决定系数 R²: " << r_squared << "\n";
std::cout << "标准误差: " << std_error << "\n";
std::cout << "斜率标准误差: " << slope_std << "\n";
std::cout << "截距标准误差: " << intercept_std << "\n";
std::cout << "95%置信区间宽度: ±" << confidence_95 << "\n";
}
// ================== 多项式回归 ==================
PolynomialRegression::PolynomialRegression(const std::vector<double>& x,
const std::vector<double>& y,
int degree)
: x_data(x), y_data(y), degree(degree) {
if (x.size() != y.size() || x.size() <= degree) {
throw std::invalid_argument("数据不足或阶数过高");
}
}
std::vector<std::vector<double>> PolynomialRegression::build_vandermonde() const {
int n = x_data.size();
std::vector<std::vector<double>> V(n, std::vector<double>(degree + 1));
for (int i = 0; i < n; ++i) {
double x_power = 1.0;
for (int j = 0; j <= degree; ++j) {
V[i][j] = x_power;
x_power *= x_data[i];
}
}
return V;
}
PolynomialResult PolynomialRegression::fit() {
int n = x_data.size();
int m = degree + 1; // 系数个数
// 构建范德蒙矩阵
std::vector<std::vector<double>> V = build_vandermonde();
// 构建正规方程: V^T * V * a = V^T * y
std::vector<std::vector<double>> VtV(m, std::vector<double>(m, 0.0));
std::vector<double> Vty(m, 0.0);
// 计算V^T * V
for (int i = 0; i < m; ++i) {
for (int j = 0; j < m; ++j) {
double sum = 0.0;
for (int k = 0; k < n; ++k) {
sum += V[k][i] * V[k][j];
}
VtV[i][j] = sum;
}
}
// 计算V^T * y
for (int i = 0; i < m; ++i) {
double sum = 0.0;
for (int k = 0; k < n; ++k) {
sum += V[k][i] * y_data[k];
}
Vty[i] = sum;
}
// 使用SVD求解系数
result.coefficients = solve_svd(VtV, Vty);
// 计算预测值和残差
double sse = 0.0; // 残差平方和
double sst = 0.0; // 总平方和
double y_mean = calculate_mean(y_data);
for (int i = 0; i < n; ++i) {
double y_pred = 0.0;
double x_power = 1.0;
for (int j = 0; j <= degree; ++j) {
y_pred += result.coefficients[j] * x_power;
x_power *= x_data[i];
}
double residual = y_data[i] - y_pred;
sse += residual * residual;
double total_diff = y_data[i] - y_mean;
sst += total_diff * total_diff;
}
// 计算统计量
result.r_squared = 1.0 - sse / sst;
result.mse = sse / n;
result.aic = ModelEvaluator::calculate_aic(m, result.mse, n);
result.bic = ModelEvaluator::calculate_bic(m, result.mse, n);
return result;
}
double PolynomialRegression::predict(double x) const {
double result = 0.0;
double x_power = 1.0;
for (size_t i = 0; i < result.coefficients.size(); ++i) {
result += result.coefficients[i] * x_power;
x_power *= x;
}
return result;
}
int PolynomialRegression::find_best_degree(int max_degree) {
int best_degree = 1;
double best_aic = std::numeric_limits<double>::max();
for (int d = 1; d <= max_degree; ++d) {
PolynomialRegression poly(x_data, y_data, d);
PolynomialResult res = poly.fit();
if (res.aic < best_aic) {
best_aic = res.aic;
best_degree = d;
}
}
return best_degree;
}
// 简单的SVD求解(Householder QR分解)
std::vector<double> PolynomialRegression::solve_svd(
const std::vector<std::vector<double>>& A,
const std::vector<double>& b) const {
int n = A.size();
std::vector<double> x(n, 0.0);
// 简单的高斯消元法(实际应用中应使用更稳定的方法)
std::vector<std::vector<double>> augmented = A;
for (int i = 0; i < n; ++i) {
augmented[i].push_back(b[i]);
}
// 前向消元
for (int i = 0; i < n; ++i) {
// 寻找主元
int max_row = i;
for (int k = i + 1; k < n; ++k) {
if (fabs(augmented[k][i]) > fabs(augmented[max_row][i])) {
max_row = k;
}
}
// 交换行
if (max_row != i) {
std::swap(augmented[i], augmented[max_row]);
}
// 消元
for (int k = i + 1; k < n; ++k) {
double factor = augmented[k][i] / augmented[i][i];
for (int j = i; j <= n; ++j) {
augmented[k][j] -= factor * augmented[i][j];
}
}
}
// 回代
for (int i = n - 1; i >= 0; --i) {
x[i] = augmented[i][n] / augmented[i][i];
for (int k = i - 1; k >= 0; --k) {
augmented[k][n] -= augmented[k][i] * x[i];
}
}
return x;
}
void PolynomialResult::print() const {
std::cout << "=== 多项式回归结果 ===\n";
std::cout << "模型: y = ";
for (int i = coefficients.size() - 1; i >= 0; --i) {
if (i == coefficients.size() - 1) {
std::cout << coefficients[i] << "*x^" << i;
} else if (coefficients[i] >= 0) {
std::cout << " + " << coefficients[i];
if (i > 0) std::cout << "*x^" << i;
} else {
std::cout << " - " << -coefficients[i];
if (i > 0) std::cout << "*x^" << i;
}
}
std::cout << "\n";
std::cout << "决定系数 R²: " << r_squared << "\n";
std::cout << "均方误差 MSE: " << mse << "\n";
std::cout << "AIC: " << aic << "\n";
std::cout << "BIC: " << bic << "\n";
}
double PolynomialResult::evaluate(double x) const {
double result = 0.0;
double x_power = 1.0;
for (double coeff : coefficients) {
result += coeff * x_power;
x_power *= x;
}
return result;
}
// ================== 多元线性回归 ==================
MultipleLinearRegression::MultipleLinearRegression(
const std::vector<std::vector<double>>& X,
const std::vector<double>& y)
: X_data(X), y_data(y) {
n_samples = X.size();
if (n_samples == 0) throw std::invalid_argument("无样本数据");
n_features = X[0].size();
for (const auto& row : X) {
if (row.size() != n_features) {
throw std::invalid_argument("X矩阵维度不一致");
}
}
if (X.size() != y.size()) {
throw std::invalid_argument("X和y维度不匹配");
}
}
std::vector<std::vector<double>> MultipleLinearRegression::transpose(
const std::vector<std::vector<double>>& matrix) const {
int rows = matrix.size();
int cols = matrix[0].size();
std::vector<std::vector<double>> result(cols, std::vector<double>(rows));
for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
result[j][i] = matrix[i][j];
}
}
return result;
}
std::vector<std::vector<double>> MultipleLinearRegression::multiply(
const std::vector<std::vector<double>>& A,
const std::vector<std::vector<double>>& B) const {
int a_rows = A.size();
int a_cols = A[0].size();
int b_rows = B.size();
int b_cols = B[0].size();
if (a_cols != b_rows) {
throw std::invalid_argument("矩阵维度不匹配");
}
std::vector<std::vector<double>> result(a_rows, std::vector<double>(b_cols, 0.0));
for (int i = 0; i < a_rows; ++i) {
for (int j = 0; j < b_cols; ++j) {
for (int k = 0; k < a_cols; ++k) {
result[i][j] += A[i][k] * B[k][j];
}
}
}
return result;
}
std::vector<double> MultipleLinearRegression::multiply(
const std::vector<std::vector<double>>& A,
const std::vector<double>& b) const {
int a_rows = A.size();
int a_cols = A[0].size();
if (a_cols != b.size()) {
throw std::invalid_argument("矩阵和向量维度不匹配");
}
std::vector<double> result(a_rows, 0.0);
for (int i = 0; i < a_rows; ++i) {
for (int j = 0; j < a_cols; ++j) {
result[i] += A[i][j] * b[j];
}
}
return result;
}
MultipleLinearResult MultipleLinearRegression::fit() {
// 添加截距项
std::vector<std::vector<double>> X_with_intercept = X_data;
for (auto& row : X_with_intercept) {
row.insert(row.begin(), 1.0); // 添加常数项
}
// 计算X^T * X
auto Xt = transpose(X_with_intercept);
auto XtX = multiply(Xt, X_with_intercept);
auto Xty = multiply(Xt, y_data);
// 使用Cholesky分解求解
result.coefficients = solve_cholesky(XtX, Xty);
// 计算预测值
std::vector<double> y_pred = multiply(X_with_intercept, result.coefficients);
// 计算统计量
double y_mean = calculate_mean(y_data);
double sse = 0.0; // 残差平方和
double sst = 0.0; // 总平方和
for (int i = 0; i < n_samples; ++i) {
double residual = y_data[i] - y_pred[i];
sse += residual * residual;
double total_diff = y_data[i] - y_mean;
sst += total_diff * total_diff;
}
result.r_squared = 1.0 - sse / sst;
result.adjusted_r_squared = 1.0 - (1.0 - result.r_squared) *
(n_samples - 1) / (n_samples - n_features - 1);
// F统计量
double ssr = sst - sse; // 回归平方和
double msr = ssr / n_features;
double mse_val = sse / (n_samples - n_features - 1);
result.f_statistic = msr / mse_val;
return result;
}
double MultipleLinearRegression::predict(const std::vector<double>& x) const {
if (x.size() != n_features) {
throw std::invalid_argument("特征数量不匹配");
}
double result = 0.0;
result += result.coefficients[0]; // 截距
for (size_t i = 0; i < x.size(); ++i) {
result += result.coefficients[i + 1] * x[i];
}
return result;
}
// Cholesky分解求解
std::vector<double> MultipleLinearRegression::solve_cholesky(
const std::vector<std::vector<double>>& A,
const std::vector<double>& b) const {
int n = A.size();
std::vector<std::vector<double>> L(n, std::vector<double>(n, 0.0));
// Cholesky分解: A = L * L^T
for (int i = 0; i < n; ++i) {
for (int j = 0; j <= i; ++j) {
double sum = 0.0;
if (j == i) { // 对角线元素
for (int k = 0; k < j; ++k) {
sum += L[j][k] * L[j][k];
}
L[j][j] = sqrt(A[j][j] - sum);
} else { // 非对角线元素
for (int k = 0; k < j; ++k) {
sum += L[i][k] * L[j][k];
}
L[i][j] = (A[i][j] - sum) / L[j][j];
}
}
}
// 前向替换: L * y = b
std::vector<double> y(n, 0.0);
for (int i = 0; i < n; ++i) {
double sum = 0.0;
for (int j = 0; j < i; ++j) {
sum += L[i][j] * y[j];
}
y[i] = (b[i] - sum) / L[i][i];
}
// 后向替换: L^T * x = y
std::vector<double> x(n, 0.0);
auto LT = transpose(L);
for (int i = n - 1; i >= 0; --i) {
double sum = 0.0;
for (int j = i + 1; j < n; ++j) {
sum += LT[i][j] * x[j];
}
x[i] = (y[i] - sum) / LT[i][i];
}
return x;
}
void MultipleLinearResult::print() const {
std::cout << "=== 多元线性回归结果 ===\n";
std::cout << "模型: y = ";
for (size_t i = 0; i < coefficients.size(); ++i) {
if (i == 0) {
std::cout << coefficients[i];
} else {
std::cout << " + " << coefficients[i] << "*x" << i;
}
}
std::cout << "\n";
std::cout << "决定系数 R²: " << r_squared << "\n";
std::cout << "调整R²: " << adjusted_r_squared << "\n";
std::cout << "F统计量: " << f_statistic << "\n";
}
// ================== 模型评估 ==================
double ModelEvaluator::calculate_rsquared(const std::vector<double>& y_true,
const std::vector<double>& y_pred) {
int n = y_true.size();
double y_mean = std::accumulate(y_true.begin(), y_true.end(), 0.0) / n;
double sse = 0.0; // 残差平方和
double sst = 0.0; // 总平方和
for (int i = 0; i < n; ++i) {
double residual = y_true[i] - y_pred[i];
sse += residual * residual;
double total_diff = y_true[i] - y_mean;
sst += total_diff * total_diff;
}
return 1.0 - sse / sst;
}
double ModelEvaluator::calculate_mse(const std::vector<double>& y_true,
const std::vector<double>& y_pred) {
int n = y_true.size();
double sum = 0.0;
for (int i = 0; i < n; ++i) {
double diff = y_true[i] - y_pred[i];
sum += diff * diff;
}
return sum / n;
}
double ModelEvaluator::calculate_mae(const std::vector<double>& y_true,
const std::vector<double>& y_pred) {
int n = y_true.size();
double sum = 0.0;
for (int i = 0; i < n; ++i) {
sum += fabs(y_true[i] - y_pred[i]);
}
return sum / n;
}
double ModelEvaluator::calculate_rmse(const std::vector<double>& y_true,
const std::vector<double>& y_pred) {
return sqrt(calculate_mse(y_true, y_pred));
}
double ModelEvaluator::calculate_aic(int n_params, double mse, int n_samples) {
return n_samples * log(mse) + 2 * n_params;
}
double ModelEvaluator::calculate_bic(int n_params, double mse, int n_samples) {
return n_samples * log(mse) + n_params * log(n_samples);
}
double ModelEvaluator::cross_validate(const std::vector<double>& x,
const std::vector<double>& y,
int k_folds, int degree) {
int n = x.size();
int fold_size = n / k_folds;
double total_mse = 0.0;
for (int fold = 0; fold < k_folds; ++fold) {
// 划分训练集和测试集
std::vector<double> x_train, y_train, x_test, y_test;
for (int i = 0; i < n; ++i) {
if (i >= fold * fold_size && i < (fold + 1) * fold_size) {
x_test.push_back(x[i]);
y_test.push_back(y[i]);
} else {
x_train.push_back(x[i]);
y_train.push_back(y[i]);
}
}
// 训练模型
PolynomialRegression model(x_train, y_train, degree);
PolynomialResult result = model.fit();
// 测试模型
double mse = 0.0;
for (size_t i = 0; i < x_test.size(); ++i) {
double y_pred = result.evaluate(x_test[i]);
double diff = y_test[i] - y_pred;
mse += diff * diff;
}
mse /= x_test.size();
total_mse += mse;
}
return total_mse / k_folds;
}
} // namespace LeastSquares
三、使用示例
// example.cpp
#include "least_squares.h"
#include <iostream>
#include <vector>
#include <random>
using namespace LeastSquares;
int main() {
// 生成示例数据
std::vector<double> x, y;
std::random_device rd;
std::mt19937 gen(rd());
std::normal_distribution<> dist(0, 1);
// 生成线性数据: y = 2x + 3 + 噪声
for (int i = 0; i < 100; ++i) {
double xi = i * 0.1;
double yi = 2.0 * xi + 3.0 + dist(gen) * 0.5;
x.push_back(xi);
y.push_back(yi);
}
std::cout << "=== 一元线性回归示例 ===\n";
LinearRegression lr(x, y);
LinearResult linear_result = lr.fit();
linear_result.print();
std::cout << "\n预测 x=5.0: " << lr.predict(5.0) << std::endl;
DiagnosticStats diag = lr.diagnose();
std::cout << "Durbin-Watson统计量: " << diag.durbin_watson << std::endl;
std::cout << "\n=== 多项式回归示例 ===\n";
PolynomialRegression poly(x, y, 3);
PolynomialResult poly_result = poly.fit();
poly_result.print();
int best_degree = poly.find_best_degree(5);
std::cout << "最佳多项式阶数: " << best_degree << std::endl;
std::cout << "\n=== 多元线性回归示例 ===\n";
// 生成多元数据: y = 1 + 2x1 + 3x2 + 噪声
std::vector<std::vector<double>> X;
std::vector<double> y_multi;
for (int i = 0; i < 100; ++i) {
double x1 = i * 0.1;
double x2 = sin(i * 0.1);
X.push_back({x1, x2});
y_multi.push_back(1.0 + 2.0 * x1 + 3.0 * x2 + dist(gen) * 0.5);
}
MultipleLinearRegression mlr(X, y_multi);
MultipleLinearResult mlr_result = mlr.fit();
mlr_result.print();
// 模型评估
std::vector<double> predictions;
for (const auto& xi : X) {
predictions.push_back(mlr.predict(xi));
}
double mse = ModelEvaluator::calculate_mse(y_multi, predictions);
double r2 = ModelEvaluator::calculate_rsquared(y_multi, predictions);
std::cout << "MSE: " << mse << std::endl;
std::cout << "R²: " << r2 << std::endl;
// 交叉验证
double cv_score = ModelEvaluator::cross_validate(x, y, 5, 2);
std::cout << "\n5折交叉验证MSE: " << cv_score << std::endl;
return 0;
}
四、高级功能扩展
1. 岭回归(L2正则化)
// ridge_regression.h
#include "least_squares.h"
class RidgeRegression {
public:
RidgeRegression(const std::vector<std::vector<double>>& X,
const std::vector<double>& y, double alpha = 0.1);
std::vector<double> fit();
double predict(const std::vector<double>& x) const;
private:
std::vector<std::vector<double>> X_data;
std::vector<double> y_data;
double alpha;
std::vector<double> coefficients;
std::vector<double> solve_ridge();
};
2. 主成分回归(PCR)
// pcr.h
#include "least_squares.h"
#include <Eigen/Dense> // 需要Eigen库
class PrincipalComponentRegression {
public:
PrincipalComponentRegression(const std::vector<std::vector<double>>& X,
const std::vector<double>& y, int n_components = 2);
std::vector<double> fit();
double predict(const std::vector<double>& x) const;
private:
std::vector<std::vector<double>> X_data;
std::vector<double> y_data;
int n_components;
std::vector<double> coefficients;
Eigen::MatrixXd loadings;
void perform_pca();
};
参考代码 C++实现最小二乘法一元回归和多项式拟合 www.youwenfan.com/contentcsu/70070.html
五、编译和运行
1. 编译命令
g++ -std=c++11 -O2 example.cpp least_squares.cpp -o least_squares_demo
2. 运行
./least_squares_demo
六、性能优化
1. 使用Eigen库加速
// 在CMakeLists.txt中添加
find_package(Eigen3 REQUIRED)
target_link_libraries(your_target Eigen3::Eigen)
2. 使用BLAS/LAPACK
extern "C" {
void dgesv_(int*, int*, double*, int*, int*, double*, int*, int*);
// 使用LAPACK求解线性系统
}
3. 多线程加速
#include <omp.h>
#pragma omp parallel for
for (int i = 0; i < n; ++i) {
// 并行计算
}
更多推荐

所有评论(0)