岭回归交叉验证确定最佳惩罚因子

#!/usr/bin/python # -*- coding:utf-8 -*- import numpy as np import matplotlib.pyplot as plt import pandas as pd from sklearn.model_selection import train_test_split from sklearn.linear_model import ...

ALWAYS_FANG

4289人浏览 · 2018-09-04 17:07:50

ALWAYS_FANG · 2018-09-04 17:07:50 发布

我做了一个导航站（域名是“挂路灯”的全拼 gualudeng.com），里面精选了各种影视、动漫、黑科技、实用工具和搞笑有趣的站点。动动大家可爱的小手，点进来看看吧，良心站点。

#!/usr/bin/python
# -*- coding:utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import GridSearchCV
import matplotlib as mpl

# Select the SimHei font for sans-serif text; the plot legends in this script
# use Chinese labels, which presumably is why a CJK-capable font is chosen.
mpl.rcParams['font.sans-serif']=['simhei']
# Render minus signs on the axes with the ASCII hyphen rather than the Unicode
# minus glyph (the glyph may be missing from the font selected above).
mpl.rcParams['axes.unicode_minus']=False

def mse_rmse(y_true, y_pred):
    """Return the mean squared error and its square root.

    Args:
        y_true: Sequence of observed target values.
        y_pred: Sequence of predicted values, same length as ``y_true``.

    Returns:
        A ``(mse, rmse)`` tuple, where ``mse`` is the average of the squared
        element-wise differences and ``rmse`` is ``sqrt(mse)``.
    """
    # Convert to plain arrays so a pandas Series is compared by position,
    # not aligned by its (shuffled) index.
    residual = np.asarray(y_pred) - np.asarray(y_true)
    mse = np.average(residual ** 2)
    return mse, np.sqrt(mse)


if __name__ == "__main__":
    # Load the data set with pandas; columns: TV, Radio, Newspaper, Sales.
    data = pd.read_csv('Advertising.csv')
    x = data[['TV', 'Radio', 'Newspaper']]
    y = data['Sales']

    # Scatter plot of each raw feature against the target.
    plt.scatter(x['TV'], y, marker="^", label="tv")
    plt.scatter(x['Radio'], y, marker="v", label="radio")
    plt.scatter(x['Newspaper'], y, marker="*", label="newspaper")
    plt.legend(loc='upper right')
    plt.show()

    # Split into training and test sets (fixed seed for reproducibility).
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

    # model = Lasso()
    model = Ridge()

    # Cross-validated grid search over the candidate penalty factors.
    alpha_can = np.logspace(-3, 2, 10)  # candidate alpha values
    ridge_model = GridSearchCV(model, param_grid={'alpha': alpha_can}, cv=5)
    ridge_model.fit(x_train, y_train)
    # A single formatted argument keeps the output identical whether print is
    # the Python 2 statement or the Python 3 function.
    print('最优参数： %s' % (ridge_model.best_params_,))

    # Predict on the DataFrame itself: the model was fitted with column names,
    # and passing a bare ndarray makes recent scikit-learn versions warn about
    # missing feature names.
    pre_y = ridge_model.predict(x_test)
    mse, rmse = mse_rmse(y_test, pre_y)  # Mean / Root Mean Squared Error
    print('%s %s' % (mse, rmse))

    # Compare actual and predicted values sample by sample.
    t = np.arange(len(x_test))
    plt.plot(t, y_test, 'r-', linewidth=2, label=u'原值')
    plt.plot(t, pre_y, 'g-', linewidth=2, label=u'预测值')
    plt.legend(loc='upper right')
    plt.grid()
    plt.show()