这是牛马我第一次发CSDN，废话不多说，直接上代码：

1.手写线性回归模型：

包括：正规方程法+梯度下降法+可选正则项，MSE作为损失值（可绘图展示Loss变化曲线），R2作为评价值。

import numpy as np
import matplotlib.pyplot as plt


# 手写线性规划模型（含梯度下降法与正规方程法，并可选择是否引入正则项、是否动态调整学习率）
class MyLinearRegression:
    def __init__(self):
        self.intercept = None   # 截距（即w）
        self.coef = None        # 系数（即b）
        self.theta = None       # 将截距与系数合并为θ
        self.bgd_params = {}    # 梯度下降的参数

    def reset_params(self, theta, **bgd_params):
        """设置系数和参数"""
        theta = np.reshape(theta, (-1,))  # 将系数转为numpy向量格式
        self.theta = theta
        self.intercept = self.theta[0]
        self.coef = self.theta[1:]
        self.bgd_params = bgd_params
        return self

    def fit(self, train_x, train_y, method=2):
        """快速进入不同的训练方法"""
        if method in [1, 'ne']:
            self.fit_ne(train_x, train_y)
        elif method in [2, 'bgd']:
            self.fit_bgd(train_x, train_y)
        elif method in [3, 'ne_reg']:
            self.fit_ne(train_x, train_y, regularized=True)
        elif method in [4, 'bgd_reg']:
            self.fit_bgd(train_x, train_y, regularized=True)

    def fit_ne(self, train_x, train_y,
               regularized=False, lamda=1.5):
        """使用正规方程法训练模型（可选择是否正则化）"""
        # 若不选择正则项，则将其系数设为0
        lamda = 0 if not regularized else lamda
        # 在原有x上加一行1，用于与截距相乘，形成X
        X = np.hstack([np.ones((len(train_x), 1)), train_x])

        reg = lamda*np.eye(X.shape[1])
        reg[0, 0] = 0
        theta = np.linalg.pinv(X.T.dot(X) + reg)
        theta = theta.dot(X.T).dot(train_y)
        self.reset_params(theta)

    def fit_bgd(self, train_x, train_y,
                alpha=0.1, iters=20000, regularized=False, lamda=1.5,
                alpha_v_step=5000, alpha_v_rate=0.95, loss_show=False,
                theta=None):
        """使用梯度下降法训练模型（可选择是否正则化）"""

        # 初始化
        X = np.hstack([np.ones((len(train_x), 1)), train_x])
        num_data, len_theta = X.shape[0], X.shape[1]
        self.theta = np.ones((len_theta, 1)) if theta is None else theta
        lamda = 0 if not regularized else lamda     # 若不选择正则项，则将其系数设为0
        losses = []  # 记录迭代时的损失值变化

        # 梯度下降
        for i in range(iters):
            # 对MSE求导
            res = np.reshape(np.dot(X, self.theta), (-1,))
            error = res - train_y
            update = [np.reshape(error * X[:, j], (-1, 1)) for j in range(len_theta)]
            update = np.hstack(update)
            update = np.reshape(np.mean(update, axis=0), (-1, 1))
            # 更新学习率（每隔一定的迭代次数就按比缩小学习率）
            if i > 0 and i % alpha_v_step == 0:
                alpha = alpha * alpha_v_rate
            # 更新参数（若含正则项，则会在梯度下降前适当缩小原系数）
            self.theta = self.theta * (1-alpha*(lamda/num_data)) - alpha*update
            losses.append(self.mse_loss(train_x, train_y))

        # 绘图展示迭代过程中损失值的变化
        self.reset_params(self.theta, alpha=alpha, iters=iters, lamda=lamda)
        if loss_show:
            plt.plot(range(len(losses)), losses)
            plt.title("MSE-Loss of BGD")  # 图形标题
            plt.xlabel("iters")  # x轴名称
            plt.show()
        return losses

    def predict(self, pred_x):
        pred_x = np.hstack([np.ones((len(pred_x), 1)), pred_x])
        pred_y = pred_x.dot(self.theta)
        # 非常重要的一句！保证输出结果是向量！
        pred_y = np.reshape(pred_y, (-1,))
        return pred_y

    def mse_loss(self, x, y):
        """MSE损失函数"""
        pred_y = self.predict(x)
        mse = np.sum(np.power((pred_y - y), 2)) / y.shape[0]
        return round(mse, 4)

    def score(self, x, y):
        """使用R2_score进行模型评价"""
        mse = self.mse_loss(x, y)
        var = np.var(y)
        return round(1-mse/var, 4)

二、相关实验 (使用Boston房价数据集)

实施的：
1. 实现四种不同的线性回归模型训练方式：正规方程，正规方程+正则化，梯度下降，梯度下降+正则化
2. 使用K折交叉验证法对比四种训练方式的效果
3. 探讨了feature scaling的作用（不进行缩放 / 进行标准化 / 进行归一化）
4. 绘制模型的学习曲线，探讨模型的拟合效果
5. 探讨正则项系数大小对于模型的影响

接下来要尝试的事情：
1. 损失函数使用的是MSE，若换为其他函数呢？比如计算均立方误差/均差？同理，正则项是否也可以立方？
2. 其他的复杂优化方法，如共轭梯度法，拟牛顿法。

import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import MinMaxScaler, scale
import matplotlib.pyplot as plt
from myLinearRegression import MyLinearRegression


# K折交叉验证函数（默认为5折）
def cross_val_score(model, x, y, n_splits=5, fit_method='bgd', image_show=True):
    """
    :param model: 线性模型
    :param x: 数据特征
    :param y: 数据标签
    :param n_splits: 分为几折进行交叉验证
    :param fit_method: 训练模型的方式，可以选择:'ne','bgd','ne_reg','bgd_reg'（ne正规方程法，bgd批量梯度下降，reg正则化）
    :param image_show: 是否绘图展示每一折验证集的结果
    :return: K折交叉验证的平均MSE值，平均R2—score值
    """
    mse_scores, r2_scores = [], []
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=2)
    for train_index, test_index in kf.split(x):
        train_x, train_y = x[train_index], y[train_index]
        test_x, test_y = x[test_index], y[test_index]
        # 重新训练
        model.fit(train_x, train_y, method=fit_method)
        # 测试集效果
        pred_y = model.predict(test_x)
        if image_show:
            plt.subplot(231+len(mse_scores))
            plt.plot(np.arange(len(test_y)), test_y, label='TRUE')
            plt.plot(np.arange(len(test_y)), pred_y, label='PRED')
            plt.legend()
        # 计算模型MSE和R2
        mse_scores.append(model.mse_loss(test_x, test_y))
        r2_scores.append(model.score(test_x, test_y))
    if image_show:
        plt.show()
    print("\nMethod='{}':\n\tCross Val MSEs:".format(fit_method), mse_scores)
    print("\tCross Val R2_scores:", r2_scores)
    return round(np.mean(mse_scores), 4), round(np.mean(r2_scores), 4)


# 实验1：对比四种不同的模型训练方式（ [正规方程法; 梯度下降法] × [无正则化; 正则化] )
def compare_training_method(model, x, y):
    """结论：在波士顿房价数据集下，正规方程法最快且效果好，正则化效果会不稳定"""
    methods, scores = ['ne', 'bgd', 'ne_reg', 'bgd_reg'], []
    for method in methods:
        score = cross_val_score(model, x, y, n_splits=5, fit_method=method, image_show=False)
        scores.append(score)
    print('\n\nAverage Score (MSE_Loss, R2_score): ')
    print(dict(zip(methods, scores)))


# 实验2：特征缩放的必要性 + 不同学习率的Loss变化
def feature_scaling_test(model, x, y):
    """结论（详见报告）:
    1. 正规方程不需要特征缩放
    2. 学习率越小，Loss下降越缓慢。但当学习率过大时，Loss将快速上升（缩放后学习率应小于1)。
    3. 若未缩放特征，学习率的调整难度很大，其数量级只能控制在1e-6左右，否则在迭代时Loss将急速增加或下降极慢
    4. MinMax归一化Loss下降先快后慢，Standard标准化Loss下降先慢后快，（这里的快慢是与另一方比较而言）但总体效果上，Standard缩放收敛更快。
    """
    data1 = x
    data2 = MinMaxScaler().fit_transform(x)
    data3 = scale(x)

    # 正规方程法
    print('NE_TEST')
    for d in [data1, data2, data3]:
        print(cross_val_score(model, d, y, fit_method='ne', image_show=False))

    # 梯度下降法
    print('BGD-TEST')
    # 不进行特征缩放
    alphas = [1e-5, 1e-6, 1e-7]
    losses = []
    train_x, test_x, train_y, test_y = train_test_split(data1, y, test_size=0.25, random_state=1)
    for alpha in alphas:
        losses.append(model.fit_bgd(train_x, train_y, alpha=alpha))
        plt.subplot(130+len(losses))
        plt.plot(range(20000), losses[-1])
        plt.title('No Scaling'+', alpha={}'.format(alpha))
    plt.show()

    # 进行标准化、归一化
    alphas = [1e-2, 5e-3, 1e-3, 5e-4, 1e-4]
    ymaxs = [200, 250, 300, 450, 500]
    losses = []
    for i, alpha in enumerate(alphas):
        for d in [data2, data3]:
            train_x, test_x, train_y, test_y = train_test_split(d, y, test_size=0.25, random_state=1)
            losses.append(model.fit_bgd(train_x, train_y, alpha=alpha))
        plt.subplot(151+i)
        plt.plot(range(20000), losses[-1], label='Standard')
        plt.plot(range(20000), losses[-2], label='MinMax')
        plt.ylim((-1, ymaxs[i]))
        plt.legend()
        plt.xlabel('iters')
        if i==0:
            plt.ylabel('MSE-Loss of BGD')
        plt.title('alpha={}'.format(alpha))
    plt.show()


# 实验3：绘制模型的学习曲线，探讨模型的拟合效果
def draw_learning_curve(model, x, y):
    """结论：随着训练样本增多，训练集与测试集的损失值逐渐重合，说明数据量已经不是影响模型效果的关键了"""
    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, random_state=3)
    num_of_train = range(1, len(train_x) + 1, 3)

    for n in range(2):
        plt.subplot(121+n)
        if n == 0:
            plt.title('Learning Curve of NE')
        else:
            plt.title('Learning Curve of BGD')
        train_scores, test_scores = [], []

        # 逐渐增大训练样本，查看模型在训练集和测试集上的MSE损失值
        for i in num_of_train:
            # 分别查看对正规方程法和梯度下降法的影响
            if n == 0:
                model.fit_ne(train_x[:i, :], train_y[:i])
            else:
                model.fit_bgd(train_x[:i, :], train_y[:i])
            train_scores.append(model.mse_loss(train_x[:i, :], train_y[:i]))
            test_scores.append(model.mse_loss(test_x, test_y))
        plt.plot(num_of_train, train_scores, label='train')
        plt.plot(num_of_train, test_scores, label='test')
        plt.legend()
        plt.xlabel('num of training examples')
        plt.ylabel('MSE-Loss')
    plt.show()


# 实验4：探讨正则项系数对模型训练的影响
def lambda_test(model, x, y):
    """结论：正则项系数lambda由0变大，训练集损失值随之增大，测试集损失值一般先减小后增大（但有时会只有增大过程），lambda在[1,2]区间表现较好"""
    lamdas = list(range(50))
    train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.25, random_state=7)

    for i in range(2):
        plt.subplot(121+i)
        if i == 0:
            plt.title('MSE-Loss of NE-REG')
        else:
            plt.title('MSE-Loss of BGD-REG')
        train_scores, test_scores = [], []
        for lamda in lamdas:
            # 分别查看正则项系数变化对正规方程法和梯度下降法的影响
            if i == 0:
                model.fit_ne(train_x, train_y, regularized=True, lamda=lamda)
            else:
                model.fit_bgd(train_x, train_y, regularized=True, lamda=lamda)
            train_scores.append(model.mse_loss(train_x, train_y))
            test_scores.append(model.mse_loss(test_x, test_y))
        plt.plot(range(len(lamdas)), train_scores, label='train')
        plt.plot(range(len(lamdas)), test_scores, label='test')
        plt.legend()
        plt.xlabel('lambda')
        plt.ylabel('MSE-Loss')
    plt.show()


if __name__ == '__main__':
    # 加载数据
    boston = load_boston()
    print("data: ", boston.data.shape)
    print("target: ", boston.target.shape)
    scaler = MinMaxScaler()
    scaling_data = scaler.fit_transform(boston.data)

    # 模型运行与测试
    test_id = 1
    lr = MyLinearRegression()
    # 简单运行
    if test_id == 0:
        cross_val_score(lr, scaling_data, boston.target, n_splits=5)

    # 实验1
    elif test_id == 1:
        compare_training_method(lr, scaling_data, boston.target)

    # 实验2
    elif test_id == 2:
        feature_scaling_test(lr, boston.data, boston.target)

    # 实验3
    elif test_id == 3:
        draw_learning_curve(lr, scaling_data, boston.target)

    # 实验4
    elif test_id == 4:
        lambda_test(lr, scaling_data, boston.target)

OVER，简单粗暴。

文章出处登录后可见！

已经登录？立即刷新

手写线性回归模型 + 相关实验（训练方法对比、确定学习率/正则项、特征缩放、学习曲线）

1.手写线性回归模型：

二、相关实验 (使用Boston房价数据集)

相关推荐