这是牛马我第一次发CSDN,废话不多说,直接上代码:
1.手写线性回归模型:
包括:正规方程法+梯度下降法+可选正则项,MSE作为损失值(可绘图展示Loss变化曲线),R2作为评价值。
import numpy as np
import matplotlib.pyplot as plt
# 手写线性规划模型(含梯度下降法与正规方程法,并可选择是否引入正则项、是否动态调整学习率)
class MyLinearRegression:
def __init__(self):
self.intercept = None # 截距(即w)
self.coef = None # 系数(即b)
self.theta = None # 将截距与系数合并为θ
self.bgd_params = {} # 梯度下降的参数
def reset_params(self, theta, **bgd_params):
"""设置系数和参数"""
theta = np.reshape(theta, (-1,)) # 将系数转为numpy向量格式
self.theta = theta
self.intercept = self.theta[0]
self.coef = self.theta[1:]
self.bgd_params = bgd_params
return self
def fit(self, train_x, train_y, method=2):
"""快速进入不同的训练方法"""
if method in [1, 'ne']:
self.fit_ne(train_x, train_y)
elif method in [2, 'bgd']:
self.fit_bgd(train_x, train_y)
elif method in [3, 'ne_reg']:
self.fit_ne(train_x, train_y, regularized=True)
elif method in [4, 'bgd_reg']:
self.fit_bgd(train_x, train_y, regularized=True)
def fit_ne(self, train_x, train_y,
regularized=False, lamda=1.5):
"""使用正规方程法训练模型(可选择是否正则化)"""
# 若不选择正则项,则将其系数设为0
lamda = 0 if not regularized else lamda
# 在原有x上加一行1,用于与截距相乘,形成X
X = np.hstack([np.ones((len(train_x), 1)), train_x])
reg = lamda*np.eye(X.shape[1])
reg[0, 0] = 0
theta = np.linalg.pinv(X.T.dot(X) + reg)
theta = theta.dot(X.T).dot(train_y)
self.reset_params(theta)
def fit_bgd(self, train_x, train_y,
alpha=0.1, iters=20000, regularized=False, lamda=1.5,
alpha_v_step=5000, alpha_v_rate=0.95, loss_show=False,
theta=None):
"""使用梯度下降法训练模型(可选择是否正则化)"""
# 初始化
X = np.hstack([np.ones((len(train_x), 1)), train_x])
num_data, len_theta = X.shape[0], X.shape[1]
self.theta = np.ones((len_theta, 1)) if theta is None else theta
lamda = 0 if not regularized else lamda # 若不选择正则项,则将其系数设为0
losses = [] # 记录迭代时的损失值变化
# 梯度下降
for i in range(iters):
# 对MSE求导
res = np.reshape(np.dot(X, self.theta), (-1,))
error = res - train_y
update = [np.reshape(error * X[:, j], (-1, 1)) for j in range(len_theta)]
update = np.hstack(update)
update = np.reshape(np.mean(update, axis=0), (-1, 1))
# 更新学习率(每隔一定的迭代次数就按比缩小学习率)
if i > 0 and i % alpha_v_step == 0:
alpha = alpha * alpha_v_rate
# 更新参数(若含正则项,则会在梯度下降前适当缩小原系数)
self.theta = self.theta * (1-alpha*(lamda/num_data)) - alpha*update
losses.append(self.mse_loss(train_x, train_y))
# 绘图展示迭代过程中损失值的变化
self.reset_params(self.theta, alpha=alpha, iters=iters, lamda=lamda)
if loss_show:
plt.plot(range(len(losses)), losses)
plt.title("MSE-Loss of BGD") # 图形标题
plt.xlabel("iters") # x轴名称
plt.show()
return losses
def predict(self, pred_x):
pred_x = np.hstack([np.ones((len(pred_x), 1)), pred_x])
pred_y = pred_x.dot(self.theta)
# 非常重要的一句!保证输出结果是向量!
pred_y = np.reshape(pred_y, (-1,))
return pred_y
def mse_loss(self, x, y):
"""MSE损失函数"""
pred_y = self.predict(x)
mse = np.sum(np.power((pred_y - y), 2)) / y.shape[0]
return round(mse, 4)
def score(self, x, y):
"""使用R2_score进行模型评价"""
mse = self.mse_loss(x, y)
var = np.var(y)
return round(1-mse/var, 4)
二、相关实验 (使用Boston房价数据集)
实施的:
1. 实现四种不同的线性回归模型训练方式:正规方程,正规方程+正则化,梯度下降,梯度下降+正则化
2. 使用K折交叉验证法对比四种训练方式的效果
3. 探讨了feature scaling的作用(不进行缩放 / 进行标准化 / 进行归一化)
4. 绘制模型的学习曲线,探讨模型的拟合效果
5. 探讨正则项系数大小对于模型的影响
接下来要尝试的事情:
1. 损失函数使用的是MSE,若换为其他函数呢?比如计算均立方误差/均差?同理,正则项是否也可以立方?
2. 其他的复杂优化方法,如共轭梯度法,拟牛顿法。
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import MinMaxScaler, scale
import matplotlib.pyplot as plt
from myLinearRegression import MyLinearRegression
# K折交叉验证函数(默认为5折)
def cross_val_score(model, x, y, n_splits=5, fit_method='bgd', image_show=True):
"""
:param model: 线性模型
:param x: 数据特征
:param y: 数据标签
:param n_splits: 分为几折进行交叉验证
:param fit_method: 训练模型的方式,可以选择:'ne','bgd','ne_reg','bgd_reg'(ne正规方程法,bgd批量梯度下降,reg正则化)
:param image_show: 是否绘图展示每一折验证集的结果
:return: K折交叉验证的平均MSE值,平均R2—score值
"""
mse_scores, r2_scores = [], []
kf = KFold(n_splits=n_splits, shuffle=True, random_state=2)
for train_index, test_index in kf.split(x):
train_x, train_y = x[train_index], y[train_index]
test_x, test_y = x[test_index], y[test_index]
# 重新训练
model.fit(train_x, train_y, method=fit_method)
# 测试集效果
pred_y = model.predict(test_x)
if image_show:
plt.subplot(231+len(mse_scores))
plt.plot(np.arange(len(test_y)), test_y, label='TRUE')
plt.plot(np.arange(len(test_y)), pred_y, label='PRED')
plt.legend()
# 计算模型MSE和R2
mse_scores.append(model.mse_loss(test_x, test_y))
r2_scores.append(model.score(test_x, test_y))
if image_show:
plt.show()
print("\nMethod='{}':\n\tCross Val MSEs:".format(fit_method), mse_scores)
print("\tCross Val R2_scores:", r2_scores)
return round(np.mean(mse_scores), 4), round(np.mean(r2_scores), 4)
# 实验1:对比四种不同的模型训练方式( [正规方程法; 梯度下降法] × [无正则化; 正则化] )
def compare_training_method(model, x, y):
"""结论:在波士顿房价数据集下,正规方程法最快且效果好,正则化效果会不稳定"""
methods, scores = ['ne', 'bgd', 'ne_reg', 'bgd_reg'], []
for method in methods:
score = cross_val_score(model, x, y, n_splits=5, fit_method=method, image_show=False)
scores.append(score)
print('\n\nAverage Score (MSE_Loss, R2_score): ')
print(dict(zip(methods, scores)))
# 实验2:特征缩放的必要性 + 不同学习率的Loss变化
def feature_scaling_test(model, x, y):
"""结论(详见报告):
1. 正规方程不需要特征缩放
2. 学习率越小,Loss下降越缓慢。但当学习率过大时,Loss将快速上升(缩放后学习率应小于1)。
3. 若未缩放特征,学习率的调整难度很大,其数量级只能控制在1e-6左右,否则在迭代时Loss将急速增加或下降极慢
4. MinMax归一化Loss下降先快后慢,Standard标准化Loss下降先慢后快,(这里的快慢是与另一方比较而言)但总体效果上,Standard缩放收敛更快。
"""
data1 = x
data2 = MinMaxScaler().fit_transform(x)
data3 = scale(x)
# 正规方程法
print('NE_TEST')
for d in [data1, data2, data3]:
print(cross_val_score(model, d, y, fit_method='ne', image_show=False))
# 梯度下降法
print('BGD-TEST')
# 不进行特征缩放
alphas = [1e-5, 1e-6, 1e-7]
losses = []
train_x, test_x, train_y, test_y = train_test_split(data1, y, test_size=0.25, random_state=1)
for alpha in alphas:
losses.append(model.fit_bgd(train_x, train_y, alpha=alpha))
plt.subplot(130+len(losses))
plt.plot(range(20000), losses[-1])
plt.title('No Scaling'+', alpha={}'.format(alpha))
plt.show()
# 进行标准化、归一化
alphas = [1e-2, 5e-3, 1e-3, 5e-4, 1e-4]
ymaxs = [200, 250, 300, 450, 500]
losses = []
for i, alpha in enumerate(alphas):
for d in [data2, data3]:
train_x, test_x, train_y, test_y = train_test_split(d, y, test_size=0.25, random_state=1)
losses.append(model.fit_bgd(train_x, train_y, alpha=alpha))
plt.subplot(151+i)
plt.plot(range(20000), losses[-1], label='Standard')
plt.plot(range(20000), losses[-2], label='MinMax')
plt.ylim((-1, ymaxs[i]))
plt.legend()
plt.xlabel('iters')
if i==0:
plt.ylabel('MSE-Loss of BGD')
plt.title('alpha={}'.format(alpha))
plt.show()
# 实验3:绘制模型的学习曲线,探讨模型的拟合效果
def draw_learning_curve(model, x, y):
"""结论:随着训练样本增多,训练集与测试集的损失值逐渐重合,说明数据量已经不是影响模型效果的关键了"""
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, random_state=3)
num_of_train = range(1, len(train_x) + 1, 3)
for n in range(2):
plt.subplot(121+n)
if n == 0:
plt.title('Learning Curve of NE')
else:
plt.title('Learning Curve of BGD')
train_scores, test_scores = [], []
# 逐渐增大训练样本,查看模型在训练集和测试集上的MSE损失值
for i in num_of_train:
# 分别查看对正规方程法和梯度下降法的影响
if n == 0:
model.fit_ne(train_x[:i, :], train_y[:i])
else:
model.fit_bgd(train_x[:i, :], train_y[:i])
train_scores.append(model.mse_loss(train_x[:i, :], train_y[:i]))
test_scores.append(model.mse_loss(test_x, test_y))
plt.plot(num_of_train, train_scores, label='train')
plt.plot(num_of_train, test_scores, label='test')
plt.legend()
plt.xlabel('num of training examples')
plt.ylabel('MSE-Loss')
plt.show()
# 实验4:探讨正则项系数对模型训练的影响
def lambda_test(model, x, y):
"""结论:正则项系数lambda由0变大,训练集损失值随之增大,测试集损失值一般先减小后增大(但有时会只有增大过程),lambda在[1,2]区间表现较好"""
lamdas = list(range(50))
train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.25, random_state=7)
for i in range(2):
plt.subplot(121+i)
if i == 0:
plt.title('MSE-Loss of NE-REG')
else:
plt.title('MSE-Loss of BGD-REG')
train_scores, test_scores = [], []
for lamda in lamdas:
# 分别查看正则项系数变化对正规方程法和梯度下降法的影响
if i == 0:
model.fit_ne(train_x, train_y, regularized=True, lamda=lamda)
else:
model.fit_bgd(train_x, train_y, regularized=True, lamda=lamda)
train_scores.append(model.mse_loss(train_x, train_y))
test_scores.append(model.mse_loss(test_x, test_y))
plt.plot(range(len(lamdas)), train_scores, label='train')
plt.plot(range(len(lamdas)), test_scores, label='test')
plt.legend()
plt.xlabel('lambda')
plt.ylabel('MSE-Loss')
plt.show()
if __name__ == '__main__':
# 加载数据
boston = load_boston()
print("data: ", boston.data.shape)
print("target: ", boston.target.shape)
scaler = MinMaxScaler()
scaling_data = scaler.fit_transform(boston.data)
# 模型运行与测试
test_id = 1
lr = MyLinearRegression()
# 简单运行
if test_id == 0:
cross_val_score(lr, scaling_data, boston.target, n_splits=5)
# 实验1
elif test_id == 1:
compare_training_method(lr, scaling_data, boston.target)
# 实验2
elif test_id == 2:
feature_scaling_test(lr, boston.data, boston.target)
# 实验3
elif test_id == 3:
draw_learning_curve(lr, scaling_data, boston.target)
# 实验4
elif test_id == 4:
lambda_test(lr, scaling_data, boston.target)
OVER,简单粗暴。
文章出处登录后可见!
已经登录?立即刷新