0.导语
Scipy是一个用于数学、科学、工程领域的常用软件包,可以处理插值、积分、优化、图像处理、常微分方程数值解的求解、信号处理等问题。它用于有效计算Numpy矩阵,使Numpy和Scipy协同工作,高效解决问题。
Scipy是由针对特定任务的子模块组成:
模块名 | 应用领域 |
---|---|
scipy.cluster | 向量计算/Kmeans |
scipy.constants | 物理和数学常量 |
scipy.fftpack | 傅立叶变换 |
scipy.integrate | 积分程序 |
scipy.interpolate | 插值 |
scipy.io | 数据输入输出 |
scipy.linalg | 线性代数程序 |
scipy.ndimage | n维图像包 |
scipy.odr | 正交距离回归 |
scipy.optimize | 优化 |
scipy.signal | 信号处理 |
scipy.sparse | 稀疏矩阵 |
scipy.spatial | 空间数据结构和算法 |
scipy.special | 一些特殊的数学函数 |
scipy.stats | 统计 |
备注:本文代码可以在github下载
https://github.com/fengdu78/Data-Science-Notes/tree/master/4.scipy
1.SciPy-数值计算库
import numpy as np
import pylab as pl
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['SimHei']
import scipy
scipy.__version__#查看版本
'1.0.0'
常数和特殊函数
from scipy import constants as C
print (C.c) # 真空中的光速
print (C.h) # 普朗克常数
299792458.0
6.62607004e-34
C.physical_constants["electron mass"]
(9.10938356e-31, 'kg', 1.1e-38)
# 1英里等于多少米, 1英寸等于多少米, 1克等于多少千克, 1磅等于多少千克
print(C.mile)
print(C.inch)
print(C.gram)
print(C.pound)
1609.3439999999998
0.0254
0.001
0.45359236999999997
import scipy.special as S
print (1 + 1e-20)
print (np.log(1+1e-20))
print (S.log1p(1e-20))
1.0
0.0
1e-20
m = np.linspace(0.1, 0.9, 4)
u = np.linspace(-10, 10, 200)
results = S.ellipj(u[:, None], m[None, :])
print([y.shape for y in results])
[(200, 4), (200, 4), (200, 4), (200, 4)]
#%figonly=使用广播计算得到的`ellipj()`返回值
fig, axes = pl.subplots(2, 2, figsize=(12, 4))
labels = ["$sn$", "$cn$", "$dn$", "$\phi$"]
for ax, y, label in zip(axes.ravel(), results, labels):
ax.plot(u, y)
ax.set_ylabel(label)
ax.margins(0, 0.1)
axes[1, 1].legend(["$m={:g}$".format(m_) for m_ in m], loc="best", ncol=2);
2.拟合与优化-optimize
非线性方程组求解
import pylab as pl
import numpy as np
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['SimHei']
from math import sin, cos
from scipy import optimize
def f(x): #❶
x0, x1, x2 = x.tolist() #❷
return [
5*x1+3,
4*x0*x0 - 2*sin(x1*x2),
x1*x2 - 1.5
]
# f计算方程组的误差,[1,1,1]是未知数的初始值
result = optimize.fsolve(f, [1,1,1]) #❸
print (result)
print (f(result))
[-0.70622057 -0.6 -2.5 ]
[0.0, -9.126033262418787e-14, 5.329070518200751e-15]
def j(x): #❶
x0, x1, x2 = x.tolist()
return [[0, 5, 0],
[8 * x0, -2 * x2 * cos(x1 * x2), -2 * x1 * cos(x1 * x2)],
[0, x2, x1]]
result = optimize.fsolve(f, [1, 1, 1], fprime=j) #❷
print(result)
print(f(result))
[-0.70622057 -0.6 -2.5 ]
[0.0, -9.126033262418787e-14, 5.329070518200751e-15]
最小二乘拟合
import numpy as np
from scipy import optimize
X = np.array([ 8.19, 2.72, 6.39, 8.71, 4.7 , 2.66, 3.78])
Y = np.array([ 7.01, 2.78, 6.47, 6.71, 4.1 , 4.23, 4.05])
def residuals(p): #❶
"计算以p为参数的直线和原始数据之间的误差"
k, b = p
return Y - (k*X + b)
# leastsq使得residuals()的输出数组的平方和最小,参数的初始值为[1,0]
r = optimize.leastsq(residuals, [1, 0]) #❷
k, b = r[0]
print ("k =",k, "b =",b)
k = 0.6134953491930442 b = 1.794092543259387
#%figonly=最小化正方形面积之和(左),误差曲面(右)
scale_k = 1.0
scale_b = 10.0
scale_error = 1000.0
def S(k, b):
"计算直线y=k*x+b和原始数据X、Y的误差的平方和"
error = np.zeros(k.shape)
for x, y in zip(X, Y):
error += (y - (k * x + b)) ** 2
return error
ks, bs = np.mgrid[k - scale_k:k + scale_k:40j, b - scale_b:b + scale_b:40j]
error = S(ks, bs) / scale_error
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.patches import Rectangle
fig = pl.figure(figsize=(12, 5))
ax1 = pl.subplot(121)
ax1.plot(X, Y, "o")
X0 = np.linspace(2, 10, 3)
Y0 = k*X0 + b
ax1.plot(X0, Y0)
for x, y in zip(X, Y):
y2 = k*x+b
rect = Rectangle((x,y), abs(y-y2), y2-y, facecolor="red", alpha=0.2)
ax1.add_patch(rect)
ax1.set_aspect("equal")
ax2 = fig.add_subplot(122, projection='3d')
ax2.plot_surface(
ks, bs / scale_b, error, rstride=3, cstride=3, cmap="jet", alpha=0.5)
ax2.scatter([k], [b / scale_b], [S(k, b) / scale_error], c="r", s=20)
ax2.set_xlabel("$k$")
ax2.set_ylabel("$b$")
ax2.set_zlabel("$error$");
#%fig=带噪声的正弦波拟合
def func(x, p): #❶
"""
数据拟合所用的函数: A*sin(2*pi*k*x + theta)
"""
A, k, theta = p
return A * np.sin(2 * np.pi * k * x + theta)
def residuals(p, y, x): #❷
"""
实验数据x, y和拟合函数之间的差,p为拟合需要找到的系数
"""
return y - func(x, p)
x = np.linspace(0, 2 * np.pi, 100)
A, k, theta = 10, 0.34, np.pi / 6 # 真实数据的函数参数
y0 = func(x, [A, k, theta]) # 真实数据
# 加入噪声之后的实验数据
np.random.seed(0)
y1 = y0 + 2 * np.random.randn(len(x)) #❸
p0 = [7, 0.40, 0] # 第一次猜测的函数拟合参数
# 调用leastsq进行数据拟合
# residuals为计算误差的函数
# p0为拟合参数的初始值
# args为需要拟合的实验数据
plsq = optimize.leastsq(residuals, p0, args=(y1, x)) #❹
print(u"真实参数:", [A, k, theta])
print(u"拟合参数", plsq[0]) # 实验数据拟合后的参数
pl.plot(x, y1, "o", label=u"带噪声的实验数据")
pl.plot(x, y0, label=u"真实数据")
pl.plot(x, func(x, plsq[0]), label=u"拟合数据")
pl.legend(loc="best")
真实参数: [10, 0.34, 0.5235987755982988]
拟合参数 [10.25218748 0.3423992 0.50817423]
def func2(x, A, k, theta):
return A*np.sin(2*np.pi*k*x+theta)
popt, _ = optimize.curve_fit(func2, x, y1, p0=p0)
print (popt)
[10.25218748 0.3423992 0.50817425]
popt, _ = optimize.curve_fit(func2, x, y1, p0=[10, 1, 0])
print(u"真实参数:", [A, k, theta])
print(u"拟合参数", popt)
真实参数: [10, 0.34, 0.5235987755982988]
拟合参数 [ 0.71093469 1.02074585 -0.12776742]
计算函数局域最小值
def target_function(x, y):
return (1 - x)**2 + 100 * (y - x**2)**2
class TargetFunction(object):
def __init__(self):
self.f_points = []
self.fprime_points = []
self.fhess_points = []
def f(self, p):
x, y = p.tolist()
z = target_function(x, y)
self.f_points.append((x, y))
return z
def fprime(self, p):
x, y = p.tolist()
self.fprime_points.append((x, y))
dx = -2 + 2 * x - 400 * x * (y - x**2)
dy = 200 * y - 200 * x**2
return np.array([dx, dy])
def fhess(self, p):
x, y = p.tolist()
self.fhess_points.append((x, y))
return np.array([[2 * (600 * x**2 - 200 * y + 1), -400 * x],
[-400 * x, 200]])
def fmin_demo(method):
target = TargetFunction()
init_point = (-1, -1)
res = optimize.minimize(
target.f,
init_point,
method=method,
jac=target.fprime,
hess=target.fhess)
return res, [
np.array(points) for points in (target.f_points, target.fprime_points,
target.fhess_points)
]
methods = ("Nelder-Mead", "Powell", "CG", "BFGS", "Newton-CG", "L-BFGS-B")
for method in methods:
res, (f_points, fprime_points, fhess_points) = fmin_demo(method)
print(
"{:12s}: min={:12g}, f count={:3d}, fprime count={:3d}, fhess count={:3d}"
.format(method, float(res["fun"]), len(f_points), len(fprime_points),
len(fhess_points)))
Nelder-Mead : min= 5.30934e-10, f count=125, fprime count= 0, fhess count= 0
Powell : min= 0, f count= 52, fprime count= 0, fhess count= 0
CG : min= 9.63056e-21, f count= 39, fprime count= 39, fhess count= 0
BFGS : min= 1.84992e-16, f count= 40, fprime count= 40, fhess count= 0
Newton-CG : min= 5.22666e-10, f count= 60, fprime count= 97, fhess count= 38
L-BFGS-B : min= 6.5215e-15, f count= 33, fprime count= 33, fhess count= 0
#%figonly=各种优化算法的搜索路径
def draw_fmin_demo(f_points, fprime_points, ax):
xmin, xmax = -3, 3
ymin, ymax = -3, 3
Y, X = np.ogrid[ymin:ymax:500j,xmin:xmax:500j]
Z = np.log10(target_function(X, Y))
zmin, zmax = np.min(Z), np.max(Z)
ax.imshow(Z, extent=(xmin,xmax,ymin,ymax), origin="bottom", aspect="auto", cmap="gray")
ax.plot(f_points[:,0], f_points[:,1], lw=1)
ax.scatter(f_points[:,0], f_points[:,1], c=range(len(f_points)), s=50, linewidths=0)
if len(fprime_points):
ax.scatter(fprime_points[:, 0], fprime_points[:, 1], marker="x", color="w", alpha=0.5)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
fig, axes = pl.subplots(2, 3, figsize=(9, 6))
methods = ("Nelder-Mead", "Powell", "CG", "BFGS", "Newton-CG", "L-BFGS-B")
for ax, method in zip(axes.ravel(), methods):
res, (f_points, fprime_points, fhess_points) = fmin_demo(method)
draw_fmin_demo(f_points, fprime_points, ax)
ax.set_aspect("equal")
ax.set_title(method)
计算全域最小值
def func(x, p):
A, k, theta = p
return A*np.sin(2*np.pi*k*x+theta)
def func_error(p, y, x):
return np.sum((y - func(x, p))**2)
x = np.linspace(0, 2*np.pi, 100)
A, k, theta = 10, 0.34, np.pi/6
y0 = func(x, [A, k, theta])
np.random.seed(0)
y1 = y0 + 2 * np.random.randn(len(x))
result = optimize.basinhopping(func_error, (1, 1, 1),
niter = 10,
minimizer_kwargs={"method":"L-BFGS-B",
"args":(y1, x)})
print (result.x)
[10.25218676 -0.34239909 2.63341581]
#%figonly=用`basinhopping()`拟合正弦波
pl.plot(x, y1, "o", label=u"带噪声的实验数据")
pl.plot(x, y0, label=u"真实数据")
pl.plot(x, func(x, result.x), label=u"拟合数据")
pl.legend(loc="best");
3.线性代数-linalg
解线性方程组
import pylab as pl
import numpy as np
from scipy import linalg
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['SimHei']
import numpy as np
from scipy import linalg
m, n = 500, 50
A = np.random.rand(m, m)
B = np.random.rand(m, n)
X1 = linalg.solve(A, B)
X2 = np.dot(linalg.inv(A), B)
print (np.allclose(X1, X2))
%timeit linalg.solve(A, B)
%timeit np.dot(linalg.inv(A), B)
True
5.38 ms ± 120 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
8.14 ms ± 994 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
luf = linalg.lu_factor(A)
X3 = linalg.lu_solve(luf, B)
np.allclose(X1, X3)
True
M, N = 1000, 100
np.random.seed(0)
A = np.random.rand(M, M)
B = np.random.rand(M, N)
Ai = linalg.inv(A)
luf = linalg.lu_factor(A)
%timeit linalg.inv(A)
%timeit np.dot(Ai, B)
%timeit linalg.lu_factor(A)
%timeit linalg.lu_solve(luf, B)
50.6 ms ± 1.94 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
3.49 ms ± 306 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
20.1 ms ± 1.42 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
4.49 ms ± 65 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
最小二乘解
from numpy.lib.stride_tricks import as_strided
def make_data(m, n, noise_scale): #❶
np.random.seed(42)
x = np.random.standard_normal(m)
h = np.random.standard_normal(n)
y = np.convolve(x, h)
yn = y + np.random.standard_normal(len(y)) * noise_scale * np.max(y)
return x, yn, h
def solve_h(x, y, n): #❷
X = as_strided(
x, shape=(len(x) - n + 1, n), strides=(x.itemsize, x.itemsize)) #❸
Y = y[n - 1:len(x)] #❹
h = linalg.lstsq(X, Y) #❺
return h[0][::-1] #❻
x, yn, h = make_data(1000, 100, 0.4)
H1 = solve_h(x, yn, 120)
H2 = solve_h(x, yn, 80)
print("Average error of H1:", np.mean(np.abs(h[:100] - h)))
print("Average error of H2:", np.mean(np.abs(h[:80] - H2)))
Average error of H1: 0.0
Average error of H2: 0.2958422158342371
#%figonly=实际的系统参数与最小二乘解的比较
fig, (ax1, ax2) = pl.subplots(2, 1, figsize=(6, 4))
ax1.plot(h, linewidth=2, label=u"实际的系统参数")
ax1.plot(H1, linewidth=2, label=u"最小二乘解H1", alpha=0.7)
ax1.legend(loc="best", ncol=2)
ax1.set_xlim(0, len(H1))
ax2.plot(h, linewidth=2, label=u"实际的系统参数")
ax2.plot(H2, linewidth=2, label=u"最小二乘解H2", alpha=0.7)
ax2.legend(loc="best", ncol=2)
ax2.set_xlim(0, len(H1));
特征值和特征向量
A = np.array([[1, -0.3], [-0.1, 0.9]])
evalues, evectors = linalg.eig(A)
print(evalues)
print(evectors)
[1.13027756+0.j 0.76972244+0.j]
[[ 0.91724574 0.79325185]
[-0.3983218 0.60889368]]
#%figonly=线性变换将蓝色箭头变换为红色箭头
points = np.array([[0, 1.0], [1.0, 0], [1, 1]])
def draw_arrows(points, **kw):
props = dict(color="blue", arrowstyle="->")
props.update(kw)
for x, y in points:
pl.annotate("",
xy=(x, y), xycoords='data',
xytext=(0, 0), textcoords='data',
arrowprops=props)
draw_arrows(points)
draw_arrows(np.dot(A, points.T).T, color="red")
draw_arrows(evectors.T, alpha=0.7, linewidth=2)
draw_arrows(np.dot(A, evectors).T, color="red", alpha=0.7, linewidth=2)
ax = pl.gca()
ax.set_aspect("equal")
ax.set_xlim(-0.5, 1.1)
ax.set_ylim(-0.5, 1.1);
np.random.seed(42)
t = np.random.uniform(0, 2*np.pi, 60)
alpha = 0.4
a = 0.5
b = 1.0
x = 1.0 + a*np.cos(t)*np.cos(alpha) - b*np.sin(t)*np.sin(alpha)
y = 1.0 + a*np.cos(t)*np.sin(alpha) - b*np.sin(t)*np.cos(alpha)
x += np.random.normal(0, 0.05, size=len(x))
y += np.random.normal(0, 0.05, size=len(y))
D = np.c_[x**2, x*y, y**2, x, y, np.ones_like(x)]
A = np.dot(D.T, D)
C = np.zeros((6, 6))
C[[0, 1, 2], [2, 1, 0]] = 2, -1, 2
evalues, evectors = linalg.eig(A, C) #❶
evectors = np.real(evectors)
err = np.mean(np.dot(D, evectors)**2, 0) #❷
p = evectors[:, np.argmin(err) ] #❸
print (p)
[-0.55214278 0.5580915 -0.23809922 0.54584559 -0.08350449 -0.14852803]
#%figonly=用广义特征向量计算的拟合椭圆
def ellipse(p, x, y):
a, b, c, d, e, f = p
return a*x**2 + b*x*y + c*y**2 + d*x + e*y + f
X, Y = np.mgrid[0:2:100j, 0:2:100j]
Z = ellipse(p, X, Y)
pl.plot(x, y, "ro", alpha=0.5)
pl.contour(X, Y, Z, levels=[0]);
文章出处登录后可见!
已经登录?立即刷新