Single input channel, single output channel
# Manual convolution: single input channel, single output channel
import math
import torch
import torch.nn.functional as F

def my_conv2d(input_feature_map, kernel, stride=1, padding=0, bias=0):
    if padding > 0:
        input_feature_map = F.pad(input_feature_map, (padding, padding, padding, padding))
    print(input_feature_map)
    input_h, input_w = input_feature_map.shape  # spatial size after padding
    kernel_h, kernel_w = kernel.shape
    output_w = math.floor((input_w - kernel_w) / stride) + 1  # output width
    output_h = math.floor((input_h - kernel_h) / stride) + 1  # output height
    # note: with stride=1, the output keeps the input size when the total padding
    # equals kernel_w - 1, i.e. (kernel_w - 1) / 2 per side
    print(f"output_h:{output_h}, output_w:{output_w}")
    output = torch.zeros(output_h, output_w)
    for i in range(0, input_h - kernel_h + 1, stride):
        for j in range(0, input_w - kernel_w + 1, stride):
            # take the kernel-sized region of the feature map at this position
            region = input_feature_map[i:i + kernel_h, j:j + kernel_w]
            output[i // stride, j // stride] = torch.multiply(region, kernel).sum() + bias
    return output
Comparison with nn.functional.conv2d
input_feature_map = torch.randint(0, 5, (5, 5)).float()
kernel = torch.ones(3, 3)
stride = 2
padding = 0
bias = torch.randn(1)  # same length as the number of output channels, which is 1 here
# manual implementation (single output channel)
output_feature_map = my_conv2d(input_feature_map, kernel, stride, padding, bias)
print(output_feature_map)
# pytorch API
output_api = F.conv2d(input_feature_map.unsqueeze(0).unsqueeze(0),
                      kernel.unsqueeze(0).unsqueeze(0), stride=stride, padding=padding, bias=bias)
print(output_api.squeeze(0).squeeze(0))
Result
tensor([[2., 1., 1., 3., 1.],
        [4., 2., 2., 2., 2.],
        [1., 4., 1., 3., 1.],
        [3., 2., 2., 2., 4.],
        [3., 4., 2., 1., 2.]])
output_h:2, output_w:2
tensor([[19.1882, 17.1882],
        [23.1882, 19.1882]])
tensor([[19.1882, 17.1882],
        [23.1882, 19.1882]])
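As a quick check of the size-preserving note in the code above, the following sketch (with an assumed 3x3 all-ones kernel, stride=1 and padding=(3-1)//2=1, not part of the original example) should produce an output with the same 5x5 size as the input:

# assumed check: stride=1 with padding=(kernel_size-1)//2 preserves the 5x5 spatial size
same_size_out = my_conv2d(torch.randint(0, 5, (5, 5)).float(), torch.ones(3, 3), stride=1, padding=1)
print(same_size_out.shape)  # expected: torch.Size([5, 5])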
Multiple input and output channels
# Multiple input and output channels
import math
# Manual convolution over a (batch_size, C, H, W) input
def my_conv2d(input_feature_map, kernel, stride=1, padding=0, bias=0):
    if padding > 0:
        input_feature_map = F.pad(input_feature_map, (padding, padding, padding, padding))
    # F.pad pads starting from the last dimension: each pair in the tuple gives the
    # (front, back) pad length for one dimension, so len(tuple) / 2 dimensions are padded,
    # with zeros by default. A 4-element tuple therefore pads only the last two (H, W) dims.
    print(input_feature_map.shape)
    B, _, input_h, input_w = input_feature_map.shape  # spatial size after padding
    out_channels, in_channels, kernel_h, kernel_w = kernel.shape
    output_w = math.floor((input_w - kernel_w) / stride) + 1  # output width
    output_h = math.floor((input_h - kernel_h) / stride) + 1  # output height
    # note: with stride=1, the output keeps the input size when the total padding equals kernel_w - 1
    print(f"output_h:{output_h}, output_w:{output_w}")
    output = torch.zeros(B, out_channels, output_h, output_w)
    for b in range(B):
        for out_c in range(out_channels):
            for in_c in range(in_channels):
                for i in range(0, input_h - kernel_h + 1, stride):
                    for j in range(0, input_w - kernel_w + 1, stride):
                        # print(kernel[out_c, in_c, ...].shape)  # (K, K)
                        # take the kernel-sized region of this input channel at this position
                        region = input_feature_map[b, in_c, i:i + kernel_h, j:j + kernel_w]
                        # print(region.shape)  # (K, K)
                        output[b, out_c, i // stride, j // stride] += torch.multiply(region, kernel[out_c, in_c, ...]).sum()
            output[b, out_c, ...] += bias[out_c]  # one scalar bias per output channel
    return output
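To illustrate the F.pad behavior described in the comment above, here is a minimal sketch with assumed shapes (not part of the original code):

# assumed example: F.pad fills dimensions starting from the last one, two values per dimension
x = torch.zeros(3, 5, 5)
print(F.pad(x, (1, 1, 1, 1)).shape)        # torch.Size([3, 7, 7]), pads W and H only
print(F.pad(x, (1, 1, 1, 1, 1, 1)).shape)  # torch.Size([5, 7, 7]), also pads the channel dim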
Comparison with nn.functional.conv2d
B = batch_size = 2
C = in_c = in_channels = 3
H = height = 5
W = width = 5
input_feature_map = torch.randint(0, 5, (B, C, H, W)).float()
K = kernel_size = 3
out_c = out_channels = 2
kernel = torch.ones(out_c, in_c, K, K)
stride = 2
padding = 1
bias = torch.randn(out_c)  # same length as the number of output channels
# print(bias)
# manual implementation
output_feature_map = my_conv2d(input_feature_map, kernel, stride, padding, bias)
print(output_feature_map)
# pytorch functional API
api_output_feature_map = F.conv2d(input_feature_map, kernel, stride=stride, padding=padding, bias=bias)
print(api_output_feature_map)
# class nn.Conv2d
# conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, bias=False)
# output_fm = conv_layer(input_feature_map)
# print(output_fm.shape)
# print(conv_layer.weight.shape)  # the kernel shape does not depend on batch_size
Result
torch.Size([2, 3, 7, 7])
output_h:3, output_w:3
tensor([[[[13.8990, 25.8990, 20.8990],
          [27.8990, 46.8990, 28.8990],
          [23.8990, 33.8990, 17.8990]],

         [[11.8909, 23.8909, 18.8909],
          [25.8909, 44.8909, 26.8909],
          [21.8909, 31.8909, 15.8909]]],


        [[[21.8990, 32.8990, 21.8990],
          [43.8990, 68.8990, 43.8990],
          [23.8990, 42.8990, 31.8990]],

         [[19.8909, 30.8909, 19.8909],
          [41.8909, 66.8909, 41.8909],
          [21.8909, 40.8909, 29.8909]]]])
tensor([[[[13.8990, 25.8990, 20.8990],
          [27.8990, 46.8990, 28.8990],
          [23.8990, 33.8990, 17.8990]],

         [[11.8909, 23.8909, 18.8909],
          [25.8909, 44.8909, 26.8909],
          [21.8909, 31.8909, 15.8909]]],


        [[[21.8990, 32.8990, 21.8990],
          [43.8990, 68.8990, 43.8990],
          [23.8990, 42.8990, 31.8990]],

         [[19.8909, 30.8909, 19.8909],
          [41.8909, 66.8909, 41.8909],
          [21.8909, 40.8909, 29.8909]]]])
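The commented-out nn.Conv2d lines in the comparison code create a layer with random weights, so its output cannot match the numbers above. As a sketch (reusing the variables defined above), copying the same kernel and bias into the layer's parameters should reproduce the F.conv2d result:

# sketch: make nn.Conv2d reproduce F.conv2d by loading the same kernel and bias
import torch.nn as nn
conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)
with torch.no_grad():
    conv_layer.weight.copy_(kernel)  # weight shape (out_channels, in_channels, K, K)
    conv_layer.bias.copy_(bias)      # bias shape (out_channels,)
print(torch.allclose(conv_layer(input_feature_map), api_output_feature_map))  # expected: True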
Conclusion
As the results above show, the manual implementation relies mainly on explicit loops and matches the PyTorch API exactly. Two details need attention: how the shape of the output feature map is computed, and the dimensionality of the bias. The bias has one entry per output channel, that is, each channel of the output feature map is assigned a single scalar bias, and that same scalar is added to every pixel of that channel.
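As a small numeric check of that shape rule for the values used above (H=5, padding=1, kernel=3, stride=2), and of how the per-channel bias is broadcast:

# output size: floor((H + 2*padding - kernel) / stride) + 1 = floor((5 + 2 - 3) / 2) + 1 = 3
print(math.floor((5 + 2 * 1 - 3) / 2) + 1)  # 3, matching output_h:3, output_w:3 above
# a bias of shape (out_channels,) broadcasts as (1, out_channels, 1, 1) over the output
print(bias.view(1, -1, 1, 1).shape)  # torch.Size([1, 2, 1, 1])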