Single input channel, single output channel
# Manual convolution: single input channel, single output channel
import math
import torch
import torch.nn.functional as F

def my_conv2d(input_feature_map, kernel, stride=1, padding=0, bias=0):
    if padding > 0:
        input_feature_map = F.pad(input_feature_map, (padding, padding, padding, padding))
    print(input_feature_map)
    input_h, input_w = input_feature_map.shape  # spatial size after padding
    kernel_h, kernel_w = kernel.shape
    output_w = math.floor((input_w - kernel_w) / stride) + 1  # output width
    output_h = math.floor((input_h - kernel_h) / stride) + 1  # output height
    # note: with stride=1, the output keeps the input size when the total padding
    # equals kernel_w - 1, i.e. (kernel_w - 1) / 2 per side
    print(f"output_h:{output_h}, output_w:{output_w}")
    output = torch.zeros(output_h, output_w)
    for i in range(0, input_h - kernel_h + 1, stride):
        for j in range(0, input_w - kernel_w + 1, stride):
            # take the kernel-sized region of the feature map at this position
            region = input_feature_map[i:i + kernel_h, j:j + kernel_w]
            output[i // stride, j // stride] = torch.multiply(region, kernel).sum() + bias
    return output
Comparison with nn.functional.conv2d
input_feature_map = torch.randint(0, 5, (5, 5)).float()
kernel = torch.ones(3, 3)
stride = 2
padding = 0
bias = torch.randn(1)  # same length as the number of output channels, which is 1 here
# manual implementation (single output channel)
output_feature_map = my_conv2d(input_feature_map, kernel, stride, padding, bias)
print(output_feature_map)
# pytorch API
output_api = F.conv2d(input_feature_map.unsqueeze(0).unsqueeze(0),
                      kernel.unsqueeze(0).unsqueeze(0), stride=stride, padding=padding, bias=bias)
print(output_api.squeeze(0).squeeze(0))
Result
tensor([[2., 1., 1., 3., 1.],
        [4., 2., 2., 2., 2.],
        [1., 4., 1., 3., 1.],
        [3., 2., 2., 2., 4.],
        [3., 4., 2., 1., 2.]])
output_h:2, output_w:2
tensor([[19.1882, 17.1882],
        [23.1882, 19.1882]])
tensor([[19.1882, 17.1882],
        [23.1882, 19.1882]])
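As a quick check of the size-preserving note in the code above, the following sketch (with an assumed 3x3 all-ones kernel, stride=1 and padding=(3-1)//2=1, not part of the original example) should produce an output with the same 5x5 size as the input:

# assumed check: stride=1 with padding=(kernel_size-1)//2 preserves the 5x5 spatial size
same_size_out = my_conv2d(torch.randint(0, 5, (5, 5)).float(), torch.ones(3, 3), stride=1, padding=1)
print(same_size_out.shape)  # expected: torch.Size([5, 5])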
Multiple input and output channels
# Multiple input and output channels
import math
# Manual convolution over a (batch_size, C, H, W) input
def my_conv2d(input_feature_map, kernel, stride=1, padding=0, bias=0):
    if padding > 0:
        input_feature_map = F.pad(input_feature_map, (padding, padding, padding, padding))
    # F.pad pads starting from the last dimension: each pair in the tuple gives the
    # (front, back) pad length for one dimension, so len(tuple) / 2 dimensions are padded,
    # with zeros by default. A 4-element tuple therefore pads only the last two (H, W) dims.
    print(input_feature_map.shape)
    B, _, input_h, input_w = input_feature_map.shape  # spatial size after padding
    out_channels, in_channels, kernel_h, kernel_w = kernel.shape
    output_w = math.floor((input_w - kernel_w) / stride) + 1  # output width
    output_h = math.floor((input_h - kernel_h) / stride) + 1  # output height
    # note: with stride=1, the output keeps the input size when the total padding equals kernel_w - 1
    print(f"output_h:{output_h}, output_w:{output_w}")
    output = torch.zeros(B, out_channels, output_h, output_w)
    for b in range(B):
        for out_c in range(out_channels):
            for in_c in range(in_channels):
                for i in range(0, input_h - kernel_h + 1, stride):
                    for j in range(0, input_w - kernel_w + 1, stride):
                        # print(kernel[out_c, in_c, ...].shape)  # (K, K)
                        # take the kernel-sized region of this input channel at this position
                        region = input_feature_map[b, in_c, i:i + kernel_h, j:j + kernel_w]
                        # print(region.shape)  # (K, K)
                        output[b, out_c, i // stride, j // stride] += torch.multiply(region, kernel[out_c, in_c, ...]).sum()
            output[b, out_c, ...] += bias[out_c]  # one scalar bias per output channel
    return output
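To illustrate the F.pad behavior described in the comment above, here is a minimal sketch with assumed shapes (not part of the original code):

# assumed example: F.pad fills dimensions starting from the last one, two values per dimension
x = torch.zeros(3, 5, 5)
print(F.pad(x, (1, 1, 1, 1)).shape)        # torch.Size([3, 7, 7]), pads W and H only
print(F.pad(x, (1, 1, 1, 1, 1, 1)).shape)  # torch.Size([5, 7, 7]), also pads the channel dim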
Comparison with nn.functional.conv2d
B = batch_size = 2
C = in_c = in_channels = 3
H = height = 5
W = width = 5
input_feature_map = torch.randint(0, 5, (B, C, H, W)).float()
K = kernel_size = 3
out_c = out_channels = 2
kernel = torch.ones(out_c, in_c, K, K)
stride = 2
padding = 1
bias = torch.randn(out_c)  # same length as the number of output channels
# print(bias)
# manual implementation
output_feature_map = my_conv2d(input_feature_map, kernel, stride, padding, bias)
print(output_feature_map)
# pytorch functional API
api_output_feature_map = F.conv2d(input_feature_map, kernel, stride=stride, padding=padding, bias=bias)
print(api_output_feature_map)
# class nn.Conv2d
# conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, bias=False)
# output_fm = conv_layer(input_feature_map)
# print(output_fm.shape)
# print(conv_layer.weight.shape)  # the kernel shape does not depend on batch_size
Result
torch.Size([2, 3, 7, 7])
output_h:3, output_w:3
tensor([[[[13.8990, 25.8990, 20.8990],
          [27.8990, 46.8990, 28.8990],
          [23.8990, 33.8990, 17.8990]],

         [[11.8909, 23.8909, 18.8909],
          [25.8909, 44.8909, 26.8909],
          [21.8909, 31.8909, 15.8909]]],


        [[[21.8990, 32.8990, 21.8990],
          [43.8990, 68.8990, 43.8990],
          [23.8990, 42.8990, 31.8990]],

         [[19.8909, 30.8909, 19.8909],
          [41.8909, 66.8909, 41.8909],
          [21.8909, 40.8909, 29.8909]]]])
tensor([[[[13.8990, 25.8990, 20.8990],
          [27.8990, 46.8990, 28.8990],
          [23.8990, 33.8990, 17.8990]],

         [[11.8909, 23.8909, 18.8909],
          [25.8909, 44.8909, 26.8909],
          [21.8909, 31.8909, 15.8909]]],


        [[[21.8990, 32.8990, 21.8990],
          [43.8990, 68.8990, 43.8990],
          [23.8990, 42.8990, 31.8990]],

         [[19.8909, 30.8909, 19.8909],
          [41.8909, 66.8909, 41.8909],
          [21.8909, 40.8909, 29.8909]]]])
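The commented-out nn.Conv2d lines in the comparison code create a layer with random weights, so its output cannot match the numbers above. As a sketch (reusing the variables defined above), copying the same kernel and bias into the layer's parameters should reproduce the F.conv2d result:

# sketch: make nn.Conv2d reproduce F.conv2d by loading the same kernel and bias
import torch.nn as nn
conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)
with torch.no_grad():
    conv_layer.weight.copy_(kernel)  # weight shape (out_channels, in_channels, K, K)
    conv_layer.bias.copy_(bias)      # bias shape (out_channels,)
print(torch.allclose(conv_layer(input_feature_map), api_output_feature_map))  # expected: True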
Conclusion
As the results above show, the manual implementation relies mainly on explicit loops and matches the PyTorch API exactly. Two details need attention: how the shape of the output feature map is computed, and the dimensionality of the bias. The bias has one entry per output channel, that is, each channel of the output feature map is assigned a single scalar bias, and that same scalar is added to every pixel of that channel.
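As a small numeric check of that shape rule for the values used above (H=5, padding=1, kernel=3, stride=2), and of how the per-channel bias is broadcast:

# output size: floor((H + 2*padding - kernel) / stride) + 1 = floor((5 + 2 - 3) / 2) + 1 = 3
print(math.floor((5 + 2 * 1 - 3) / 2) + 1)  # 3, matching output_h:3, output_w:3 above
# a bias of shape (out_channels,) broadcasts as (1, out_channels, 1, 1) over the output
print(bias.view(1, -1, 1, 1).shape)  # torch.Size([1, 2, 1, 1])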