文章目录

1.1 深度估计应用场景之一（特斯拉撞前预警）
1.2 深度估计概念
1.3 深度估计整体架构
1.4 深度估计架构流程论文解读
1.5 深度估计项目应用

1.1 深度估计应用场景之一（特斯拉撞前预警）

特斯拉自动驾驶

1.2 深度估计概念

深度估计，就是获取图像中场景里的每个点到相机的距离信息，这种距离信息组成的图我们称之为深度图，英文叫Depth map
深度估计效果

1.3 深度估计整体架构

深度估计整体模型

1.4 深度估计架构流程论文解读

一、backbone提取
深度估计backbone

代码实现：

class deepFeatureExtractor_ResNext101(nn.Module):
    def __init__(self,args, lv6 = False):
        super(deepFeatureExtractor_ResNext101, self).__init__()
        self.args = args
        # after passing ReLU   : H/2  x W/2
        # after passing Layer1 : H/4  x W/4
        # after passing Layer2 : H/8  x W/8
        # after passing Layer3 : H/16 x W/16
        self.encoder = models.resnext101_32x8d(pretrained=True)
        self.fixList = ['layer1.0','layer1.1','.bn']
        self.lv6 = lv6

        if lv6 is True:
            self.layerList = ['relu','layer1','layer2','layer3', 'layer4']
            self.dimList = [64, 256, 512, 1024,2048]
        else:
            del self.encoder.layer4
            del self.encoder.fc
            self.layerList = ['relu','layer1','layer2','layer3']
            self.dimList = [64, 256, 512, 1024]

        for name, parameters in self.encoder.named_parameters():
            if name == 'conv1.weight':
                parameters.requires_grad = False
            if any(x in name for x in self.fixList):
                parameters.requires_grad = False
        
    def forward(self, x):
        out_featList = []
        feature = x
        for k, v in self.encoder._modules.items():
            if k == 'avgpool':
                break
            feature = v(feature)
            #feature = v(features[-1])
            #features.append(feature)
            if any(x in k for x in self.layerList):
                out_featList.append(feature)
        return out_featList

二、差异特征提取
深度估计差异特征提取

代码实现：

def forward(self, x):
        print(x.shape)
        out_featList = self.encoder(x)
        rgb_down2 = F.interpolate(x, scale_factor = 0.5, mode='bilinear')
        print(rgb_down2.shape)
        rgb_down4 = F.interpolate(rgb_down2, scale_factor = 0.5, mode='bilinear')
        rgb_down8 = F.interpolate(rgb_down4, scale_factor = 0.5, mode='bilinear')
        rgb_down16 = F.interpolate(rgb_down8, scale_factor = 0.5, mode='bilinear')
        rgb_down32 = F.interpolate(rgb_down16, scale_factor = 0.5, mode='bilinear')
        print(rgb_down32.shape)
        rgb_up16 = F.interpolate(rgb_down32, rgb_down16.shape[2:], mode='bilinear')
        print(rgb_up16.shape)
        rgb_up8 = F.interpolate(rgb_down16, rgb_down8.shape[2:], mode='bilinear')
        rgb_up4 = F.interpolate(rgb_down8, rgb_down4.shape[2:], mode='bilinear')
        rgb_up2 = F.interpolate(rgb_down4, rgb_down2.shape[2:], mode='bilinear')
        rgb_up = F.interpolate(rgb_down2, x.shape[2:], mode='bilinear')
        print(rgb_up.shape)
        lap1 = x - rgb_up
        lap2 = rgb_down2 - rgb_up2
        lap3 = rgb_down4 - rgb_up4
        lap4 = rgb_down8 - rgb_up8
        lap5 = rgb_down16 - rgb_up16
        rgb_list = [rgb_down32, lap5, lap4, lap3, lap2, lap1]

        d_res_list, depth = self.decoder(out_featList, rgb_list)
        return d_res_list, depth

三、权重操作标准化

代码实现：

def forward(self, x):
        weight = self.weight
        weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)
        weight = weight - weight_mean
        std = weight.view(weight.size(0), -1).std(dim=1).view(-1,1,1,1) + 1e-5
        #std = torch.sqrt(torch.var(weight.view(weight.size(0),-1),dim=1)+1e-12).view(-1,1,1,1)+1e-5
        weight = weight / std.expand_as(weight)
        return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)

四、网络结构ASPP（空洞卷积）
深度估计ASPP

代码实现

class Dilated_bottleNeck(nn.Module):
    def __init__(self, norm, act, in_feat):
        super(Dilated_bottleNeck, self).__init__()
        conv = conv_ws
        # in feat = 1024 in ResNext101 and ResNet101
        self.reduction1 = conv(in_feat, in_feat//2, kernel_size=1, stride = 1, bias=False, padding=0)
        self.aspp_d3 = nn.Sequential(myConv(in_feat//2, in_feat//4, kSize=1, stride=1, padding=0, dilation=1,bias=False, norm=norm, act=act, num_groups=(in_feat//2)//16),
                                    myConv(in_feat//4, in_feat//4, kSize=3, stride=1, padding=3, dilation=3,bias=False, norm=norm, act=act, num_groups=(in_feat//4)//16))
        self.aspp_d6 = nn.Sequential(myConv(in_feat//2 + in_feat//4, in_feat//4, kSize=1, stride=1, padding=0, dilation=1,bias=False, norm=norm, act=act, num_groups=(in_feat//2 + in_feat//4)//16),
                                    myConv(in_feat//4, in_feat//4, kSize=3, stride=1, padding=6, dilation=6,bias=False, norm=norm, act=act, num_groups=(in_feat//4)//16))
        self.aspp_d12 = nn.Sequential(myConv(in_feat, in_feat//4, kSize=1, stride=1, padding=0, dilation=1,bias=False, norm=norm, act=act, num_groups=(in_feat)//16),
                                    myConv(in_feat//4, in_feat//4, kSize=3, stride=1, padding=12, dilation=12,bias=False, norm=norm, act=act, num_groups=(in_feat//4)//16))
        self.aspp_d18 = nn.Sequential(myConv(in_feat + in_feat//4, in_feat//4, kSize=1, stride=1, padding=0, dilation=1,bias=False, norm=norm, act=act, num_groups=(in_feat + in_feat//4)//16),
                                    myConv(in_feat//4, in_feat//4, kSize=3, stride=1, padding=18, dilation=18,bias=False, norm=norm, act=act, num_groups=(in_feat//4)//16))
        self.reduction2 = myConv(((in_feat//4)*4) + (in_feat//2), in_feat//2, kSize=3, stride=1, padding=1,bias=False, norm=norm, act=act, num_groups = ((in_feat//4)*4 + (in_feat//2))//16)
    def forward(self, x):
        print(x.shape)
        x = self.reduction1(x)
        print(x.shape)
        d3 = self.aspp_d3(x)
        print(d3.shape)
        cat1 = torch.cat([x, d3],dim=1)
        print(cat1.shape)
        d6 = self.aspp_d6(cat1)
        print(d6.shape)
        cat2 = torch.cat([cat1, d6],dim=1)
        print(cat2.shape)
        d12 = self.aspp_d12(cat2)
        print(d12.shape)
        cat3 = torch.cat([cat2, d12],dim=1)
        print(cat3.shape)
        d18 = self.aspp_d18(cat3)
        print(d18.shape)
        out = self.reduction2(torch.cat([x,d3,d6,d12,d18], dim=1))
        print(out.shape)
        return out      # 512 x H/16 x W/16

五、coarst-to-fine特征拼接
实现代码：

 # decoder 1 - Pyramid level 5
        lap_lv5 = torch.sigmoid(self.decoder1(dense_feat))#R5
        print(lap_lv5.shape) 
        lap_lv5_up = self.upscale(lap_lv5, scale_factor = 2, mode='bilinear')
        print(lap_lv5_up.shape) 

        # decoder 2 - Pyramid level 4
        dec2 = self.decoder2_up1(dense_feat)
        print(dec2.shape)
        dec2 = self.decoder2_reduc1(torch.cat([dec2,cat3],dim=1))#252
        print(dec2.shape)
        dec2_up = self.decoder2_1(torch.cat([dec2,lap_lv5_up,rgb_lv4],dim=1))
        print(dec2_up.shape)
        dec2 = self.decoder2_2(dec2_up)
        print(dec2.shape)
        dec2 = self.decoder2_3(dec2)
        print(dec2.shape)
        lap_lv4 = torch.tanh(self.decoder2_4(dec2) + (0.1*rgb_lv4.mean(dim=1,keepdim=True)))
        print(lap_lv4.shape)                 
        # if depth range is (0,1), laplacian of image range is (-1,1)
        lap_lv4_up = self.upscale(lap_lv4, scale_factor = 2, mode='bilinear')
        print(lap_lv4_up.shape)
        
        # decoder 2 - Pyramid level 3
        dec3 = self.decoder2_1_up2(dec2_up)
        dec3 = self.decoder2_1_reduc2(torch.cat([dec3,cat2],dim=1))
        dec3_up = self.decoder2_1_1(torch.cat([dec3,lap_lv4_up,rgb_lv3],dim=1))
        dec3 = self.decoder2_1_2(dec3_up)
        lap_lv3 = torch.tanh(self.decoder2_1_3(dec3) + (0.1*rgb_lv3.mean(dim=1,keepdim=True)))                 
        # if depth range is (0,1), laplacian of image range is (-1,1)
        lap_lv3_up = self.upscale(lap_lv3, scale_factor = 2, mode='bilinear')
        # decoder 2 - Pyramid level 2
        dec4 = self.decoder2_1_1_up3(dec3_up)
        dec4 = self.decoder2_1_1_reduc3(torch.cat([dec4,cat1],dim=1))
        dec4_up = self.decoder2_1_1_1(torch.cat([dec4,lap_lv3_up,rgb_lv2],dim=1))

        lap_lv2 = torch.tanh(self.decoder2_1_1_2(dec4_up) + (0.1*rgb_lv2.mean(dim=1,keepdim=True)))                  
        # if depth range is (0,1), laplacian of image range is (-1,1)
        lap_lv2_up = self.upscale(lap_lv2, scale_factor = 2, mode='bilinear')
        # decoder 2 - Pyramid level 1
        dec5 = self.decoder2_1_1_1_up4(dec4_up)
        dec5 = self.decoder2_1_1_1_1(torch.cat([dec5,lap_lv2_up,rgb_lv1],dim=1))
        dec5 = self.decoder2_1_1_1_2(dec5)
        lap_lv1 = torch.tanh(self.decoder2_1_1_1_3(dec5) + (0.1*rgb_lv1.mean(dim=1,keepdim=True)))
        # if depth range is (0,1), laplacian of image range is (-1,1)
        
        # Laplacian restoration
        lap_lv4_img = lap_lv4 + lap_lv5_up
        lap_lv3_img = lap_lv3 + self.upscale(lap_lv4_img, scale_factor = 2, mode = 'bilinear')
        lap_lv2_img = lap_lv2 + self.upscale(lap_lv3_img, scale_factor = 2, mode = 'bilinear')
        final_depth = lap_lv1 + self.upscale(lap_lv2_img, scale_factor = 2, mode = 'bilinear')
        final_depth = torch.sigmoid(final_depth)
        print(final_depth.shape)
        return [(lap_lv5)*self.max_depth, (lap_lv4)*self.max_depth, (lap_lv3)*self.max_depth, (lap_lv2)*self.max_depth, (lap_lv1)*self.max_depth], final_depth*self.max_depth
        # fit laplacian image range (-80,80), depth image range(0,80)

六、损失计算代码实现（正则化）：
深度估计损失计算
代码实现

def scale_invariant_loss(valid_out, valid_gt):
    logdiff = torch.log(valid_out) - torch.log(valid_gt)
    scale_inv_loss = torch.sqrt((logdiff ** 2).mean() - 0.85*(logdiff.mean() ** 2))*10.0
    return scale_inv_loss

1.5 深度估计项目应用

深度估计项目效果
如果需要本文完整项目代码，以上算法论文或者点数据集资源的小伙伴可以私信我哦！

文章出处登录后可见！

已经登录？立即刷新

自动驾驶算法 -撞前预警论文解读与项目应用

文章目录

1.1 深度估计应用场景之一（特斯拉撞前预警）

1.2 深度估计概念

1.3 深度估计整体架构

1.4 深度估计架构流程论文解读

1.5 深度估计项目应用

相关推荐