  • 在预测框回归过程中,一旦预测框和GT框的宽高呈线性比例(即两者的纵横比相同),CIoU中针对纵横比添加的惩罚项便不再起作用
  • 根据预测框w和h的梯度公式可以推知,w和h在其中一个值增大时,另外一个值必须减小,它俩不能保持同增同减


  • EIoU损失的定义为 $L_{EIoU} = 1 - IoU + \frac{\rho^2(b, b^{gt})}{c^2} + \frac{\rho^2(w, w^{gt})}{C_w^2} + \frac{\rho^2(h, h^{gt})}{C_h^2}$,其中 $C_w$ 和 $C_h$ 分别是预测框和GT框最小外接矩形的宽和高
  • EIoU将损失函数分成了三个部分:
    • 预测框和真实框的重叠损失 $L_{IoU} = 1 - IoU$
    • 预测框和真实框的中心距离损失 $L_{dis} = \frac{\rho^2(b, b^{gt})}{c^2}$
    • 预测框和真实框的宽和高损失 $L_{asp} = \frac{\rho^2(w, w^{gt})}{C_w^2} + \frac{\rho^2(h, h^{gt})}{C_h^2}$
  • EIOU损失的前两部分延续CIOU中的方法,而宽高损失直接使预测框与真实框的宽度和高度之差最小,使得收敛速度更快


  • GIoU的问题是使用最小外接矩形的面积减去并集的面积作为惩罚项,这导致了GIoU存在先扩大并集面积,再优化IoU的走弯路的问题
  • CIoU的问题是宽和高不能同时增大或者减小,而EIoU则可以


除此之外,论文中还提到了利用Focal Loss对EIOU进行加权处理:$L_{Focal\text{-}EIoU} = IoU^{\gamma} \cdot L_{EIoU}$,其中 $\gamma$ 是控制加权程度的超参数


  • utils/metrics.py中,找到bbox_iou函数,可以把原有的注释掉,换成下面的代码:
# Compute the requested IoU variant between two sets of boxes
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, EIoU=False, eps=1e-7):
    """Return the IoU (or a penalised IoU variant) of box1 against box2.

    Per the original note, box1 is 4 values and box2 is nx4; box2 is
    transposed so that each coordinate can be indexed as a row.

    Args:
        box1: first box, indexed as box1[0..3].
        box2: second box set, transposed internally before indexing.
        x1y1x2y2: True if boxes are corner-encoded (x1, y1, x2, y2);
            False if they are centre-encoded (x, y, w, h).
        GIoU, DIoU, CIoU, EIoU: variant selectors. When several are set the
            precedence is DIoU, then CIoU, then EIoU, then GIoU.
        eps: small constant guarding the divisions.

    Returns:
        Plain IoU when no selector is set, otherwise IoU minus the penalty
        term of the selected variant.
    """
    # Transpose so every coordinate becomes one row for element-wise math
    box2 = box2.T

    # Corner coordinates of both boxes
    if x1y1x2y2:
        px1, py1, px2, py2 = box1[0], box1[1], box1[2], box1[3]
        tx1, ty1, tx2, ty2 = box2[0], box2[1], box2[2], box2[3]
    else:
        # Convert (centre x, centre y, width, height) into corners
        px1, px2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        py1, py2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        tx1, tx2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        ty1, ty2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection: overlap extent per axis, clamped at zero for disjoint boxes
    overlap_w = (torch.min(px2, tx2) - torch.max(px1, tx1)).clamp(0)
    overlap_h = (torch.min(py2, ty2) - torch.max(py1, ty1)).clamp(0)
    inter = overlap_w * overlap_h

    # Union: heights carry eps so the aspect-ratio division below is safe
    pw, ph = px2 - px1, py2 - py1 + eps
    tw, th = tx2 - tx1, ty2 - ty1 + eps
    union = pw * ph + tw * th - inter + eps

    iou = inter / union
    if not (CIoU or DIoU or GIoU or EIoU):
        return iou  # plain IoU

    # Width and height of the smallest box enclosing both boxes
    enclose_w = torch.max(px2, tx2) - torch.min(px1, tx1)
    enclose_h = torch.max(py2, ty2) - torch.min(py1, ty1)

    if not (CIoU or DIoU or EIoU):
        # GIoU https://arxiv.org/pdf/1902.09630.pdf
        enclose_area = enclose_w * enclose_h + eps
        return iou - (enclose_area - union) / enclose_area

    # Distance / Complete IoU https://arxiv.org/abs/1911.08287v1
    # Squared diagonal of the enclosing box
    diag_sq = enclose_w ** 2 + enclose_h ** 2 + eps
    # Squared distance between the two box centres
    center_sq = ((tx1 + tx2 - px1 - px2) ** 2 +
                 (ty1 + ty2 - py1 - py2) ** 2) / 4
    center_term = center_sq / diag_sq

    if DIoU:
        return iou - center_term

    if CIoU:
        # CIoU adds an aspect-ratio consistency term (v * alpha) on top of DIoU
        # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
        v = (4 / math.pi ** 2) * torch.pow(torch.atan(tw / th) - torch.atan(pw / ph), 2)
        with torch.no_grad():
            alpha = v / (v - iou + (1 + eps))
        return iou - (center_term + v * alpha)

    # EIoU: replaces the aspect-ratio term with separate width and height
    # differences, each normalised by the enclosing box's width / height
    dw_sq = ((tx2 - tx1) - (px2 - px1)) ** 2
    dh_sq = ((ty2 - ty1) - (py2 - py1)) ** 2
    enclose_w_sq = enclose_w ** 2 + eps
    enclose_h_sq = enclose_h ** 2 + eps
    return iou - (center_term + dw_sq / enclose_w_sq + dh_sq / enclose_h_sq)
  • utils/loss.py中,找到ComputeLoss类中的__call__()函数,把Regression loss中计算iou的代码,换成下面这句:
# Regression loss: request the EIoU variant (CIoU switched off); x1y1x2y2=False passes the boxes in xywh form
iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=False, EIoU=True)  # iou(prediction, target)



由于IoU Loss对于bbox尺度不变,可以训练出更好的检测器,因此在目标检测中常采用IOU Loss对预测框计算定位回归损失(在YOLOv5中采用CIoU Loss)

而本文提出的Alpha-IoU Loss是基于现有IoU Loss的统一幂化,即对所有的IoU Loss,增加 $\alpha$ 幂,当 $\alpha$ 等于1时,则回归到原始各个Loss中:


# Alpha-IoU: https://arxiv.org/abs/2110.13675
# Reference: https://mp.weixin.qq.com/s/l22GJtA7Vd11dpY9QG4k2A
def bbox_alpha_iou(box1, box2, x1y1x2y2=False, GIoU=False, DIoU=False, CIoU=False, EIoU=False, alpha=3, eps=1e-9):
    """Return the power-generalised (Alpha-) IoU of box1 against box2.

    Per the original note, box1 is 4 values and box2 is nx4; box2 is
    transposed so that each coordinate can be indexed as a row.

    Args:
        box1: first box, indexed as box1[0..3].
        box2: second box set, transposed internally before indexing.
        x1y1x2y2: True if boxes are corner-encoded (x1, y1, x2, y2);
            False if they are centre-encoded (x, y, w, h).
        GIoU, DIoU, CIoU, EIoU: variant selectors. When several are set the
            precedence is DIoU, then CIoU, then EIoU, then GIoU.
        alpha: power applied to the IoU and to the penalty terms.
        eps: small constant guarding divisions and powers.

    Returns:
        IoU ** alpha when no selector is set, otherwise IoU ** alpha minus
        the alpha-powered penalty of the selected variant.
    """
    box2 = box2.T

    # Corner coordinates of both boxes
    if x1y1x2y2:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area (zero when the boxes do not overlap)
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union area; heights carry eps so the aspect-ratio division is safe
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    # Alpha-IoU: raise the IoU to the power alpha
    iou = torch.pow(inter / union + eps, alpha)
    # Squared terms of the original losses become power-(2 * alpha) terms
    beta = 2 * alpha

    if GIoU or DIoU or CIoU or EIoU:
        # Width and height of the smallest box enclosing both boxes
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)

        if CIoU or DIoU or EIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            # Power-generalised diagonal of the enclosing box
            c2 = cw ** beta + ch ** beta + eps
            rho_x = torch.abs(b2_x1 + b2_x2 - b1_x1 - b1_x2)
            rho_y = torch.abs(b2_y1 + b2_y2 - b1_y1 - b1_y2)
            # Power-generalised distance between the two box centres
            rho2 = (rho_x ** beta + rho_y ** beta) / (2 ** beta)

            if DIoU:
                return iou - rho2 / c2  # DIoU

            if CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                with torch.no_grad():
                    alpha_ciou = v / ((1 + eps) - inter / union + v)
                return iou - (rho2 / c2 + torch.pow(v * alpha_ciou + eps, alpha))  # CIoU

            # EIoU: replaces the aspect-ratio term with separate width and
            # height differences normalised by the enclosing box's size.
            # abs() keeps the penalty non-negative (and free of NaN) when beta
            # is odd or fractional, matching how rho_x / rho_y are handled.
            rho_w2 = torch.abs((b2_x2 - b2_x1) - (b1_x2 - b1_x1)) ** beta
            rho_h2 = torch.abs((b2_y2 - b2_y1) - (b1_y2 - b1_y1)) ** beta
            cw2 = cw ** beta + eps
            ch2 = ch ** beta + eps
            return iou - (rho2 / c2 + rho_w2 / cw2 + rho_h2 / ch2)

        # GIoU https://arxiv.org/pdf/1902.09630.pdf
        # Clamp the enclosing area to at least the union so the ratio stays >= 0
        c_area = torch.max(cw * ch + eps, union)
        return iou - torch.pow((c_area - union) / c_area + eps, alpha)  # GIoU

    return iou  # plain Alpha-IoU



