什么是IOU
IOU是目标检测等任务当中,衡量网络标定框和给定框之间差距的一种衡量方式。
最初的IOU的计算公式为(A、B 分别为两个框所覆盖的区域):
$$
IOU = \frac{|A \cap B|}{|A \cup B|}
$$
图示如下:
改进的IOU
传统的 IOU 在给定框和预测框之间,如果不重叠的话,则IOU的值为零,并且不会反映两个框之间的距离的远近,这样会导致下面两个问题:
1、我们无法知道两个框之间的距离,所以无法得知网络的现存的预测性能;
2、IOU为零的话,代表我们在标定框上面,没有梯度,我们无法更新使得网络的性能变得更优;
因为以上的问题,我们有了想法,想要推广IOU到非重叠的情况下,并且想要确保下面的几条也被满足:
1、遵循与IOU相同的定义,即将比较对象的形状属性编码为区域属性;
2、维持IOU的尺寸的不变性;
3、在重叠的情况下确保与IOU的强相关性;
所以有了GIOU、DIOU、CIOU等变体的出现;
GIOU
GIOU在IOU的基础上,减去了一个惩罚项:两个框的最小外接矩形面积与两个框并集面积的差值,再比上最小外接矩形的面积。这样做的目的,是把两个框最小外接矩形的面积也考虑进来,避免外接最小矩形框的面积变得不可控制。
具体公式如下:
$$
GIOU = IOU - \frac{|C \setminus (A \cup B)|}{|C|}
$$
其中 C 为 A 和 B 的最小外接矩形
图示如下:
采用这种方式,可以较好的限制外接矩形的面积,从而不会使得标定框的优化出现没有梯度的现象。解决了IOU的梯度问题;
但同时GIOU也引入了新的问题:
1、收敛速度较慢,且不容易收敛。因为会引导网络预测框先变大,再去契合真值框的形状;
2、在预测框和真值框水平或者垂直对齐且相交(或者一个框完全包含另一个框)的时候,最小外接矩形与两个框的并集重合,惩罚项为零,GIOU退化为IOU;
GIOU的损失函数可以表示为:
$$
loss = 1 - GIOU
$$
DIOU
图示如下:
下面是 iou、giou和diou优化的对比:
下面是收敛速度的对比:
上面为GIOU的收敛速度,下面为DIOU的收敛速度,可以看出来,收敛速度提升很大。
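DIOU公式如下:
$$
DIOU = IOU - \frac{\rho^2(b, b^{gt})}{c^2}
$$
其中 $b$ 和 $b^{gt}$ 分别为预测框和真值框的中心点,$\rho$ 为两个中心点之间的欧氏距离,$c$ 为两个框最小外接矩形的对角线长度。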
DIOU的损失函数可以表示为:
$$
loss = 1 - DIOU
$$
CIOU
考虑到bbox回归三要素中的长宽比还没被考虑到计算中,因此,进一步在DIoU的基础上提出了CIoU。又加入了一个形状的惩罚项,使得形状收敛的更快。
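公式如下:
$$
CIOU = IOU - \frac{\rho^2(b, b^{gt})}{c^2} - \alpha v
$$
其中
$$
v = \frac{4}{\pi^2}\left(\arctan\frac{w^{gt}}{h^{gt}} - \arctan\frac{w}{h}\right)^2, \qquad \alpha = \frac{v}{(1 - IOU) + v}
$$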
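对w、h进行求导的结果如下(由上面 v 的定义直接求导得到):
$$
\frac{\partial v}{\partial w} = -\frac{8}{\pi^2}\left(\arctan\frac{w^{gt}}{h^{gt}} - \arctan\frac{w}{h}\right)\cdot\frac{h}{w^2 + h^2}, \qquad
\frac{\partial v}{\partial h} = \frac{8}{\pi^2}\left(\arctan\frac{w^{gt}}{h^{gt}} - \arctan\frac{w}{h}\right)\cdot\frac{w}{w^2 + h^2}
$$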
各种IOU代码如下
IOU代码及结果展示如下
结果为:
import cv2
import numpy as np
def IOU_score(box1,box2):
    """
    Compute the IoU (intersection over union) of two axis-aligned boxes.

    para: box1 corner coordinates of the first box: x1,y1,x2,y2
    para: box2 corner coordinates of the second box: x1,y1,x2,y2
    return: (iou, (center_x, center_y)); iou is a float, the second item is
            the midpoint between the two intersection corner points,
            truncated to int (used by the demo as the text anchor)
    """
    # Corner points of the intersection rectangle.
    iou_x1 = max(box1[0], box2[0])
    iou_y1 = max(box1[1], box2[1])
    iou_x2 = min(box1[2], box2[2])
    iou_y2 = min(box1[3], box2[3])
    # Clamp each side at 0 so disjoint boxes yield an empty intersection.
    area_inter = max(0, iou_x2 - iou_x1) * max(0, iou_y2 - iou_y1)
    # Union = area(box1) + area(box2) - intersection.
    area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    area_all = area_box1 + area_box2 - area_inter
    center_x = int((iou_x1 + iou_x2) / 2)
    center_y = int((iou_y2 + iou_y1) / 2)
    # Two zero-area boxes give an empty union; report 0 instead of raising
    # ZeroDivisionError.
    iou = area_inter / area_all if area_all else 0.0
    return float(iou), (center_x, center_y)
def main():
    """Draw two boxes on a white canvas, print their IoU on it, and show it."""
    # 512x512 white BGR canvas.
    canvas = np.full((512, 512, 3), 255, dtype=np.uint8)
    first = [50, 50, 300, 300]
    second = [51, 51, 301, 301]
    # Outline each box in its own colour.
    for box, colour in ((first, (0, 0, 255)), (second, (255, 0, 0))):
        cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), colour, 2)
    score, anchor = IOU_score(first, second)
    # The score text is anchored at the point returned by IOU_score.
    cv2.putText(canvas, "IOU = %.2f" % score, anchor,
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    cv2.imshow("image", canvas)
    # Block until a key is pressed, then close the window.
    cv2.waitKey()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
GIOU代码及结果展示如下
结果为:
import cv2
import numpy as np
def GIOU_score(box1,box2):
    """
    Compute the GIoU (generalized IoU) of two axis-aligned boxes.

    GIoU = IoU - (C - U) / C, where U is the union area and C is the area of
    the smallest rectangle enclosing both boxes.

    para: box1 corner coordinates of the first box: x1,y1,x2,y2
    para: box2 corner coordinates of the second box: x1,y1,x2,y2
    return: (giou, intersection corners (x1,y1,x2,y2),
             enclosing-rectangle corners (x1,y1,x2,y2))
    """
    # Corner points of the intersection rectangle.
    iou_x1 = max(box1[0], box2[0])
    iou_y1 = max(box1[1], box2[1])
    iou_x2 = min(box1[2], box2[2])
    iou_y2 = min(box1[3], box2[3])
    # Corner points of the smallest enclosing rectangle.
    g_iou_x1 = min(box1[0], box2[0])
    g_iou_y1 = min(box1[1], box2[1])
    g_iou_x2 = max(box1[2], box2[2])
    g_iou_y2 = max(box1[3], box2[3])
    # Clamp each side at 0 so disjoint boxes yield an empty intersection.
    area_inter = max(0, iou_x2 - iou_x1) * max(0, iou_y2 - iou_y1)
    # Union = area(box1) + area(box2) - intersection.
    area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    area_union = max(0, area_box1 + area_box2 - area_inter)
    # Area of the smallest enclosing rectangle.
    area_all = max(0, (g_iou_x2 - g_iou_x1) * (g_iou_y2 - g_iou_y1))
    # Degenerate (zero-area) input would divide by zero; treat each undefined
    # ratio as 0 instead of raising ZeroDivisionError.
    iou = max(0, area_inter / area_union) if area_union else 0.0
    penalty = max(0, area_all - area_union) / area_all if area_all else 0.0
    g_iou = iou - penalty
    return float(g_iou), (iou_x1, iou_y1, iou_x2, iou_y2), (g_iou_x1, g_iou_y1, g_iou_x2, g_iou_y2)
def main():
    """Draw two boxes and their enclosing rectangle, print the GIoU, and show it."""
    # 512x512 white BGR canvas.
    canvas = np.full((512, 512, 3), 255, dtype=np.uint8)
    first = [50, 50, 300, 300]
    second = [100, 100, 400, 400]
    score, inter_box, outer_box = GIOU_score(first, second)
    # The text is drawn first, offset 30 px into the intersection's top-left corner.
    text_origin = (inter_box[0] + 30, inter_box[1] + 30)
    cv2.putText(canvas, "GIOU = %.2f" % score, text_origin,
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    # Then the two boxes and the enclosing rectangle, each in its own colour.
    outlines = (
        (first, (255, 0, 0)),
        (second, (0, 255, 0)),
        (outer_box, (0, 0, 255)),
    )
    for box, colour in outlines:
        cv2.rectangle(canvas, (box[0], box[1]), (box[2], box[3]), colour, thickness=3)
    cv2.imshow("image", canvas)
    # Block until a key is pressed, then close the window.
    cv2.waitKey()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
DIOU代码及结果展示如下
结果如下:
import cv2
import numpy as np
def DIOU_score(box1,box2):
    """
    Compute the DIoU (distance IoU) of two axis-aligned boxes.

    DIoU = IoU - d^2 / c^2, where d is the distance between the two box
    centers and c is the diagonal of the smallest enclosing rectangle.

    para: box1 corner coordinates of the first box: x1,y1,x2,y2
    para: box2 corner coordinates of the second box: x1,y1,x2,y2
    return: (diou, intersection corners (x1,y1,x2,y2),
             enclosing-rectangle corners (x1,y1,x2,y2),
             the two box centers truncated to int as (cx1,cy1,cx2,cy2))
    """
    # Corner points of the intersection rectangle.
    iou_x1 = max(box1[0], box2[0])
    iou_y1 = max(box1[1], box2[1])
    iou_x2 = min(box1[2], box2[2])
    iou_y2 = min(box1[3], box2[3])
    # Box centers. They are not clamped to >= 0: a box with negative
    # coordinates has a negative center, and clamping would distort d.
    d_x1 = (box1[2] + box1[0]) / 2
    d_y1 = (box1[3] + box1[1]) / 2
    d_x2 = (box2[2] + box2[0]) / 2
    d_y2 = (box2[3] + box2[1]) / 2
    # Corner points of the smallest enclosing rectangle.
    c_x1 = min(box1[0], box2[0])
    c_y1 = min(box1[1], box2[1])
    c_x2 = max(box1[2], box2[2])
    c_y2 = max(box1[3], box2[3])
    # Clamp each side at 0 so disjoint boxes yield an empty intersection.
    area_inter = max(0, iou_x2 - iou_x1) * max(0, iou_y2 - iou_y1)
    # Union = area(box1) + area(box2) - intersection.
    area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    area_union = max(0, area_box1 + area_box2 - area_inter)
    # Squared diagonal of the enclosing rectangle.
    c_2 = max(0, c_x2 - c_x1) ** 2 + max(0, c_y2 - c_y1) ** 2
    # Squared center distance. The offsets are squared as-is: clamping a
    # negative offset to 0 would drop the penalty whenever box2's center lies
    # left of / above box1's center and make the score order-dependent.
    d_2 = (d_x2 - d_x1) ** 2 + (d_y2 - d_y1) ** 2
    # Degenerate (zero-area / single-point) input would divide by zero; treat
    # each undefined ratio as 0 instead of raising ZeroDivisionError.
    iou = max(0, area_inter / area_union) if area_union else 0.0
    penalty = d_2 / c_2 if c_2 else 0.0
    d_iou = iou - penalty
    return float(d_iou), (iou_x1, iou_y1, iou_x2, iou_y2), (c_x1, c_y1, c_x2, c_y2), (int(d_x1), int(d_y1), int(d_x2), int(d_y2))
def main():
    """Draw two boxes, their enclosing rectangle and the DIoU distances, then show it."""
    # 512x512 white BGR canvas.
    img = np.full((512, 512, 3), 255, dtype=np.uint8)
    box1 = [50, 50, 300, 300]
    box2 = [250, 80, 400, 350]
    IOU, area_inter, area_all, short_line = DIOU_score(box1, box2)
    font = cv2.FONT_HERSHEY_SIMPLEX
    # The value shown is the DIoU, so label it as such (was mislabeled "GIOU").
    cv2.putText(img, "DIOU = %.2f" % IOU, (area_inter[0] + 30, area_inter[1] + 30), font, 0.8, (0, 0, 0), 2)
    cv2.rectangle(img, (box1[0], box1[1]), (box1[2], box1[3]), (255, 0, 0), thickness=3)
    cv2.rectangle(img, (box2[0], box2[1]), (box2[2], box2[3]), (0, 255, 0), thickness=3)
    cv2.rectangle(img, (area_all[0], area_all[1]), (area_all[2], area_all[3]), (0, 0, 255), thickness=3)
    # Segment joining the two box centers (the distance d in the penalty term).
    cv2.line(img, (short_line[0], short_line[1]), (short_line[2], short_line[3]), (200, 45, 45), 5)
    # Diagonal of the enclosing rectangle (the distance c in the penalty term).
    cv2.line(img, (area_all[0], area_all[1]), (area_all[2], area_all[3]), (64, 78, 0), 5)
    cv2.imshow("image", img)
    # Block until a key is pressed, then close the window.
    cv2.waitKey()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
CIOU代码及结果展示如下
结果如下:
import cv2
import numpy as np
from math import pi,atan
def CIOU_score(box1,box2):
    """
    Compute the CIoU (complete IoU) of two axis-aligned boxes.

    CIoU = IoU - d^2 / c^2 - alpha * v, where d is the distance between the
    box centers, c is the diagonal of the smallest enclosing rectangle, v
    measures the aspect-ratio mismatch and alpha = v / ((1 - IoU) + v).

    para: box1 corner coordinates of the first box: x1,y1,x2,y2
    para: box2 corner coordinates of the second box: x1,y1,x2,y2
          (its width/height are used as the "gt" aspect ratio)
    return: (ciou, intersection corners (x1,y1,x2,y2),
             enclosing-rectangle corners (x1,y1,x2,y2),
             the two box centers truncated to int as (cx1,cy1,cx2,cy2))
    """
    # Imported locally so the block does not depend on the file-level import
    # list; atan2 replaces atan(w / h) to survive zero-height boxes.
    from math import atan2, pi

    # Corner points of the intersection rectangle.
    iou_x1 = max(box1[0], box2[0])
    iou_y1 = max(box1[1], box2[1])
    iou_x2 = min(box1[2], box2[2])
    iou_y2 = min(box1[3], box2[3])
    # Box centers. They are not clamped to >= 0: a box with negative
    # coordinates has a negative center, and clamping would distort d.
    d_x1 = (box1[2] + box1[0]) / 2
    d_y1 = (box1[3] + box1[1]) / 2
    d_x2 = (box2[2] + box2[0]) / 2
    d_y2 = (box2[3] + box2[1]) / 2
    # Corner points of the smallest enclosing rectangle.
    c_x1 = min(box1[0], box2[0])
    c_y1 = min(box1[1], box2[1])
    c_x2 = max(box1[2], box2[2])
    c_y2 = max(box1[3], box2[3])
    # Widths / heights of both boxes, clamped to be non-negative.
    w_gt = max(0, box2[2] - box2[0])
    h_gt = max(0, box2[3] - box2[1])
    w = max(0, box1[2] - box1[0])
    h = max(0, box1[3] - box1[1])
    # Clamp each side at 0 so disjoint boxes yield an empty intersection.
    area_inter = max(0, iou_x2 - iou_x1) * max(0, iou_y2 - iou_y1)
    # Union = area(box1) + area(box2) - intersection.
    area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    area_union = max(0, area_box1 + area_box2 - area_inter)
    # Zero union (degenerate boxes) would divide by zero; use IoU = 0 then.
    iou = max(0, area_inter / area_union) if area_union else 0.0
    # Squared diagonal of the enclosing rectangle.
    c_2 = max(0, c_x2 - c_x1) ** 2 + max(0, c_y2 - c_y1) ** 2
    # Squared center distance. The offsets are squared as-is: clamping a
    # negative offset to 0 would drop the penalty whenever box2's center lies
    # left of / above box1's center and make the score order-dependent.
    d_2 = (d_x2 - d_x1) ** 2 + (d_y2 - d_y1) ** 2
    distance_penalty = d_2 / c_2 if c_2 else 0.0
    # For non-negative w, h with h > 0, atan2(w, h) == atan(w / h); with
    # h == 0 it yields pi/2 (or 0 when w is 0 too) instead of raising.
    v = 4 / pi ** 2 * (atan2(w_gt, h_gt) - atan2(w, h)) ** 2
    # For identical boxes iou == 1 and v == 0, so the denominator is 0;
    # the shape term is 0 in that case.
    alpha_denominator = 1 - iou + v
    alpha = v / alpha_denominator if alpha_denominator else 0.0
    c_iou = iou - distance_penalty - alpha * v
    return float(c_iou), (iou_x1, iou_y1, iou_x2, iou_y2), (c_x1, c_y1, c_x2, c_y2), (int(d_x1), int(d_y1), int(d_x2), int(d_y2))
def main():
    """Draw two boxes, their enclosing rectangle and the CIoU distances, then show it."""
    # 512x512 white BGR canvas.
    img = np.full((512, 512, 3), 255, dtype=np.uint8)
    box1 = [50, 50, 300, 300]
    box2 = [100, 80, 200, 260]
    IOU, area_inter, area_all, short_line = CIOU_score(box1, box2)
    font = cv2.FONT_HERSHEY_SIMPLEX
    # The value shown is the CIoU, so label it as such (was mislabeled "GIOU").
    cv2.putText(img, "CIOU = %.2f" % IOU, (area_inter[0] + 30, area_inter[1] + 30), font, 0.8, (0, 0, 0), 2)
    cv2.rectangle(img, (box1[0], box1[1]), (box1[2], box1[3]), (255, 0, 0), thickness=3)
    cv2.rectangle(img, (box2[0], box2[1]), (box2[2], box2[3]), (0, 255, 0), thickness=3)
    cv2.rectangle(img, (area_all[0], area_all[1]), (area_all[2], area_all[3]), (0, 0, 255), thickness=3)
    # Segment joining the two box centers (the distance d in the penalty term).
    cv2.line(img, (short_line[0], short_line[1]), (short_line[2], short_line[3]), (200, 45, 45), 5)
    # Diagonal of the enclosing rectangle (the distance c in the penalty term).
    cv2.line(img, (area_all[0], area_all[1]), (area_all[2], area_all[3]), (64, 78, 0), 5)
    cv2.imshow("image", img)
    # Block until a key is pressed, then close the window.
    cv2.waitKey()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
有错误的地方,还望各位大佬指教。
文章出处登录后可见!