YOLOV5源码解读系列文章目录
- 数据集加载和扩充
- loss计算
前言
本文解读的是 YOLOv5 3.1 版本的源码,官方地址:https://github.com/ultralytics/yolov5
阅读源码之前,有必要先大致了解其实现原理和整体流程,强烈推荐这篇文章:https://blog.csdn.net/nan355655600/article/details/107852353 (另可参考 https://github.com/amdegroot/ssd.pytorch)
持续采样InfiniteDataLoader
class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
    """DataLoader that reuses a single underlying iterator (and its workers).

    Accepts the same arguments as the vanilla ``DataLoader``. The batch sampler
    is wrapped in ``_RepeatSampler`` so that the one iterator created at
    construction time can keep producing batches indefinitely; every pass over
    this loader simply draws ``len(self)`` batches from it.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Bypass DataLoader's own __setattr__ to swap in the repeating sampler
        # (presumably because DataLoader rejects rebinding batch_sampler after
        # construction -- verify against the installed torch version).
        repeating = _RepeatSampler(self.batch_sampler)
        object.__setattr__(self, 'batch_sampler', repeating)
        # Created once and shared by every later __iter__ call
        self.iterator = super().__iter__()

    def __len__(self):
        # batch_sampler is the _RepeatSampler wrapper; .sampler is the original batch sampler
        wrapped = self.batch_sampler.sampler
        return len(wrapped)

    def __iter__(self):
        # Draw exactly len(self) batches from the persistent iterator
        remaining = len(self)
        while remaining > 0:
            yield next(self.iterator)
            remaining -= 1
class _RepeatSampler(object):
""" Sampler that repeats forever
永久持续的采样
Args:
sampler (Sampler)
"""
def __init__(self, sampler):
self.sampler = sampler
def __iter__(self):
while True:
yield from iter(self.sampler)
数据加载
class LoadImagesAndLabels(Dataset):  # for training/testing
    """Dataset of images with per-image label arrays, for training/testing.

    Construction collects the image paths, derives the matching label paths,
    loads (or rebuilds) a ``.cache`` file holding ``{image path: [labels, shape]}``
    plus a ``'hash'`` entry, optionally sorts images by aspect ratio for
    rectangular batches, validates the labels and optionally preloads images.
    """

    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1):
        """Index the dataset found at ``path``.

        Args:
            path: dataset location: a directory of images, a text file listing
                image paths, or a list of such entries.
            img_size: target image size.
            batch_size: batch size, used to assign every image a batch index.
            augment: whether to apply data augmentation.
            hyp: augmentation hyper-parameters, indexed by name (e.g. ``hyp['mosaic']``).
            rect: rectangular training (pad only one side of the image);
                forced off when ``image_weights`` is truthy.
            image_weights: when truthy, ``__getitem__`` remaps indices through
                ``self.indices``.
            cache_images: preload all images into memory to speed up training.
            single_cls: force the class column of every label to 0.
            stride: model stride; rectangular batch shapes are rounded up to a
                multiple of it.
            pad: padding term (in stride units) added when computing
                rectangular batch shapes.
            rank: process rank; progress output is shown only for -1 or 0.

        Raises:
            Exception: if the image list cannot be built or no supported image
                is found.
        """
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        # Mosaic: merge 4 images into one training image (only when augmenting and not rect)
        self.mosaic = self.augment and not self.rect
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride

        # Collect the image paths and normalise them for the local OS
        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = str(Path(p))  # os-agnostic
                parent = str(Path(p).parent) + os.sep  # parent directory of the entry
                if os.path.isfile(p):  # text file listing image paths
                    with open(p, 'r') as t:
                        lines = t.read().splitlines()
                    # local to global path: only the leading './' is replaced
                    f += [parent + x[2:] if x.startswith('./') else x for x in lines]
                elif os.path.isdir(p):  # folder: every 'name.ext' entry directly inside it
                    f += glob.iglob(p + os.sep + '*.*')
                else:
                    raise Exception('%s does not exist' % p)
            # Keep only files whose (lower-cased) extension is in img_formats
            self.img_files = sorted(
                [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats])
            assert len(self.img_files) > 0, 'No images found'
        except Exception as e:
            raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url)) from e

        # Check cache: the .cache file sits next to the labels directory
        self.label_files = self._img2label_paths(self.img_files)  # labels
        cache_path = str(Path(self.label_files[0]).parent) + '.cache'  # cached labels
        if os.path.isfile(cache_path):
            # NOTE(review): torch.load unpickles the file -- only load caches from a trusted source
            cache = torch.load(cache_path)  # load
            # Rebuild when the stored hash no longer matches the current label + image files
            if cache['hash'] != get_hash(self.label_files + self.img_files):  # dataset changed
                cache = self.cache_labels(cache_path)  # re-cache
        else:
            cache = self.cache_labels(cache_path)  # cache

        # Read cache
        cache.pop('hash')  # remove hash
        labels, shapes = zip(*cache.values())
        self.labels = list(labels)
        self.shapes = np.array(shapes, dtype=np.float64)  # image sizes
        # Refresh both path lists: entries that failed in cache_labels are absent from the cache
        self.img_files = list(cache.keys())  # update
        self.label_files = self._img2label_paths(cache.keys())  # update

        # Assign every image to a batch
        n = len(shapes)  # number of images
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index (np.int was removed from NumPy)
        nb = bi[-1] + 1  # number of batches
        self.batch = bi  # batch index of image
        self.n = n

        # Rectangular Training
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio (h/w, given shapes stored as wh)
            irect = ar.argsort()  # ascending aspect ratio
            # Reorder image paths, label paths, labels, shapes and ratios consistently
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.labels = [self.labels[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb  # [[h/w, 1], [1, w/h], ...]
            for i in range(nb):
                ari = ar[bi == i]  # ratios of the images in batch i
                mini, maxi = ari.min(), ari.max()
                # Entries never exceed 1, so only one side of the batch shape is shrunk
                if maxi < 1:  # every image in the batch has h < w
                    shapes[i] = [maxi, 1]
                elif mini > 1:  # every image in the batch has h > w
                    shapes[i] = [1, 1 / mini]

            # Round the batch shapes up to a multiple of stride
            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

        # Check labels
        create_datasubset, extract_bounding_boxes = False, False  # manual switches, disabled by default
        nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
        pbar = enumerate(self.label_files)
        if rank in [-1, 0]:
            pbar = tqdm(pbar)
        for i, file in pbar:
            l = self.labels[i]  # label
            if l is not None and l.shape[0]:
                assert l.shape[1] == 5, '> 5 label columns: %s' % file  # class + 4 coordinates
                assert (l >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file
                if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                    nd += 1
                if single_cls:
                    l[:, 0] = 0  # force dataset into single-class mode
                self.labels[i] = l
                nf += 1  # file found

                # Create subdataset (a smaller dataset)
                if create_datasubset and ns < 1E4:
                    if ns == 0:
                        create_folder(path='./datasubset')
                        os.makedirs('./datasubset/images')
                    exclude_classes = 43
                    if exclude_classes not in l[:, 0]:
                        ns += 1
                        with open('./datasubset/images.txt', 'a') as f:
                            f.write(self.img_files[i] + '\n')

                # Extract object detection boxes for a second stage classifier
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w = img.shape[:2]
                    for j, x in enumerate(l):
                        f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(Path(f).parent)  # make new output folder

                        b = x[1:] * [w, h, w, h]  # box
                        b[2:] = b[2:].max()  # rectangle to square
                        b[2:] = b[2:] * 1.3 + 30  # pad
                        b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                        b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                        b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                        assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes'
            else:
                ne += 1  # file empty

            if rank in [-1, 0]:
                pbar.desc = 'Scanning labels %s (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
                    cache_path, nf, nm, ne, nd, n)
        if nf == 0:
            s = 'WARNING: No labels found in %s. See %s' % (os.path.dirname(file) + os.sep, help_url)
            print(s)
            assert not augment, '%s. Can not train without labels.' % s

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        self.imgs = [None] * n
        if cache_images:
            gb = 0  # bytes of cached images (reported in GB)
            self.img_hw0, self.img_hw = [None] * n, [None] * n  # original size, resized size
            # 8 threads; the pool is released once every result has been consumed
            with ThreadPool(8) as pool:
                results = pool.imap(lambda x: load_image(*x), zip(repeat(self), range(n)))
                pbar = tqdm(enumerate(results), total=n)
                for i, x in pbar:
                    self.imgs[i], self.img_hw0[i], self.img_hw[i] = x  # img, hw_original, hw_resized
                    gb += self.imgs[i].nbytes
                    pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

    @staticmethod
    def _img2label_paths(img_paths):
        """Derive label paths from image paths.

        The first ``/images/`` path component is replaced by ``/labels/`` and the
        file extension by ``.txt``. Only the trailing extension is touched, so a
        directory whose name contains the same extension is left alone.
        """
        sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep  # /images/, /labels/ substrings
        return [os.path.splitext(x.replace(sa, sb, 1))[0] + '.txt' for x in img_paths]

    def cache_labels(self, path='labels.cache'):
        """Verify every image, read its labels and shape, and save the result to ``path``.

        Images or labels that raise while being checked are skipped with a
        warning. Returns the dict ``{image path: [labels, shape], ..., 'hash': ...}``
        that was written with ``torch.save``.
        """
        cache = {}
        pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
        for (img, label) in pbar:
            try:
                l = []
                im = Image.open(img)
                im.verify()  # PIL verify: detects corrupted images
                shape = exif_size(im)  # image size
                assert shape[0] > 9 and shape[1] > 9, 'image size <10 pixels'
                if os.path.isfile(label):
                    with open(label, 'r') as f:
                        l = np.array([row.split() for row in f.read().splitlines()], dtype=np.float32)  # labels
                if len(l) == 0:
                    l = np.zeros((0, 5), dtype=np.float32)
                cache[img] = [l, shape]
            except Exception as e:
                # Deliberately best-effort: a bad file is reported and left out of the cache
                print('WARNING: Ignoring corrupted image and/or label %s: %s' % (img, e))

        cache['hash'] = get_hash(self.label_files + self.img_files)
        torch.save(cache, path)  # save for next time
        return cache

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.img_files)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            tuple: ``(img, labels_out, img_path, shapes)`` where ``img`` is a
            channel-first tensor, ``labels_out`` has shape ``(nL, 6)`` with
            column 0 left at zero (filled by ``collate_fn``) and columns 1-5
            holding class + normalised xywh, and ``shapes`` is ``None`` for
            mosaic samples.
        """
        if self.image_weights:
            # NOTE(review): self.indices is not set in this class -- presumably assigned by the caller
            index = self.indices[index]

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

            # MixUp https://arxiv.org/pdf/1710.09412.pdf
            # Blend two mosaics together (each one already holds 4 images)
            if random.random() < hyp['mixup']:
                img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1))
                r = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
                img = (img * r + img2 * (1 - r)).astype(np.uint8)
                labels = np.concatenate((labels, labels2), 0)

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # Load labels
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh to pixel xyxy format, shifted by the letterbox padding
                labels = x.copy()
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment imagespace (mosaic samples were already warped inside load_mosaic)
            if not mosaic:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

            # Augment colorspace
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

        nL = len(labels)  # number of labels
        if nL:
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
            labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
            labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

        if self.augment:
            # flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

            # flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

        # Column 0 is reserved for the image index within the batch (set in collate_fn)
        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        """Merge samples into a batch, tagging each label row with its image index.

        Column 0 of every sample's label tensor is overwritten in place with the
        sample's position in the batch, so the concatenated labels can be mapped
        back to their images.
        """
        img, label, path, shapes = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes
mosaic数据增强
def load_mosaic(self, index):
    """Assemble a 4-image mosaic and return ``(img4, labels4)``.

    The image at ``index`` plus three randomly chosen images are pasted around
    a random centre on a ``2s x 2s`` grey canvas (``s = self.img_size``). Each
    image's normalised xywh labels are converted to pixel xyxy on the canvas,
    clipped to it, and then canvas and labels go through ``random_perspective``
    with ``self.mosaic_border`` as the border to remove.
    """
    s = self.img_size
    canvas_side = s * 2
    mosaic_labels = []

    # Random mosaic centre
    yc, xc = [int(random.uniform(-b, 2 * s + b)) for b in self.mosaic_border]  # mosaic center x, y
    # The requested image followed by 3 additional random image indices
    picks = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)]

    for tile, img_idx in enumerate(picks):
        # Load image
        img, _, (h, w) = load_image(self, img_idx)

        # "a" coordinates: destination on the large canvas; "b": source region in the small image
        if tile == 0:  # top left
            # Canvas for the 4 tiles, pre-filled with 114 (grey)
            img4 = np.full((canvas_side, canvas_side, img.shape[2]), 114, dtype=np.uint8)
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
        elif tile == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, canvas_side), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif tile == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(canvas_side, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif tile == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, canvas_side), min(canvas_side, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        # Paste the selected region of the image onto the canvas
        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        # Offset of the pasted image's origin relative to the canvas
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels: normalized xywh to pixel xyxy, shifted by the paste offset
        src = self.labels[img_idx]
        shifted = src.copy()
        if src.size > 0:
            shifted[:, 1] = w * (src[:, 1] - src[:, 3] / 2) + padw
            shifted[:, 2] = h * (src[:, 2] - src[:, 4] / 2) + padh
            shifted[:, 3] = w * (src[:, 1] + src[:, 3] / 2) + padw
            shifted[:, 4] = h * (src[:, 2] + src[:, 4] / 2) + padh
        mosaic_labels.append(shifted)

    # Concat/clip labels
    if mosaic_labels:
        mosaic_labels = np.concatenate(mosaic_labels, 0)
        # Clamp coordinates that fall outside the canvas to [0, 2s]
        box_view = mosaic_labels[:, 1:]
        np.clip(box_view, 0, 2 * s, out=box_view)  # use with random_perspective

    # Augment: translate / rotate / scale / shear / perspective on image and labels
    hyp = self.hyp
    img4, mosaic_labels = random_perspective(img4, mosaic_labels,
                                             degrees=hyp['degrees'],
                                             translate=hyp['translate'],
                                             scale=hyp['scale'],
                                             shear=hyp['shear'],
                                             perspective=hyp['perspective'],
                                             border=self.mosaic_border)  # border to remove
    return img4, mosaic_labels
仿射变换
def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
    """Apply a random perspective/affine warp to ``img`` and its boxes.

    Args:
        img: image array shaped (h, w, c).
        targets: rows of ``[cls, x1, y1, x2, y2]``.
        degrees: rotation is drawn from ``[-degrees, degrees]``.
        translate: translation is drawn from ``[0.5 - translate, 0.5 + translate]``
            times the output width/height.
        scale: scale factor is drawn from ``[1 - scale, 1 + scale]``.
        shear: x and y shear angles (deg) are drawn from ``[-shear, shear]``.
        perspective: perspective terms are drawn from ``[-perspective, perspective]``;
            a falsy value selects the affine path.
        border: (y, x) amounts added twice to the input size to get the output size.

    Returns:
        tuple: ``(img, targets)`` -- the warped image and the boxes kept by
        ``box_candidates``, with updated coordinates.
    """
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    in_h, in_w = img.shape[0], img.shape[1]  # shape(h,w,c)
    height = in_h + border[0] * 2
    width = in_w + border[1] * 2

    # Center: move the image centre to the origin before the other transforms
    C = np.eye(3)
    C[0, 2] = -in_w / 2  # x translation (pixels)
    C[1, 2] = -in_h / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    angle = random.uniform(-degrees, degrees)
    zoom = random.uniform(1 - scale, 1 + scale)
    R = np.eye(3)
    R[:2] = cv2.getRotationMatrix2D(angle=angle, center=(0, 0), scale=zoom)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined transform matrix; order of operations (right to left) is IMPORTANT
    M = T @ S @ R @ P @ C

    # Warp only when something actually changes
    untouched = border[0] == 0 and border[1] == 0 and not (M != np.eye(3)).any()
    if not untouched:
        grey = (114, 114, 114)
        if perspective:
            img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=grey)
        else:  # affine
            img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=grey)

    # Transform label coordinates to match the warped image
    n = len(targets)
    if not n:
        return img, targets

    # warp points: the four corners of every box, in homogeneous coordinates
    corners = np.ones((n * 4, 3))
    corners[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
    corners = corners @ M.T  # transform (.T is the matrix transpose, unrelated to the translation matrix T)
    if perspective:
        corners = (corners[:, :2] / corners[:, 2:3]).reshape(n, 8)  # rescale
    else:  # affine
        corners = corners[:, :2].reshape(n, 8)

    # create new boxes: axis-aligned bounds of the warped corners
    xs = corners[:, [0, 2, 4, 6]]
    ys = corners[:, [1, 3, 5, 7]]
    boxes = np.stack((xs.min(1), ys.min(1), xs.max(1), ys.max(1)), axis=1)

    # clip boxes to the output image
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, width)
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, height)

    # filter candidates: box_candidates compares the scaled original boxes with the warped ones
    keep = box_candidates(box1=targets[:, 1:5].T * zoom, box2=boxes.T)
    targets = targets[keep]
    targets[:, 1:5] = boxes[keep]

    return img, targets
letterbox 自适应缩放图片
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize ``img`` to fit ``new_shape`` and pad the remainder with ``color``.

    Args:
        img: image array; ``img.shape[:2]`` is read as (height, width).
        new_shape: target (height, width), or a single int for a square target.
        color: border colour used for the padding.
        auto: pad only up to the next multiple of ``stride`` (minimum rectangle)
            instead of the full ``new_shape``.
        scaleFill: when ``auto`` is false, stretch the image to ``new_shape``
            with no padding.
        scaleup: when false, the image is only ever scaled down, never up.
        stride: multiple used by the ``auto`` padding (was hard-coded to 32).

    Returns:
        tuple: ``(img, ratio, (dw, dh))`` -- the padded image, the
        (width, height) scale ratios and the padding applied per side.
    """
    # Resize image to a stride-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old): the smaller of the two so the whole image fits
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # [width, height]
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch straight to new_shape, ignoring the aspect ratio
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 nudges split an odd total padding into floor/ceil halves
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)
版权声明:本文为博主暮丶凉原创文章,版权归属原作者,如果侵权,请联系我们删除!
原文链接:https://blog.csdn.net/qq_19457459/article/details/113196420