制作COCO格式数据集

研一上学期要跑一个yoloe，需要用自己的数据集去跑，实验室没有合适的coco格式的数据集，于是需要自己制作数据集，防止以后需要在做的时候忘记，现在把整个操作流程记录下来。

Table of Contents

一.利用几个代码来创建VOC格式数据集

- 利用代码创建VOC格式文件夹或者自己手动创建。

# 创建VOC格式文件夹

import os
def make_voc_dir():
    os.makedirs('E:\B501\zhizuoshujuji\VOC2100/Annotations')
    os.makedirs('E:\B501\zhizuoshujuji\VOC2100/ImageSets')
    os.makedirs('E:\B501\zhizuoshujuji\VOC2100/ImageSets/Main')
    # os.makedirs('E:\B501\zhizuoshujuji\VOCVOC/ImageSets/Layout')
    # os.makedirs('E:\B501\zhizuoshujuji\VOCVOC/ImageSets/Segmentation')
    os.makedirs('E:\B501\zhizuoshujuji\VOC2100/JPEGImages')
    # os.makedirs('E:\B501\zhizuoshujuji\VOC/SegmentationClass')
    # os.makedirs('E:\B501\zhizuoshujuji\VOC/SegmentationObject')
if __name__ == '__main__':
    make_voc_dir()

因为我做的只是检测，不需要其他任务，所以注释掉了部分，只保留我需要的。

然后把图片数据集放在上一级目录，对图片文件进行重命名。然后将重命名后的图片放在JPEGImages中，png格式的图片也可以。

2.图片重命名

#图片重命名
import os
path = 'E:\B501\zhizuoshujuji\images2100'
num= 1
for file in os.listdir(path):
    os.rename(os.path.join(path,file),os.path.join(path,str(num)+'.png'))
    num+=1

二.制作VOC数据集

1.创建虚拟环境labelimg，安装labelimg，然后使用labelimg去做标注。

安装成功后输入指令labelimg打开软件

选择打开图片文件以及保存xml的文件夹

然后进行标注即可，标注完成文件夹如图所示：

然后利用几个代码继续制作数据集。

2.生成ImageSet下的Main

# 4make_imagesets
import os
import random

xmlfilepath = r'E:\B501\zhizuoshujuji\VOC2100\Annotations/'  # xml文件的路径
saveBasePath = r'E:\B501\zhizuoshujuji\VOC2100\ImageSets/'  # 生成的txt文件的保存路径

trainval_percent = 0.9  # 训练验证集占整个数据集的比重（划分训练集和测试验证集）
train_percent = 0.8  # 训练集占整个训练验证集的比重（划分训练集和验证集）
total_xml = os.listdir(xmlfilepath)
num = len(total_xml)
list = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list, tv)
train = random.sample(trainval, tr)

print("train and val size", tv)
print("traub suze", tr)
ftrainval = open(os.path.join(saveBasePath, 'Main/trainval.txt'), 'w')
ftest = open(os.path.join(saveBasePath, 'Main/test.txt'), 'w')
ftrain = open(os.path.join(saveBasePath, 'Main/train.txt'), 'w')
fval = open(os.path.join(saveBasePath, 'Main/val.txt'), 'w')

for i in list:
    name = total_xml[i][:-4] + '\n'
    if i in trainval:
        ftrainval.write(name)
        if i in train:
            ftrain.write(name)
        else:
            fval.write(name)
    else:
        ftest.write(name)

ftrainval.close()
ftrain.close()
fval.close()
ftest.close()

然后VOC格式数据集创建完毕。

三.VOC格式数据集转COCO格式数据集

用一个代码转换即可，首先在VOC数据集同级目录放代码以及labels.txt

其中labels.txt放几类都可以，我只有一个fish类别，所以我只放了fish。

然后开始运行代码

1.json格式的标签文件转换程序

import os
import argparse
import json
import xml.etree.ElementTree as ET
from typing import Dict, List
import re


def get_label2id(labels_path: str) -> Dict[str, int]:
    """id is 1 start"""
    with open(labels_path, 'r') as f:
        labels_str = f.read().split()
    labels_ids = list(range(1, len(labels_str) + 1))
    return dict(zip(labels_str, labels_ids))


def get_annpaths(ann_dir_path: str = None,
                 ann_ids_path: str = None,
                 ext: str = '',
                 annpaths_list_path: str = None) -> List[str]:
    # If use annotation paths list
    if annpaths_list_path is not None:
        with open(annpaths_list_path, 'r') as f:
            ann_paths = f.read().split()
        return ann_paths

    # If use annotaion ids list
    ext_with_dot = '.' + ext if ext != '' else ''
    with open(ann_ids_path, 'r') as f:
        ann_ids = f.read().split()
    ann_paths = [os.path.join(ann_dir_path, aid + ext_with_dot) for aid in ann_ids]
    return ann_paths


def get_image_info(annotation_root, extract_num_from_imgid=True):
    path = annotation_root.findtext('path')
    if path is None:
        filename = annotation_root.findtext('filename')
    else:
        filename = os.path.basename(path)
    img_name = os.path.basename(filename)
    img_id = os.path.splitext(img_name)[0]
    if extract_num_from_imgid and isinstance(img_id, str):
        img_id = int(re.findall(r'\d+', img_id)[0])

    size = annotation_root.find('size')
    width = int(size.findtext('width'))
    height = int(size.findtext('height'))

    image_info = {
        'file_name': filename,
        'height': height,
        'width': width,
        'id': img_id
    }
    return image_info


def get_coco_annotation_from_obj(obj, label2id):
    label = obj.findtext('name')
    assert label in label2id, f"Error: {label} is not in label2id !"
    category_id = label2id[label]
    bndbox = obj.find('bndbox')
    xmin = int(bndbox.findtext('xmin')) - 1
    ymin = int(bndbox.findtext('ymin')) - 1
    xmax = int(bndbox.findtext('xmax'))
    ymax = int(bndbox.findtext('ymax'))
    assert xmax > xmin and ymax > ymin, f"Box size error !: (xmin, ymin, xmax, ymax): {xmin, ymin, xmax, ymax}"
    o_width = xmax - xmin
    o_height = ymax - ymin
    ann = {
        'area': o_width * o_height,
        'iscrowd': 0,
        'bbox': [xmin, ymin, o_width, o_height],
        'category_id': category_id,
        'ignore': 0,
        'segmentation': []  # This script is not for segmentation
    }
    return ann


def convert_xmls_to_cocojson(annotation_paths: List[str],
                             label2id: Dict[str, int],
                             output_jsonpath: str,
                             extract_num_from_imgid: bool = True):
    output_json_dict = {
        "images": [],
        "type": "instances",
        "annotations": [],
        "categories": []
    }
    bnd_id = 1  # START_BOUNDING_BOX_ID, TODO input as args ?

    for a_path in annotation_paths:
        # Read annotation xml
        ann_tree = ET.parse(a_path)
        ann_root = ann_tree.getroot()

        img_info = get_image_info(annotation_root=ann_root,
                                  extract_num_from_imgid=extract_num_from_imgid)
        img_id = img_info['id']
        output_json_dict['images'].append(img_info)

        for obj in ann_root.findall('object'):
            ann = get_coco_annotation_from_obj(obj=obj, label2id=label2id)
            ann.update({'image_id': img_id, 'id': bnd_id})
            output_json_dict['annotations'].append(ann)
            bnd_id = bnd_id + 1

    for label, label_id in label2id.items():
        category_info = {'supercategory': 'none', 'id': label_id, 'name': label}
        output_json_dict['categories'].append(category_info)

    with open(output_jsonpath, 'w') as f:
        output_json = json.dumps(output_json_dict)
        f.write(output_json)
    print('Convert successfully !')


def main():
    parser = argparse.ArgumentParser(
        description='This script support converting voc format xmls to coco format json')
    parser.add_argument('--ann_dir', type=str, default='./Annotations')
    parser.add_argument('--ann_ids', type=str, default='./ImageSets/Main/train.txt')
    parser.add_argument('--ann_paths_list', type=str, default=None)
    parser.add_argument('--labels', type=str, default='./labels.txt')
    parser.add_argument('--output', type=str, default='./output/annotations/train.json')
    parser.add_argument('--ext', type=str, default='xml')
    args = parser.parse_args()
    label2id = get_label2id(labels_path=args.labels)
    ann_paths = get_annpaths(
        ann_dir_path=args.ann_dir,
        ann_ids_path=args.ann_ids,
        ext=args.ext,
        annpaths_list_path=args.ann_paths_list
    )
    convert_xmls_to_cocojson(
        annotation_paths=ann_paths,
        label2id=label2id,
        output_jsonpath=args.output,
        extract_num_from_imgid=True
    )


if __name__ == '__main__':
    if not os.path.exists('./output/annotations'):
        os.makedirs('./output/annotations')
    main()

其中124行和127行需要转换3次，分别改成train.txt，train.json；test.txt，test.json；val.txt，val.json。运行三次得到结果如下：

2.然后将对应的图像文件转换至对应文件夹中。

import os
import shutil

images_file_path = './JPEGImages/'
split_data_file_path = './ImageSets/Main/'
new_images_file_path = './output/'

if not os.path.exists(new_images_file_path + 'train'):
    os.makedirs(new_images_file_path + 'train')
if not os.path.exists(new_images_file_path + 'val'):
    os.makedirs(new_images_file_path + 'val')
if not os.path.exists(new_images_file_path + 'test'):
    os.makedirs(new_images_file_path + 'test')

dst_train_Image = new_images_file_path + 'train/'
dst_val_Image = new_images_file_path + 'val/'
dst_test_Image = new_images_file_path + 'test/'

total_txt = os.listdir(split_data_file_path)
for i in total_txt:
    name = i[:-4]
    if name == 'train':
        txt_file = open(split_data_file_path + i, 'r')
        for line in txt_file:
            line = line.strip('\n')
            line = line.strip('\r')
            srcImage = images_file_path + line + '.png'
            dstImage = dst_train_Image + line + '.png'
            shutil.copyfile(srcImage, dstImage)
        txt_file.close()
    elif name == 'val':
        txt_file = open(split_data_file_path + i, 'r')
        for line in txt_file:
            line = line.strip('\n')
            line = line.strip('\r')
            srcImage = images_file_path + line + '.png'
            dstImage = dst_val_Image + line + '.png'
            shutil.copyfile(srcImage, dstImage)
        txt_file.close()
    elif name == 'test':
        txt_file = open(split_data_file_path + i, 'r')
        for line in txt_file:
            line = line.strip('\n')
            line = line.strip('\r')
            srcImage = images_file_path + line + '.png'
            dstImage = dst_test_Image + line + '.png'
            shutil.copyfile(srcImage, dstImage)
        txt_file.close()
    else:
        print("Error, Please check the file name of folder")

代码中的png根据自己的图片后缀进行更改，比如改成jpg。至此COCO格式数据集制作完毕。

以上voc转coco代码转自博客：https://blog.csdn.net/a18838956649/article/details/124457462

文章出处登录后可见！

已经登录？立即刷新