在图像标注数据集中,有些标注文件采用 x1,y1,x2,y2,x3,y3,x4,y4 label 这种四点坐标格式;而在转换成 TFRecord 时,通常要求输入是 PASCAL VOC 的 XML 格式,因此需要先用下面的脚本把标注转换一下。

#!/usr/bin/env python
# coding:utf-8
from lxml.etree import Element, SubElement, tostring
from xml.dom.minidom import parseString
import glob
import os
from PIL import Image
from tqdm import tqdm
def txtToXml(image_path, txt_path, output_dir=None):
    """Convert quadrilateral ``.gt`` annotation files into XML annotation files.

    For every ``*.gt`` file found in ``txt_path`` the matching image (the same
    file name with the trailing ``.gt`` removed) is opened from ``image_path``
    to read its width and height, and one ``<image name>.xml`` file is written
    into ``output_dir``.

    Each non-blank line of a ``.gt`` file is split on tabs; the third
    tab-separated field is split on single spaces and its first eight values
    are stored, in order, as ``x0, y0, x1, y1, x2, y2, x3, y3`` under the
    object's ``<bndbox>``. Every object is named ``text`` with ``difficult``
    set to ``0``.

    Args:
        image_path: Directory containing the images.
        txt_path: Directory containing the ``.gt`` annotation files.
        output_dir: Directory the XML files are written to. It is not created
            here and must already exist. When omitted, the location that was
            previously hard-coded in this function is used.

    Raises:
        IndexError: If a non-blank line has fewer than three tab-separated
            fields, or its third field has fewer than eight space-separated
            values.
    """
    if output_dir is None:
        output_dir = "C:\\Elag\\data\\ICDAR2015-TRW\\public_test_data\\Annotations\\"

    # Tag names of the four corner points, in the order they appear in a line.
    corner_tags = ('x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3')

    for txt_file in tqdm(glob.glob(os.path.join(txt_path, '*.gt'))):
        # Drop the trailing ".gt"; what remains is the image file name.
        # os.path.basename works with whatever separator the platform uses.
        txt_name_ = os.path.basename(txt_file)[:-3]

        # Only the size is needed, so release the image file right away.
        with Image.open(os.path.join(image_path, txt_name_)) as im:
            width, height = im.size

        with open(txt_file, 'r', encoding='UTF-8') as gt_file:
            lines = gt_file.readlines()

        node_root = Element('annotation')
        SubElement(node_root, 'folder').text = 'ICPR'
        SubElement(node_root, 'filename').text = txt_name_

        node_size = SubElement(node_root, 'size')
        SubElement(node_size, 'width').text = str(width)
        SubElement(node_size, 'height').text = str(height)
        # Depth is written as a fixed 3; the image mode is not inspected.
        SubElement(node_size, 'depth').text = '3'

        for line in lines:
            # Strip the line terminator so it cannot end up inside a coordinate.
            line = line.rstrip('\r\n')
            if not line.strip():
                # Blank lines (e.g. a trailing empty line) carry no annotation.
                continue

            column = line.split('\t')
            coords = column[2].split(" ")

            node_object = SubElement(node_root, 'object')
            SubElement(node_object, 'name').text = 'text'
            SubElement(node_object, 'difficult').text = '0'
            node_bndbox = SubElement(node_object, 'bndbox')
            for position, tag in enumerate(corner_tags):
                SubElement(node_bndbox, tag).text = coords[position]

        xml = tostring(node_root, pretty_print=True)  # pretty-print with line breaks
        dom = parseString(xml)
        out_path = os.path.join(output_dir, txt_name_ + '.xml')
        # The file encoding must match the encoding declared in the XML header.
        with open(out_path, 'w', encoding='utf-8') as f:
            dom.writexml(f, indent='\t', addindent='\t', newl='\n', encoding="utf-8")


if __name__ == "__main__":
    # The images and their .gt annotation files live in the same directory.
    dataset_dir = "C:\\Elag\\data\\ICDAR2015-TRW\\public_test_data\\public_test_data\\"

    # Joining with an empty component yields the path with a trailing separator.
    pic_path = os.path.join(dataset_dir, '')
    data_path = os.path.join(dataset_dir, '')

    txtToXml(pic_path, data_path)
原始标注

这里写图片描述

转换后

这里写图片描述

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐