前端之家收集整理的这篇文章主要介绍了“读取 INRIA 说明文件并生成 VOC XML(Python 版本)”的脚本。
前端之家小编觉得挺不错的，现在分享给大家，也给大家做个参考。
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# 文件名:txt_to_xml.py
from xml.dom.minidom import Document
import os
import re
# Matches one run of decimal digits. Compiled once because it is applied to
# every file name and every annotation line.
_NUMBER_RE = re.compile(r'\d+')

# Captures the object index of a bounding-box line. The index is captured as a
# whole number so that object 1 no longer also matches objects 10, 11, ...
# Matching is case-insensitive so that both the "Bounding Box" and the
# "Bounding box" spellings are accepted.
_BOX_LINE_RE = re.compile(r'Bounding Box for object (\d+)', re.IGNORECASE)


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def _append_object(doc, annotation, coordinates):
    """Append one ``<object>`` entry with the given box to *annotation*.

    *coordinates* holds the strings (xmin, ymin, xmax, ymax), in that order.
    """
    obj = doc.createElement('object')
    annotation.appendChild(obj)
    _append_text_element(doc, obj, 'name', 'person')
    _append_text_element(doc, obj, 'pose', 'Unspecified')
    _append_text_element(doc, obj, 'truncated', '0')
    _append_text_element(doc, obj, 'difficult', '0')
    # VOC readers look the box up under the lowercase tag name "bndbox".
    bndbox = doc.createElement('bndbox')
    obj.appendChild(bndbox)
    for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), coordinates):
        _append_text_element(doc, bndbox, tag, value)


def build_voc_document(lines, image_filename):
    """Build the VOC-style XML document for one text annotation file.

    Args:
        lines: The lines of the text annotation, as returned by
            ``readlines()``.
        image_filename: Text stored in the ``<filename>`` element.

    Returns:
        An ``xml.dom.minidom.Document`` whose root is ``<annotation>``.

    Raises:
        ValueError: If a bounding-box line carries fewer than four
            coordinates after its object index.
    """
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'VOC2007')
    _append_text_element(doc, annotation, 'filename', image_filename)

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'PASperson Database')
    _append_text_element(doc, source, 'annotation', 'PASperson')

    # Header pass: read the image size and the declared object count, and
    # stop at the "Objects" line.
    object_count = None
    for line in lines:
        if 'size' in line:
            dimensions = _NUMBER_RE.findall(line)
            # A line that merely contains "size" but not three numbers is
            # not the image-size line; ignore it instead of crashing.
            if len(dimensions) >= 3:
                size = doc.createElement('size')
                annotation.appendChild(size)
                for tag, value in zip(('width', 'height', 'depth'),
                                      dimensions):
                    _append_text_element(doc, size, tag, value)
                _append_text_element(doc, annotation, 'segmented', '0')
        if 'Objects' in line:
            counts = _NUMBER_RE.findall(line)
            if counts:
                object_count = int(counts[0])
            break

    # Box pass: a single scan that groups the box lines by object index,
    # rather than rescanning every line once per object.
    boxes_by_index = {}
    for line in lines:
        match = _BOX_LINE_RE.search(line)
        if match is None:
            continue
        # The numbers following the object index are xmin, ymin, xmax, ymax.
        coordinates = _NUMBER_RE.findall(line, match.end())[:4]
        if len(coordinates) < 4:
            raise ValueError('malformed bounding box line: ' + line.strip())
        boxes_by_index.setdefault(int(match.group(1)), []).append(coordinates)

    if object_count is None:
        # No usable "Objects" line: emit every box that was found.
        wanted = sorted(boxes_by_index)
    else:
        wanted = range(1, object_count + 1)
    for index in wanted:
        for coordinates in boxes_by_index.get(index, ()):
            _append_object(doc, annotation, coordinates)

    return doc


def convert_annotations(directory="Annotations"):
    """Convert every ``.txt`` annotation in *directory* to a VOC XML file.

    The output name is ``"00"`` followed by the first number in the source
    file name (leading zeros dropped) plus ``.xml``; it is written next to
    the source file. Progress is printed to standard output.
    """
    for old_name in os.listdir(directory):
        if ".txt" not in old_name:
            continue
        print(old_name)
        file_index = _NUMBER_RE.findall(old_name)
        if not file_index:
            # Without a number there is no output name to derive.
            print('skipping (no number in file name):' + old_name)
            continue
        print(file_index)
        number = str(int(file_index[0]))
        print(number)
        stem = "00" + number

        print('processing:' + old_name)
        with open(os.path.join(directory, old_name), "r") as source_file:
            lines = source_file.readlines()

        doc = build_voc_document(lines, stem + ".jpg")

        with open(os.path.join(directory, stem + ".xml"), 'w') as target_file:
            target_file.write(doc.toprettyxml(indent=""))
        print(str(file_index) + " compelete")
    print('process compelete')


if __name__ == "__main__":
    convert_annotations()