频道导航

利用TinyXML读取VOC2012数据集的XML标注文件裁剪出所有人体目标保存为文件

2020-06-13 XML 前端之家

前端之家收集整理的这篇文章主要介绍了利用TinyXML读取VOC2012数据集的XML标注文件裁剪出所有人体目标保存为文件，前端之家小编觉得挺不错的，现在分享给大家，也给大家做个参考。

转载自：利用TinyXML读取VOC2012数据集的XML标注文件裁剪出所有人体目标保存为文件 - Why So Serious? - 博客频道 - CSDN.NET http://blog.csdn.net/masibuaa/article/details/16104717

PASCAL VOC目标检测数据集(The PASCAL Visual Object Classes)

http://pascallin.ecs.soton.ac.uk/challenges/VOC/

图片中的目标用XML文件标注，格式为：

[html]view plaincopy 
   
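<annotation>
    <folder>VOC2012</folder>
    <filename>2007_000346.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
    </source>
    <size>
        <width>500</width>
        <height>375</height>
        <depth>3</depth>
    </size>
    <segmented>1</segmented>
    <object>
        <name>bottle</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>124</xmin>
            <ymin>107</ymin>
            <xmax>230</xmax>
            <ymax>343</ymax>
        </bndbox>
    </object>
    <object>
        <name>person</name>
        <!-- 该目标的 pose、truncated、difficult 取值在转载过程中丢失 -->
        <bndbox>
            <xmin>137</xmin>
            <ymin>78</ymin>
            <xmax>497</xmax>
            <!-- ymax 在转载过程中丢失，推测为 375（与图像高度相同） -->
            <ymax>375</ymax>
        </bndbox>
    </object>
    <object>
        <name>person</name>
        <!-- 该目标的 pose、truncated、difficult 取值在转载过程中丢失 -->
        <bndbox>
            <xmin>89</xmin>
            <ymin>202</ymin>
            <xmax>129</xmax>
            <ymax>247</ymax>
        </bndbox>
    </object>
    <object>
        <name>person</name>
        <pose>Frontal</pose>
        <!-- 该目标的 truncated、difficult 取值在转载过程中丢失 -->
        <bndbox>
            <xmin>72</xmin>
            <ymin>209</ymin>
            <xmax>111</xmax>
            <ymax>259</ymax>
        </bndbox>
    </object>
</annotation>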

对应的图片为：

所以如果想用这个数据集做某种目标识别的训练集的话，需要先从中裁出需要的目标。

下面这个程序就是这个目的，其中用到了TinyXML这个简单易用的XML解析器(XML入门)

[cpp]copy 
   

#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <tinyxml.h>

using namespace std;
using namespace cv;
// Running count of image files written by CropImage; it is incremented once
// per saved file and used to number the output file names.
int CropImageCount = 0;
/**
*通过根节点和节点名查找所有指定节点，结果放到节点数组NodeVector中
*@parampRootElexml文件的根节点
*@paramstrNodeName要查询的节点名
*@paramNodeVector查询到的节点指针数组
*@return找到至少一个相应节点，返回true；否则false
*/
boolGetAllNodePointerByName(TiXmlElement*pRootEle,stringstrNodeName,vector<TiXmlElement*>&NodeVector)
{
//如果NodeName等于根节点名，加入NodeVector数组
if(strNodeName==pRootEle->Value())
{
NodeVector.push_back(pRootEle);//添加到数组末尾
//这里根据VOCAnnotation的XML文件格式，认为相同节点名的节点不会有父子关系，所以所有相同节点名的节点都在同一级别上
//只要找到第一个，剩下的肯定在它的兄弟节点里面
for(TiXmlElement*pElement=pRootEle->NextSiblingElement();pElement;pElement=pElement->NextSiblingElement())
if(strNodeName==pElement->Value())
NodeVector.push_back(pElement);
returntrue;
}
TiXmlElement*pEle=pRootEle;
for(pEle=pRootEle->FirstChildElement();pEle;pEle=pEle->NextSiblingElement())
//递归处理子节点，获取节点指针
if(GetAllNodePointerByName(pEle,strNodeName,NodeVector))
true;
}
false;//没找到
*根据目标名过滤目标节点数组,删除所有目标名不是objectName的元素
*@paramNodeVector要操作的TiXmlElement元素指针数组
*@paramobjectName指定的目标名，删除所有目标名不是objectName的元素
*@return过滤后目标数组为空，返回false；否则返回true
*/
boolFiltObject(vector<TiXmlElement*>&NodeVector,stringobjectName)
TiXmlElement*pEle=NULL;
vector<TiXmlElement*>::iteratoriter=NodeVector.begin();//数组的迭代器
for(;iter!=NodeVector.end();)
pEle=*iter;//第i个元素
//若目标名不是objectName，删除此节点
if(objectName!=pEle->FirstChildElement()->GetText())
//cout<<"删除的目标节点："<<pEle->FirstChildElement()->GetText()<<endl;
iter=NodeVector.erase(iter);//删除目标名不是objectName的，返回下一个元素的指针
else
iter++;
if(0==NodeVector.size())//过滤后目标数组为空，说明不包含指定目标
false;
else
*根据每个目标的BoundingBox，剪裁图像，保存为文件
*@paramimg图像
*@paramNodeVector目标节点数组
voidCropImage(Matimg,vector<TiXmlElement*>NodeVector)
intxmin,ymin,xmax,ymax;//从目标节点中读出的包围盒参数
charfileName[256];//剪裁后的图片和其水平翻转图片的文件名
//遍历目标数组
for(;iter!=NodeVector.end();iter++)
//遍历每个目标的子节点
TiXmlElement*pEle=(*iter)->FirstChildElement();//第i个元素的第一个孩子
for(;pEle;pEle=pEle->NextSiblingElement())
//找到包围盒"bndBox"节点
if(string("bndBox")==pEle->Value())
TiXmlElement*pCoord=pEle->FirstChildElement();//包围盒的第一个坐标值
//依次遍历包围盒的4个坐标值，放入整型变量中
for(;pCoord;pCoord=pCoord->NextSiblingElement())
if(string("xmin")==pCoord->Value())
xmin=atoi(pCoord->GetText());//xmin
if(string("ymin")==pCoord->Value())
ymin=atoi(pCoord->GetText());//ymin
if(string("xmax")==pCoord->Value())
xmax=atoi(pCoord->GetText());//xmax
if(string("ymax")==pCoord->Value())
ymax=atoi(pCoord->GetText());//ymax
//cout<<"xmin:"<<xmin<<","<<"ymin:"<<ymin<<","<<"xmax:"<<xmax<<","<<"ymax:"<<ymax<<endl;;
//根据读取的包围盒坐标设置图像ROI
MatimgROI=img(Rect(xmin,xmax-xmin,ymax-ymin));
resize(imgROI,imgROI,Size(64,128));//缩放为64*128大小
sprintf(fileName,"person%06d.jpg",++CropImageCount);//生成剪裁图片的文件名
imwrite(fileName,imgROI);//保存文件
flip(imgROI,1);//水平翻转
memset(fileName,0x00,sizeof(fileName));
//生成剪裁图片的水平翻转图片的文件名
/**
*根据XML文件，从图像中剪裁出objectName目标
*@paramXMLFileXML文件名
*@paramimg对应的图像
*@paramobjectName目标名
*@return若图像中包含objectName目标，返回true；否则返回false
boolCropImageAccordingToXML(stringXMLFile,Matimg,248)"> TiXmlDocument*pDoc=newTiXmlDocument();//创建XML文档
pDoc->LoadFile(XMLFile.c_str());//装载XML文件
vector<TiXmlElement*>nodeVector;//节点数组
//查找所有节点名是object的节点，即目标节点，结果放到节点数组nodeVector中
if(false==GetAllNodePointerByName(pDoc->RootElement(),"object",nodeVector))//未找到指定目标
false;
//cout<<"所有目标个数："<<nodeVector.size()<<endl;
//过滤节点数组，删除所有节点名不是objectName的节点
false==FiltObject(nodeVector,objectName))//目标数组中没有指定目标
//cout<<"过滤后的目标个数："<<nodeVector.size()<<endl;
//根据每个目标的BoundingBox，剪裁图像，保存为文件
CropImage(img,nodeVector);
intmain()
intfileCount=0;//文件个数
Matsrc;
stringXMLName,ImgName;//XML文件名和对应的图片文件名
//ifstreamfin("VOC2012AnnotationsXMLList.txt");//打开XML文件列表
ifstreamfin("subset.txt");
//ifstreamfin("test.txt");
//读取XML文件列表
while(getline(fin,XMLName))
cout<<"处理："<<XMLName<<endl;
ImgName="D:\\DataSet\\VOCtrainval_11-May-2012\\VOCdevkit\\VOC2012\\JPEGImages\\"+XMLName+".jpg";
XMLName="D:\\DataSet\\VOCtrainval_11-May-2012\\VOCdevkit\\VOC2012\\Annotations\\"+XMLName+".xml";
src=imread(ImgName);
CropImageAccordingToXML(XMLName,src,"person");//根据XML标注文档，从图像src中剪裁出所有person目标，保存为文件
system("pause");
}