转自 http://blog.csdn.net/yiluochenwu/article/details/23515923
- <students>
- <student name='刘备' sex='男' age='35'/>
- <student name='吕布' sex='男' age='38'/>
- <student name='貂蝉' sex='女' age='22'/>
- </students>
代码:
# -*- coding: UTF-8 -*-
# Read data from a file
import xml.etree.ElementTree as ET
# Globally unique identifier; incremented once for every element visited.
unique_id = 1


def walkData(root_node, level, result_list):
    """Walk an element tree depth-first and record every element.

    For ``root_node`` and each of its descendants (pre-order), one row
    ``[id, level, tag, attrib]`` is appended to ``result_list``.

    Args:
        root_node: Element to start from; it must expose ``tag`` and
            ``attrib`` and be iterable over its direct children.
        level: Depth value recorded for ``root_node``; each child is
            recorded with ``level + 1``.
        result_list: List that receives the rows; mutated in place.

    Returns:
        None. Results are delivered through ``result_list``.

    Note:
        Ids come from the module-level ``unique_id`` counter, which is never
        reset here, so numbering continues across successive calls.
    """
    global unique_id

    # The row stores the element's own attrib mapping (not a copy).
    result_list.append([unique_id, level, root_node.tag, root_node.attrib])
    unique_id += 1

    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9. A leaf simply yields no children, ending the recursion.
    for child in list(root_node):
        walkData(child, level + 1, result_list)
def getXmlData(file_name):
    """Parse an XML file and return its elements as a flat list.

    The file is parsed with ``xml.etree.ElementTree`` and the tree is handed
    to ``walkData`` starting at the root, which fills the returned list with
    one row per element in document order.

    Args:
        file_name: Path (or file object) accepted by ``ET.parse``.

    Returns:
        list: The rows appended by ``walkData``. In the sample output that
        accompanies this script a row looks like ``[ID, Level, Tag, AttrMap]``,
        e.g.::

            [
                [1, 1, 'students', {}],
                [2, 2, 'student', {'name': ..., 'sex': ..., 'age': ...}],
            ]
    """
    level = 1  # node depth starts at 1 for the root
    result_list = []
    root = ET.parse(file_name).getroot()
    # walkData takes (node, level, result_list); the starting depth must be
    # passed explicitly.
    walkData(root, level, result_list)
    return result_list
if __name__ == '__main__':
    # Demo driver: flatten test.xml and print one row per line.
    file_name = 'test.xml'
    R = getXmlData(file_name)
    for x in R:
        # Parenthesised form works as a Python 2 print statement and as the
        # Python 3 print function.
        print(x)
输出结果:
- [1,1,'students',{}]
- [2,2,'student',{'age':'35','name':u'\u5218\u5907','sex':u'\u7537'}]
- [3,2,'student',{'age':'38','name':u'\u5415\u5e03','sex':u'\u7537'}]
- [4,2,'student',{'age':'22','name':u'\u8c82\u8749','sex':u'\u5973'}]