参考文章:http://inotgaoshou.iteye.com/blog/1012188
一、xml解析常用的四种方式:
DOM(Document Object Model)文档对象模型;
SAX(Simple API for XML)XML简单应用程序接口;
JDOM(Java-based Document Object Model);
DOM4J(Document Object Model for Java);
================================================================================
推荐用dom4j(大文档,或xml文档较多时候) 和 dom (小文档,xml文档较少的时候)
================================================================================
参考文章:http://download.csdn.net/download/shizhending/4067649
二、四种方式的比较:
DOM:性能测试时表现不佳。W3C的官方标准,允许应用程序对数据和结构做出更改,允许获取和操作文档的任意部分,
但需要加载整个文档,性能差,占内存多, 小文件可以,据说超10M才溢出
由于它的遍历能力,DOM解析器常用于XML文档需要频繁的改变的服务中。
SAX:性能表现较好.类似于流媒体特点,分析能够立即开始,而不是等待所有的数据被处理。
只在读取数据时检查数据,不需要保存在内存中。可以在某个条件得到满足时停止解析,
不必解析整个文档。效率和性能较高,能解析大于系统内存的文档。
很难同时访问同一个文档中的多处不同数据 ,对内存的要求通常会比较低,
适用于大型文档。
DOM4J:DOM4J性能最好,DOM4J是一个非常非常优秀的Java XML API,具有性能优异、功能强大和极其易于使用的特点,
同时它也是一个开放源代码的软件。
JDOM:性能测试时表现不佳 , JDOM的目的是成为Java特定文档模型,它简化与XML的交互并且比使用DOM实现更快。
三、使用举例
1.dom4j:
src/person.xml
<?xml version="1.0" encoding="UTF-8"?> <persons> <person id="psn0001" > <name>gavin</name> <age>18</age> <address> <country>中国</country> <province>北京</province> <city>北京</city> </address> <zipcode>100000</zipcode> </person> <person2 id="psn0002" > <name>sophia</name> <age>18</age> <address> <country>中国</country> <province>钓鱼岛</province> <city>钓鱼岛</city> </address> <zipcode>100000</zipcode> </person2> </persons>
package com.gavin.xmlparse.dom4j; import java.io.File; import java.io.InputStream; import java.io.Reader; import java.net.URL; import java.util.Iterator; import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.DOMReader; import org.dom4j.io.SAXReader; import org.xml.sax.InputSource; /** * 用dom4j读取xml信息 * @author gavin */ public class ParseXml { /** * dom4j object model representation of a xml document. Note: We use the * interface(!) not its implementation */ private Document doc; /** * Loads a document from a file. * @param aFile the data source * @throw a org.dom4j.DocumentExcepiton occurs on parsing failure. */ public void parseWithSAX(File aFile) throws DocumentException { SAXReader xmlReader = new SAXReader(); this.doc = xmlReader.read(aFile); } /** * Loads a document from a file. * * @param aURL * the data source * @throw a org.dom4j.DocumentExcepiton occurs on parsing failure. */ public void parseWithSAX(URL aURL) throws DocumentException { SAXReader xmlReader = new SAXReader(); this.doc = xmlReader.read(aURL); } /** * Reads a Document from the given InputSource using SAX * @param inputSource * @throws DocumentException */ public void parseWithSAX(InputSource inputSource) throws DocumentException { SAXReader xmlReader = new SAXReader(); this.doc = xmlReader.read(inputSource); } /** * Reads a Document from the given stream using SAX * @param in * @throws DocumentException */ public void parseWithSAX(InputStream in) throws DocumentException { SAXReader xmlReader = new SAXReader(); this.doc = xmlReader.read(in); } /** * Reads a Document from the given stream using SAX * @param in * @param systemId * @throws DocumentException */ public void parseWithSAX(InputStream in,String systemId) throws DocumentException { SAXReader xmlReader = new SAXReader(); this.doc = xmlReader.read(in,systemId); } /** * Reads a Document from the given Reader using SAX * @param reader 
* @throws DocumentException */ public void parseWithSAX(Reader reader) throws DocumentException { SAXReader xmlReader = new SAXReader(); this.doc = xmlReader.read(reader); } /** * Reads a Document from the given URL or filename using SAX. * @param systemId * @throws DocumentException */ public void parseWithSAX(String systemId) throws DocumentException { SAXReader xmlReader = new SAXReader(); this.doc = xmlReader.read(systemId); } /** * converts a W3C DOM document into a dom4j document * @param doc */ public void parseW3CDom2Dom4j(org.w3c.dom.Document doc){ DOMReader domReader = new DOMReader(); this.doc = domReader.read(doc); } public Document getDoc() { return doc; } /** * 利用xpath读取属性值 * @param xpathExpression * @return */ public String getAttributeValue(String xpathExpression){ Node node = doc.selectSingleNode(xpathExpression); if(node == null){ return null; } return node.valueOf("@id"); } /** * 利用xpath读取某元素的值 * @param xpathExpression * @return */ public String getElementValue(String xpathExpression){ Node node = doc.selectSingleNode(xpathExpression); if(node == null){ return null; } return node.getText(); } public static void main(String[] args) throws DocumentException { ParseXml parser = new ParseXml(); File file = new File("D:\\workspace\\javaCoreSkill\\src\\person.xml"); if(!file.exists()){ return; } parser.parseWithSAX(file); Document document = parser.getDoc(); Element root = document.getRootElement(); // iterate through child elements of root for ( Iterator i = root.elementIterator(); i.hasNext(); ) { Element element = (Element) i.next(); String psnId = element.attributeValue("id"); System.out.println(psnId); // iterate through attributes of root for ( Iterator iter = element.attributeIterator(); iter.hasNext(); ) { Attribute attribute = (Attribute) iter.next(); System.out.println(attribute.getValue()); } } // iterate through child elements of root with element name "person" for ( Iterator i = root.elementIterator( "person" ); i.hasNext(); ) { Element 
person = (Element) i.next(); String psnId = person.attributeValue("id"); System.out.println(psnId); for(Iterator i2 = person.elementIterator(); i2.hasNext();){ Element el = (Element) i2.next(); if(el.isTextOnly()){ System.out.println(el.getName() + ":" +el.getText()); }else{ Node node1 = document.selectSingleNode( "/persons/person/address/country" ); Node node2 = document.selectSingleNode( "/persons/person/address/city" ); Node node3 = document.selectSingleNode( "/persons/person/address/province" ); String country = node1.getText(); String province = node2.getText(); String city = node3.getText(); System.out.println("country:"+country+"province:"+province+"city:"+city); } } } System.out.println("person2元素的id属性值为:"+parser.getAttributeValue("//person2")); System.out.println("person2元素的id属性值为:"+parser.getElementValue("//person2/address/province")); } }
参考文档:
w3cschool
待续..........