XML解析技术
DOM技术:文档对象模型,需要将整个XML加入内存才能解析,占用内存比较多
SAX技术:一边加载,一边解析,一边释放内存,比较节省内存,基于推模式。
STAX技术:一种基于流的技术,和SAX技术很像,但它是基于拉模式的。
推模式与拉模式:推模式由解析器主动读取文档,并通过回调把事件推送给处理器(如SAX);拉模式由应用程序按需调用解析器获取下一个事件(如STAX),可以随时停止解析。
基于以上技术实现的解析工具有
JAXP同时支持DOM SAX STAX三种技术
DOM4J支持DOM解析方式
XML PULL android移动设备内置的xml解析技术, 支持STAX解析方式
技术选择
在javaee开发中通常使用DOM技术,编程简单。当xml文档过于大时,优先使用SAX/STAX技术。
JAXP解析代码
DOM解析API
全局查找
通过ID查找 getElementById() //没有DTD约束(未声明ID类型属性)的文档不能使用该方法
通过标签名查找 getElementsByTagName()
相对节点位置查找
getChildNodes(): 返回这个节点的所有子节点列表
getNextSibling(): 返回该节点的下一个兄弟节点
DOMTest.java
import java.io.File;
import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class DOMTest {
@Test
public void demo1() throws ParserConfigurationException,SAXException,
IOException {
// 构造工厂
DocumentBuilderFactory builderFactory = DocumentBuilderFactory
.newInstance();
// 通过工厂获得解析器
DocumentBuilder builder = builderFactory.newDocumentBuilder();
// 使用解析器加载xml文档
Document document = builder.parse("books.xml");
// 通过标签名获得元素
NodeList nodeList = document.getElementsByTagName("name");
for (int i = 0; i < nodeList.getLength(); i++) {
Element e = (Element) nodeList.item(i);
// 获取标签名称
System.out.println(e.getNodeName());
// 获取<name>标签子节点
System.out.println(e.getFirstChild().getNodeValue());
// 获取<name>标签类型
System.out.println(e.getNodeType());
}
}
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class DOMTest {
@Test
public void demo1() throws ParserConfigurationException,SAXException,
IOException {
// 构造工厂
DocumentBuilderFactory builderFactory = DocumentBuilderFactory
.newInstance();
// 通过工厂获得解析器
DocumentBuilder builder = builderFactory.newDocumentBuilder();
// 使用解析器加载xml文档
Document document = builder.parse("books.xml");
// 通过标签名获得元素
NodeList nodeList = document.getElementsByTagName("name");
for (int i = 0; i < nodeList.getLength(); i++) {
Element e = (Element) nodeList.item(i);
// 获取标签名称
System.out.println(e.getNodeName());
// 获取<name>标签子节点
System.out.println(e.getFirstChild().getNodeValue());
// 获取<name>标签类型
System.out.println(e.getNodeType());
}
}
books.xml
<?xml version="1.0" encoding="UTF-8"?>
<books>
<book>
<name>adfafd</name>
<price>123</price>
</book>
<book>
<name>dfadfe</name>
<price>2321</price>
</book>
</books>
<books>
<book>
<name>adfafd</name>
<price>123</price>
</book>
<book>
<name>dfadfe</name>
<price>2321</price>
</book>
</books>
DOM的增加节点操作
public void DOM2Test() throws ParserConfigurationException,255)">IOException,TransformerException,255)">TransformerConfigurationException {
// 创建元素节点
Element time = document.createElement("time");
// 设置节点包含的文本内容
time.setTextContent("2015-12-28");
// 获取欲加入位置
//将节点<time>加入到<name>下
nodeList.item(0).appendChild(time);
// 将新的DOM对象重新写会原文件
TransformerFactory transformerFactory = TransformerFactory
Transformer transformer = transformerFactory.newTransformer();
DOMSource domSource = new DOMSource(document);
StreamResult streamResult = new StreamResult(new File("books.xml"));
transformer.transform(domSource,streamResult);
}
// 创建元素节点
Element time = document.createElement("time");
// 设置节点包含的文本内容
time.setTextContent("2015-12-28");
// 获取欲加入位置
//将节点<time>加入到<name>下
nodeList.item(0).appendChild(time);
// 将新的DOM对象重新写会原文件
TransformerFactory transformerFactory = TransformerFactory
Transformer transformer = transformerFactory.newTransformer();
DOMSource domSource = new DOMSource(document);
StreamResult streamResult = new StreamResult(new File("books.xml"));
transformer.transform(domSource,streamResult);
}
DOM修改节点
// Locate the nodes to modify: nodeList holds the <name> elements of the document.
int nodeListLength = nodeList.getLength();
for (int i = 0; i < nodeListLength; i++) {
    Element e = (Element) nodeList.item(i);
    if ("java编程思想".equals(e.getTextContent())) {
        // getNextSibling() can return the whitespace text node that sits between
        // <name> and <price>, so walk forward to the next element node.
        Node price = e.getNextSibling();
        while (price != null && price.getNodeType() != Node.ELEMENT_NODE) {
            price = price.getNextSibling();
        }
        if (price != null) {
            // Raise the price by 50%, truncating to a whole number.
            String oldPrice = price.getTextContent().trim();
            int newPrice = (int) (Integer.parseInt(oldPrice) * 1.5);
            price.setTextContent(String.valueOf(newPrice));
        }
    }
}
DOM的删除操作
// Remove every <name> element whose text matches; nodeList holds the <name> elements.
// The length is re-read on each pass because the list shrinks as nodes are removed.
for (int i = 0; i < nodeList.getLength(); i++) {
    Element e = (Element) nodeList.item(i);
    System.out.println(e.getTextContent());
    if ("java编程思想".equals(e.getTextContent())) {
        e.getParentNode().removeChild(e);
        // The NodeList is live: after the removal the following items shift down
        // by one, so step back to avoid skipping the element now at index i.
        i--;
    }
}
SAX是基于事件驱动的XML处理方法
SAX测试代码:
复写DefaultHandler类
package com.donyu.www;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX event handler that prints every parsing event to standard output.
 */
public class MyHandler extends DefaultHandler {
    /** Prints a marker when parsing of the document begins. */
    @Override
    public void startDocument() throws SAXException {
        System.out.println("start Document...");
    }

    /**
     * Prints the qualified name of each opening tag; for {@code <book>}
     * elements it also prints the value of the {@code id} attribute.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        System.out.println(qName);
        if ("book".equals(qName)) {
            // Attributes are exposed as name/value pairs.
            System.out.println("id属性为:" + attributes.getValue("id"));
        }
    }

    /** Prints each chunk of character data reported by the parser. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        String string = new String(ch, start, length);
        System.out.println("start character..." + string);
    }

    /** Prints the qualified name of each closing tag. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        System.out.println("end element..." + qName);
    }

    /** Prints a marker when parsing of the document ends. */
    @Override
    public void endDocument() throws SAXException {
        System.out.println("end Document...");
    }
}
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX event handler that prints every parsing event to standard output.
 */
public class MyHandler extends DefaultHandler {
    /** Prints a marker when parsing of the document begins. */
    @Override
    public void startDocument() throws SAXException {
        System.out.println("start Document...");
    }

    /**
     * Prints the qualified name of each opening tag; for {@code <book>}
     * elements it also prints the value of the {@code id} attribute.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        System.out.println(qName);
        if ("book".equals(qName)) {
            // Attributes are exposed as name/value pairs.
            System.out.println("id属性为:" + attributes.getValue("id"));
        }
    }

    /** Prints each chunk of character data reported by the parser. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        String string = new String(ch, start, length);
        System.out.println("start character..." + string);
    }

    /** Prints the qualified name of each closing tag. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        System.out.println("end element..." + qName);
    }

    /** Prints a marker when parsing of the document ends. */
    @Override
    public void endDocument() throws SAXException {
        System.out.println("end Document...");
    }
}
@Test
public void SAXTest() throws ParserConfigurationException,255); white-space:pre">IOException {
// 创建解析工厂
SAXParserFactory factory = SAXParserFactory.newInstance();
// 创建解析器
SAXParser saxParser = factory.newSAXParser();
// 初始化回调函数
MyHandler handler = new MyHandler();
public void SAXTest() throws ParserConfigurationException,255); white-space:pre">IOException {
// 创建解析工厂
SAXParserFactory factory = SAXParserFactory.newInstance();
// 创建解析器
SAXParser saxParser = factory.newSAXParser();
// 初始化回调函数
MyHandler handler = new MyHandler();
//将解析的文档和回调函数一并传入
saxParser.parse("books.xml",handler);
}
saxParser.parse("books.xml",handler);
}
STAX测试代码:
测试文档为上面的books.xml
@Test
public void TestPull() throws XmlPullParserException,IOException {
// 创建工厂
XmlPullParserFactory pullParserFactory = XmlPullParserFactory
.newInstance();
// 通过工厂获得解析器
XmlPullParser pullParser = pullParserFactory.newPullParser();
// 将XML文档传入
pullParser.setInput(new FileInputStream("books.xml"),"utf-8");
// 获取事件类型
while (pullParser.getEventType() != XmlPullParser.END_DOCUMENT) {
if (pullParser.getEventType() == XmlPullParser.START_TAG) {
if (pullParser.getName().equals("name"))
System.out.println(pullParser.nextText());
}
pullParser.next();
}
public void TestPull() throws XmlPullParserException,IOException {
// 创建工厂
XmlPullParserFactory pullParserFactory = XmlPullParserFactory
.newInstance();
// 通过工厂获得解析器
XmlPullParser pullParser = pullParserFactory.newPullParser();
// 将XML文档传入
pullParser.setInput(new FileInputStream("books.xml"),"utf-8");
// 获取事件类型
while (pullParser.getEventType() != XmlPullParser.END_DOCUMENT) {
if (pullParser.getEventType() == XmlPullParser.START_TAG) {
if (pullParser.getName().equals("name"))
System.out.println(pullParser.nextText());
}
pullParser.next();
}
public void SerializerTest() throws XmlPullParserException,255); white-space:pre">IllegalArgumentException,IllegalStateException,255); white-space:pre">FileNotFoundException,IOException {
XmlSerializer serializer = pullParserFactory.newSerializer();
// 设置序列化文档
serializer.setOutput(new FileOutputStream("books_blank.xml"),255); white-space:pre">serializer.startDocument("utf-8",true);
serializer.startTag(null,"admin");
serializer.text("root");
serializer.endTag(null,255); white-space:pre">serializer.endDocument();
}
XmlSerializer serializer = pullParserFactory.newSerializer();
// 设置序列化文档
serializer.setOutput(new FileOutputStream("books_blank.xml"),255); white-space:pre">serializer.startDocument("utf-8",true);
serializer.startTag(null,"admin");
serializer.text("root");
serializer.endTag(null,255); white-space:pre">serializer.endDocument();
}