DOM方式与SAX方式的比较

来源:互联网 发布:php fix pathinfo 编辑:程序博客网 时间:2024/06/02 12:55

DOM方式


package tigers;

import java.util.*;

import javax.xml.parsers.*;
import org.w3c.dom.*;
import java.io.*;

/**
 * DOM-based demo: parses an XML file into a DOM tree and flattens it into a
 * collection of {@link Bean23} objects, one per element, in document order.
 */
public class Tiger23 {
    /**
     * Simple holder for one XML element: its tag name, its trimmed text
     * content and its attributes.
     */
    static class Bean23 {
        Map<String, String> attrs; // attributes of the element
        String name, value; // tag name and text content

        /**
         * @param name  tag name of the element
         * @param value text content of the element
         * @param attrs attribute name/value pairs of the element
         */
        public Bean23(String name, String value, Map<String, String> attrs) {
            this.name = name;
            this.value = value;
            this.attrs = attrs;
        }

        public Map<String, String> getAttrs() {
            return attrs;
        }

        public void setAttrs(Map<String, String> attrs) {
            this.attrs = attrs;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public String getValue() {
            return value;
        }

        public void setValue(String value) {
            this.value = value;
        }

        /**
         * Renders the element as {@code Element:<name, <text>,( k='v' ...)>},
         * preceded by a line break so that a printed collection shows one
         * element per line.
         */
        @Override
        public String toString() {
            StringBuilder str = new StringBuilder();
            if (attrs != null) { // tolerate beans created without attributes
                for (Map.Entry<String, String> entry : attrs.entrySet()) {
                    str.append(" ").append(entry.getKey())
                       .append("='").append(entry.getValue()).append("'");
                }
            }
            // "\n" (line break), not the literal two characters "/n".
            return "\nElement:<" + name + ", <" + value + ">" + ",(" + str + ")>";
        }
    }

    /**
     * Copies the attributes of a DOM element into a map.
     *
     * @param element the element to read
     * @return a new map of attribute name to attribute value; empty when the
     *         element has no attributes
     */
    static Map<String, String> getElementAttrs(Element element) {
        Map<String, String> attrs = new HashMap<String, String>();
        if (element.hasAttributes()) {
            NamedNodeMap maps = element.getAttributes();
            for (int i = 0; i < maps.getLength(); i++) {
                Attr attr = (Attr) maps.item(i);
                attrs.put(attr.getName(), attr.getValue());
            }
        }
        return attrs;
    }

    /**
     * Flattens a whole document into a collection of beans.
     *
     * @param doc the parsed document
     * @return one bean per element, starting with the root, in document order
     */
    static Collection<Bean23> getElements(Document doc) {
        Collection<Bean23> elements = new ArrayList<Bean23>();
        return circle(doc.getDocumentElement(), elements);
    }

    /**
     * Recursively visits {@code element} and its descendants, appending one
     * bean per element to {@code elements}.
     *
     * @param element  the element to visit
     * @param elements the collection receiving the beans
     * @return the same {@code elements} collection, for chaining
     */
    static Collection<Bean23> circle(Element element, Collection<Bean23> elements) {
        Bean23 bean = new Bean23(element.getTagName(), "", getElementAttrs(element));
        // Add before descending so parents precede their children.
        elements.add(bean);

        StringBuilder text = new StringBuilder();
        NodeList children = element.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node node = children.item(i);
            switch (node.getNodeType()) {
            case Node.ELEMENT_NODE:
                // Decide by node type rather than hasChildNodes(): an empty
                // element such as <x/> has no children but must still be listed.
                circle((Element) node, elements);
                break;
            case Node.TEXT_NODE:
            case Node.CDATA_SECTION_NODE:
                String nodeValue = node.getNodeValue();
                if (nodeValue != null) {
                    text.append(nodeValue.trim());
                }
                break;
            default:
                // Comments, processing instructions etc. are not text content.
                break;
            }
        }
        bean.setValue(text.toString());
        return elements;
    }

    /**
     * Parses the given XML file with a DOM parser.
     *
     * @param xmlFile path of the XML file
     * @return the elements of the document; an empty collection when the file
     *         cannot be read or parsed (the stack trace is printed)
     */
    static Collection<Bean23> parse(String xmlFile) {
        Collection<Bean23> elements = new ArrayList<Bean23>();
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document doc = builder.parse(new File(xmlFile));
            elements = getElements(doc);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return elements;
    }

    /**
     * Parses {@code c:/test.xml} 100 times, prints the elapsed wall-clock time
     * in milliseconds and the elements found by the last run.
     */
    public static void main(String[] args) {
        Collection<Bean23> elements = new ArrayList<Bean23>();
        long start = System.currentTimeMillis();
        for (int i = 0; i < 100; i++) {
            elements = parse("c:/test.xml");
            // Note: the explicit GC request is inside the timed region.
            System.gc();
        }
        long end = System.currentTimeMillis();
        System.out.println("<DOM方式>past time: " + (end - start));
        System.out.println(elements);
    }
}

SAX方式


package tigers;

import java.util.*;

import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.Attributes;
import org.xml.sax.XMLReader;

import java.io.*;
import org.xml.sax.InputSource;


/**
 * SAX-based demo: streams an XML file through a SAX handler and collects one
 * {@link Bean22} per element, in document order.
 */
public class Tiger22 {
    /** Represents one element (tag) of the XML document. */
    static class Bean22 {
        private String uri, localName, qName, value = "";
        // Attributes of this element.
        private Map<String, String> attrs = new HashMap<String, String>();

        /**
         * @param uri       namespace URI reported by the parser
         * @param localName local name reported by the parser
         * @param qName     qualified name reported by the parser
         * @param attrs     attribute name/value pairs of the element
         */
        public Bean22(String uri, String localName, String qName, Map<String, String> attrs) {
            this.uri = uri;
            this.localName = localName;
            this.qName = qName;
            this.attrs = attrs;
        }

        public String getValue() {
            return value;
        }

        public void setValue(String value) {
            this.value = value;
        }

        public Map<String, String> getAttrs() {
            return attrs;
        }

        public String getLocalName() {
            return localName;
        }

        public String getQName() {
            return qName;
        }

        public String getUri() {
            return uri;
        }

        /**
         * Renders the element as {@code Element:<name, <text>,( k='v' ...)>},
         * preceded by a line break so that a printed collection shows one
         * element per line.
         */
        @Override
        public String toString() {
            StringBuilder str = new StringBuilder();
            if (attrs != null) { // tolerate beans created without attributes
                for (Map.Entry<String, String> entry : attrs.entrySet()) {
                    str.append(" ").append(entry.getKey())
                       .append("='").append(entry.getValue()).append("'");
                }
            }
            // "\n" (line break), not the literal two characters "/n".
            return "\nElement:<" + localName + ", <" + value + ">" + ",(" + str + ")>";
        }
    }

    /** SAX content handler that records every element it encounters. */
    static class Inner extends DefaultHandler {
        // Elements whose end tag has not been seen yet; the last entry is the
        // innermost one and owns any text currently being read.
        private final LinkedList<Bean22> openElements = new LinkedList<Bean22>();
        // Raw characters of the current text run, not yet assigned to an element.
        private final StringBuilder pendingText = new StringBuilder();
        // All elements seen so far.
        private Collection<Bean22> elements = new ArrayList<Bean22>();

        /**
         * Parses the given XML file with this handler.
         *
         * @param xmlFile path of the XML file
         * @return the elements collected by this handler; when parsing fails the
         *         stack trace is printed and whatever was collected so far is
         *         returned
         */
        public Collection<Bean22> parse(String xmlFile) {
            try {
                XMLReader reader = org.xml.sax.helpers.XMLReaderFactory.createXMLReader();
                reader.setContentHandler(this);
                // Pass a system id instead of a FileReader: the parser then opens
                // and closes the file itself and honours the encoding declared
                // in the XML prolog instead of the platform default charset.
                reader.parse(new InputSource(new File(xmlFile).toURI().toASCIIString()));
            } catch (Exception e) {
                e.printStackTrace();
            }
            return elements;
        }

        /** Discards transient state left over from a previous (possibly failed) parse. */
        @Override
        public void startDocument() {
            openElements.clear();
            pendingText.setLength(0);
        }

        /*
         * Attribute information is copied into a java.util.Map. Note that
         * org.xml.sax.Attributes is itself a collection type, but because of the
         * way SAX works, keeping a reference to the Attributes object could let
         * the attributes of a later element overwrite those of an earlier one.
         * Copying into a Map avoids that problem.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attrs) {
            // Text read so far belongs to the enclosing element.
            flushText();
            Map<String, String> mapAttrs = new HashMap<String, String>();
            for (int i = 0; i < attrs.getLength(); i++) {
                mapAttrs.put(attrs.getQName(i), attrs.getValue(i));
            }
            Bean22 element = new Bean22(uri, localName, qName, mapAttrs);
            elements.add(element);
            openElements.addLast(element);
        }

        /**
         * Buffers character data. The third argument is a length, not an end
         * index, and the parser may deliver one text run in several chunks, so
         * trimming is deferred until the run is complete.
         */
        @Override
        public void characters(char[] ch, int start, int length) {
            pendingText.append(ch, start, length);
        }

        /** Assigns outstanding text to the element being closed and leaves its scope. */
        @Override
        public void endElement(String uri, String localName, String qName) {
            flushText();
            if (!openElements.isEmpty()) {
                openElements.removeLast();
            }
        }

        /** Appends the trimmed pending text run to the innermost open element. */
        private void flushText() {
            if (pendingText.length() == 0) {
                return;
            }
            if (!openElements.isEmpty()) {
                Bean22 owner = openElements.getLast();
                owner.setValue(owner.getValue() + pendingText.toString().trim());
            }
            pendingText.setLength(0);
        }
    }

    /**
     * Parses {@code c:/test.xml} 100 times, prints the elapsed wall-clock time
     * in milliseconds and the elements found by the last run.
     */
    public static void main(String[] args) {
        Collection<Bean22> elements = new ArrayList<Bean22>();
        long start = System.currentTimeMillis();
        for (int i = 0; i < 100; i++) {
            // A fresh handler per run, since a handler accumulates its elements.
            Inner in = new Inner();
            elements = in.parse("c:/test.xml");
            // Note: the explicit GC request is inside the timed region.
            System.gc();
        }
        long end = System.currentTimeMillis();
        System.out.println("<SAX方式>past time: " + (end - start));
        System.out.println(elements);
    }
}

测试文件及测试结果

c:/test.xml

<?xml version="1.0" encoding="UTF-8"?>
<root>
    <elements id="001">
        <!-- element Comment -->
        <element name="aaaa">
            AAAAAAAAA
        </element>
        <element name="bbbb">
            BBBBBBBBB
        </element>
    </elements>
    <mappings>
        <mapping id="1234" elementName="aaaa">
            11223344
        </mapping>
        <mapping id="5678" elementName="aaaa">
            55667788
        </mapping>
    </mappings>
</root>

 

结果:

 

<SAX方式>past time: 5750
[
Element:<root, <>,()>,
Element:<elements, <>,( id='001')>,
Element:<element, <AAAAAAAAA>,( name='aaaa')>,
Element:<element, <BBBBBBBBB>,( name='bbbb')>,
Element:<mappings, <>,()>,
Element:<mapping, <11223344>,( elementName='aaaa' id='1234')>,
Element:<mapping, <55667788>,( elementName='aaaa' id='5678')>]

结果:

 

<DOM方式>past time: 5907
[
Element:<root, <>,()>,
Element:<elements, <>,( id='001')>,
Element:<element, <AAAAAAAAA>,( name='aaaa')>,
Element:<element, <BBBBBBBBB>,( name='bbbb')>,
Element:<mappings, <>,()>,
Element:<mapping, <11223344>,( elementName='aaaa' id='1234')>,
Element:<mapping, <55667788>,( elementName='aaaa' id='5678')>]

结论:

1、使用任何一种方式,第一次解析时都比较耗时,连续解析多次后平均所用时间有很大减少。

2、对于小文件,两者看不出很大差别(如上面的结果,5750 毫秒对 5907 毫秒),SAX有时甚至比DOM更耗时;文件越大,SAX的优势越大。

3、就编程难度而言,SAX比DOM更容易实现。

原创粉丝点击