View Javadoc
1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.lang.xml.ast;
5   
6   import java.util.ArrayList;
7   import java.util.Collections;
8   import java.util.HashMap;
9   import java.util.List;
10  import java.util.Map;
11  import java.util.regex.Matcher;
12  import java.util.regex.Pattern;
13  
14  import org.apache.commons.lang3.StringUtils;
15  import org.w3c.dom.Document;
16  import org.w3c.dom.DocumentType;
17  import org.w3c.dom.NamedNodeMap;
18  import org.w3c.dom.Node;
19  import org.w3c.dom.NodeList;
20  import org.w3c.dom.ProcessingInstruction;
21  
22  /**
23   *
24   */
25  class DOMLineNumbers {
26      private final Document document;
27      private String xmlString;
28      private List<Integer> lines;
29  
30      public DOMLineNumbers(Document document, String xmlString) {
31          this.document = document;
32          this.xmlString = xmlString;
33      }
34      
35      public void determine() {
36          calculateLinesMap();
37          determineLocation(document, 0);
38      }
39      private int determineLocation(Node n, int index) {
40          int nextIndex = index;
41          if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
42              nextIndex = xmlString.indexOf("<!DOCTYPE", nextIndex);
43          } else if (n.getNodeType() == Node.COMMENT_NODE) {
44              nextIndex = xmlString.indexOf("<!--", nextIndex);
45          } else if (n.getNodeType() == Node.ELEMENT_NODE) {
46              nextIndex = xmlString.indexOf("<" + n.getNodeName(), nextIndex);
47          } else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
48              nextIndex = xmlString.indexOf("<![CDATA[", nextIndex);
49          } else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
50              ProcessingInstruction pi = (ProcessingInstruction)n;
51              nextIndex = xmlString.indexOf("<?" + pi.getTarget(), nextIndex);
52          } else if (n.getNodeType() == Node.TEXT_NODE) {
53              String te = unexpandEntities(n, n.getNodeValue());
54              int newIndex = xmlString.indexOf(te, nextIndex);
55              if (newIndex > 0) {
56                  nextIndex = newIndex;
57              }
58          } else if (n.getNodeType() == Node.ENTITY_REFERENCE_NODE) {
59              nextIndex = xmlString.indexOf("&" + n.getNodeName() + ";", nextIndex);
60          }
61          setBeginLocation(n, nextIndex);
62          if (n.hasChildNodes()) {
63              NodeList childs = n.getChildNodes();
64              for (int i = 0; i < childs.getLength(); i++) {
65                  nextIndex = determineLocation(childs.item(i), nextIndex);
66              }
67          }
68          if (n.getNodeType() == Node.ELEMENT_NODE) {
69              nextIndex += 2 + n.getNodeName().length() + 1; // </nodename>
70          } else if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
71              Node nextSibling = n.getNextSibling();
72              if (nextSibling.getNodeType() == Node.ELEMENT_NODE) {
73                  nextIndex = xmlString.indexOf("<" + nextSibling.getNodeName(), nextIndex) - 1;
74              } else if (nextSibling.getNodeType() == Node.COMMENT_NODE) {
75                  nextIndex = xmlString.indexOf("<!--", nextIndex);
76              } else {
77                  nextIndex = xmlString.indexOf(">", nextIndex);
78              }
79          } else if (n.getNodeType() == Node.COMMENT_NODE) {
80              nextIndex += 4 + 3; // <!-- and -->
81              nextIndex += n.getNodeValue().length();
82          } else if (n.getNodeType() == Node.TEXT_NODE) {
83              String te = unexpandEntities(n, n.getNodeValue());
84              nextIndex += te.length();
85          } else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
86              nextIndex += "<![CDATA[".length() + n.getNodeValue().length() + "]]>".length();
87          } else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
88              ProcessingInstruction pi = (ProcessingInstruction)n;
89              nextIndex += "<?".length() + pi.getTarget().length() + "?>".length() + pi.getData().length();
90          }
91          setEndLocation(n, nextIndex - 1);
92          return nextIndex;
93      }
94  
95      private String unexpandEntities(Node n, String te) {
96          String result = te;
97          DocumentType doctype = n.getOwnerDocument().getDoctype();
98          // implicit entities
99          result = result.replaceAll(Matcher.quoteReplacement("&"), "&amp;");
100         result = result.replaceAll(Matcher.quoteReplacement("<"), "&lt;");
101         result = result.replaceAll(Matcher.quoteReplacement(">"), "&gt;");
102         result = result.replaceAll(Matcher.quoteReplacement("\""), "&quot;");
103         result = result.replaceAll(Matcher.quoteReplacement("'"), "&apos;");
104 
105         if (doctype != null) {
106             NamedNodeMap entities = doctype.getEntities();
107             String internalSubset = doctype.getInternalSubset();
108             if (internalSubset == null) {
109                 internalSubset = "";
110             }
111             for (int i = 0; i < entities.getLength(); i++) {
112                 Node item = entities.item(i);
113                 String entityName = item.getNodeName();
114                 Node firstChild = item.getFirstChild();
115                 if (firstChild != null) {
116                     result = result.replaceAll(Matcher.quoteReplacement(firstChild.getNodeValue()), "&" + entityName + ";");
117                 } else {
118                     Matcher m = Pattern.compile(Matcher.quoteReplacement("<!ENTITY " + entityName + " ") + "[']([^']*)[']>").matcher(internalSubset);
119                     if (m.find()) {
120                         result = result.replaceAll(Matcher.quoteReplacement(m.group(1)), "&" + entityName + ";");
121                     }
122                 }
123             }
124         }
125         return result;
126     }
127     private void setBeginLocation(Node n, int index) {
128         if (n != null) {
129             int line = toLine(index);
130             n.setUserData(XmlNode.BEGIN_LINE, line, null);
131             n.setUserData(XmlNode.BEGIN_COLUMN, toColumn(line, index), null);
132         }
133     }
134     private void setEndLocation(Node n, int index) {
135         if (n != null) {
136             int line = toLine(index);
137             n.setUserData(XmlNode.END_LINE, line, null);
138             n.setUserData(XmlNode.END_COLUMN, toColumn(line, index), null);
139         }
140     }
141     
142     /**
143      * Calculates a list with the file offsets for each line.
144      */
145     private void calculateLinesMap() {
146         lines = new ArrayList<Integer>();
147 
148         int index = -1;
149         int count = StringUtils.countMatches(xmlString, "\n");
150         for (int line = 1; line <= count; line++) {
151             lines.add(index + 1);
152             index = xmlString.indexOf("\n", index + 1); // fast forward till end of current line
153         }
154         lines.add(index + 1);
155     }
156 
157     private int toLine(int index) {
158         int low = 0;
159         int high = lines.size() - 1;
160 
161         // binary search the best match
162         while (low <= high) {
163             int middle = (low + high) / 2;
164             int middleStart = lines.get(middle);
165             if (middleStart == index) {
166                 return middle + 1; // found
167             }
168 
169             if (middleStart > index) {
170                 high = middle - 1;
171             } else {
172                 low = middle + 1;
173             }
174         }
175 
176         return low; // not found or last checked line, which is the best match;
177     }
178 
179     private int toColumn(int line, int index) {
180         Integer lineStart = lines.get(line - 1);
181         if (lineStart == null) {
182             lineStart = lines.get(lines.size() - 1);
183         }
184         int column = index - lineStart;
185         return column + 1;
186     }
187 }