1
2
3
4 package net.sourceforge.pmd.lang.xml.ast;
5
6 import java.util.ArrayList;
7 import java.util.Collections;
8 import java.util.HashMap;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.regex.Matcher;
12 import java.util.regex.Pattern;
13
14 import org.apache.commons.lang3.StringUtils;
15 import org.w3c.dom.Document;
16 import org.w3c.dom.DocumentType;
17 import org.w3c.dom.NamedNodeMap;
18 import org.w3c.dom.Node;
19 import org.w3c.dom.NodeList;
20 import org.w3c.dom.ProcessingInstruction;
21
22
23
24
25 class DOMLineNumbers {
26 private final Document document;
27 private String xmlString;
28 private List<Integer> lines;
29
30 public DOMLineNumbers(Document document, String xmlString) {
31 this.document = document;
32 this.xmlString = xmlString;
33 }
34
35 public void determine() {
36 calculateLinesMap();
37 determineLocation(document, 0);
38 }
39 private int determineLocation(Node n, int index) {
40 int nextIndex = index;
41 if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
42 nextIndex = xmlString.indexOf("<!DOCTYPE", nextIndex);
43 } else if (n.getNodeType() == Node.COMMENT_NODE) {
44 nextIndex = xmlString.indexOf("<!--", nextIndex);
45 } else if (n.getNodeType() == Node.ELEMENT_NODE) {
46 nextIndex = xmlString.indexOf("<" + n.getNodeName(), nextIndex);
47 } else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
48 nextIndex = xmlString.indexOf("<![CDATA[", nextIndex);
49 } else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
50 ProcessingInstruction pi = (ProcessingInstruction)n;
51 nextIndex = xmlString.indexOf("<?" + pi.getTarget(), nextIndex);
52 } else if (n.getNodeType() == Node.TEXT_NODE) {
53 String te = unexpandEntities(n, n.getNodeValue());
54 int newIndex = xmlString.indexOf(te, nextIndex);
55 if (newIndex > 0) {
56 nextIndex = newIndex;
57 }
58 } else if (n.getNodeType() == Node.ENTITY_REFERENCE_NODE) {
59 nextIndex = xmlString.indexOf("&" + n.getNodeName() + ";", nextIndex);
60 }
61 setBeginLocation(n, nextIndex);
62 if (n.hasChildNodes()) {
63 NodeList childs = n.getChildNodes();
64 for (int i = 0; i < childs.getLength(); i++) {
65 nextIndex = determineLocation(childs.item(i), nextIndex);
66 }
67 }
68 if (n.getNodeType() == Node.ELEMENT_NODE) {
69 nextIndex += 2 + n.getNodeName().length() + 1;
70 } else if (n.getNodeType() == Node.DOCUMENT_TYPE_NODE) {
71 Node nextSibling = n.getNextSibling();
72 if (nextSibling.getNodeType() == Node.ELEMENT_NODE) {
73 nextIndex = xmlString.indexOf("<" + nextSibling.getNodeName(), nextIndex) - 1;
74 } else if (nextSibling.getNodeType() == Node.COMMENT_NODE) {
75 nextIndex = xmlString.indexOf("<!--", nextIndex);
76 } else {
77 nextIndex = xmlString.indexOf(">", nextIndex);
78 }
79 } else if (n.getNodeType() == Node.COMMENT_NODE) {
80 nextIndex += 4 + 3;
81 nextIndex += n.getNodeValue().length();
82 } else if (n.getNodeType() == Node.TEXT_NODE) {
83 String te = unexpandEntities(n, n.getNodeValue());
84 nextIndex += te.length();
85 } else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
86 nextIndex += "<![CDATA[".length() + n.getNodeValue().length() + "]]>".length();
87 } else if (n.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) {
88 ProcessingInstruction pi = (ProcessingInstruction)n;
89 nextIndex += "<?".length() + pi.getTarget().length() + "?>".length() + pi.getData().length();
90 }
91 setEndLocation(n, nextIndex - 1);
92 return nextIndex;
93 }
94
95 private String unexpandEntities(Node n, String te) {
96 String result = te;
97 DocumentType doctype = n.getOwnerDocument().getDoctype();
98
99 result = result.replaceAll(Matcher.quoteReplacement("&"), "&");
100 result = result.replaceAll(Matcher.quoteReplacement("<"), "<");
101 result = result.replaceAll(Matcher.quoteReplacement(">"), ">");
102 result = result.replaceAll(Matcher.quoteReplacement("\""), """);
103 result = result.replaceAll(Matcher.quoteReplacement("'"), "'");
104
105 if (doctype != null) {
106 NamedNodeMap entities = doctype.getEntities();
107 String internalSubset = doctype.getInternalSubset();
108 if (internalSubset == null) {
109 internalSubset = "";
110 }
111 for (int i = 0; i < entities.getLength(); i++) {
112 Node item = entities.item(i);
113 String entityName = item.getNodeName();
114 Node firstChild = item.getFirstChild();
115 if (firstChild != null) {
116 result = result.replaceAll(Matcher.quoteReplacement(firstChild.getNodeValue()), "&" + entityName + ";");
117 } else {
118 Matcher m = Pattern.compile(Matcher.quoteReplacement("<!ENTITY " + entityName + " ") + "[']([^']*)[']>").matcher(internalSubset);
119 if (m.find()) {
120 result = result.replaceAll(Matcher.quoteReplacement(m.group(1)), "&" + entityName + ";");
121 }
122 }
123 }
124 }
125 return result;
126 }
127 private void setBeginLocation(Node n, int index) {
128 if (n != null) {
129 int line = toLine(index);
130 n.setUserData(XmlNode.BEGIN_LINE, line, null);
131 n.setUserData(XmlNode.BEGIN_COLUMN, toColumn(line, index), null);
132 }
133 }
134 private void setEndLocation(Node n, int index) {
135 if (n != null) {
136 int line = toLine(index);
137 n.setUserData(XmlNode.END_LINE, line, null);
138 n.setUserData(XmlNode.END_COLUMN, toColumn(line, index), null);
139 }
140 }
141
142
143
144
145 private void calculateLinesMap() {
146 lines = new ArrayList<Integer>();
147
148 int index = -1;
149 int count = StringUtils.countMatches(xmlString, "\n");
150 for (int line = 1; line <= count; line++) {
151 lines.add(index + 1);
152 index = xmlString.indexOf("\n", index + 1);
153 }
154 lines.add(index + 1);
155 }
156
157 private int toLine(int index) {
158 int low = 0;
159 int high = lines.size() - 1;
160
161
162 while (low <= high) {
163 int middle = (low + high) / 2;
164 int middleStart = lines.get(middle);
165 if (middleStart == index) {
166 return middle + 1;
167 }
168
169 if (middleStart > index) {
170 high = middle - 1;
171 } else {
172 low = middle + 1;
173 }
174 }
175
176 return low;
177 }
178
179 private int toColumn(int line, int index) {
180 Integer lineStart = lines.get(line - 1);
181 if (lineStart == null) {
182 lineStart = lines.get(lines.size() - 1);
183 }
184 int column = index - lineStart;
185 return column + 1;
186 }
187 }