|
| 1 | +package datadog.trace.bootstrap.instrumentation; |
| 2 | + |
| 3 | +import java.io.StringReader; |
| 4 | +import java.util.ArrayList; |
| 5 | +import java.util.Collections; |
| 6 | +import java.util.HashMap; |
| 7 | +import java.util.List; |
| 8 | +import java.util.Map; |
| 9 | +import javax.xml.parsers.DocumentBuilder; |
| 10 | +import javax.xml.parsers.DocumentBuilderFactory; |
| 11 | +import org.w3c.dom.Attr; |
| 12 | +import org.w3c.dom.Document; |
| 13 | +import org.w3c.dom.Element; |
| 14 | +import org.w3c.dom.NamedNodeMap; |
| 15 | +import org.w3c.dom.Node; |
| 16 | +import org.w3c.dom.NodeList; |
| 17 | +import org.w3c.dom.Text; |
| 18 | +import org.xml.sax.InputSource; |
| 19 | + |
/**
 * Utility class for converting W3C DOM XML structures to Map/List representations that are
 * compatible with WAF analysis and schema extraction.
 *
 * <p>Each element is represented as a {@code Map<String, Object>} holding an optional {@code
 * "attributes"} entry (a {@code Map<String, String>} of attribute name to value) and an optional
 * {@code "children"} entry (a {@code List<Object>} of converted child nodes). Text nodes are
 * represented by their trimmed, non-empty string content. Element names are not part of the
 * representation.
 *
 * <p>This centralized utility eliminates code duplication across multiple instrumentation modules
 * that need to process XML content for AppSec analysis.
 */
public final class XmlDomUtils {

  /** Default maximum recursion depth for XML DOM conversion to prevent stack overflow. */
  public static final int DEFAULT_MAX_CONVERSION_DEPTH = 15;

  private XmlDomUtils() {
    // Utility class - prevent instantiation
  }

  /**
   * Convert a W3C DOM Document to a WAF-compatible Map/List structure using the default recursion
   * depth.
   *
   * @param document the XML document to convert
   * @return the Map representation of the document's root element, or null if the document is null
   *     or has no root element
   */
  public static Object convertDocument(Document document) {
    return convertDocument(document, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert a W3C DOM Document to a WAF-compatible Map/List structure.
   *
   * @param document the XML document to convert
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return the Map representation of the document's root element, or null if the document is
   *     null, has no root element, or {@code maxRecursion <= 0}
   */
  public static Object convertDocument(Document document, int maxRecursion) {
    if (document == null) {
      return null;
    }

    return convertW3cNode(document.getDocumentElement(), maxRecursion);
  }

  /**
   * Convert a W3C DOM Element to a WAF-compatible Map/List structure using the default recursion
   * depth.
   *
   * @param element the XML element to convert
   * @return the Map representation of the element, or null if the element is null
   */
  public static Object convertElement(Element element) {
    return convertElement(element, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert a W3C DOM Element to a WAF-compatible Map/List structure.
   *
   * @param element the XML element to convert
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return the Map representation of the element, or null if the element is null or {@code
   *     maxRecursion <= 0}
   */
  public static Object convertElement(Element element, int maxRecursion) {
    if (element == null) {
      return null;
    }

    return convertW3cNode(element, maxRecursion);
  }

  /**
   * Convert a W3C DOM Node to a WAF-compatible Map/List structure.
   *
   * <p>This method recursively processes XML nodes, converting:
   *
   * <ul>
   *   <li>Elements to Maps with optional "attributes" and "children" keys
   *   <li>Text nodes to their trimmed string content (whitespace-only text yields null)
   *   <li>Any other node type to null (comments, processing instructions, etc.)
   * </ul>
   *
   * @param node the XML node to convert
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return Map for elements, String for text nodes, null for other types or when {@code
   *     maxRecursion <= 0}
   */
  public static Object convertW3cNode(Node node, int maxRecursion) {
    if (node == null || maxRecursion <= 0) {
      return null;
    }

    if (node instanceof Element) {
      return convertElementNode((Element) node, maxRecursion);
    }
    if (node instanceof Text) {
      return convertTextNode((Text) node);
    }

    // Ignore other node types (comments, processing instructions, etc.)
    return null;
  }

  /**
   * Convert an Element node to a Map with attributes and children.
   *
   * <p>The "attributes" and "children" keys are only present when non-empty, so an element without
   * attributes and without convertible children yields an empty Map.
   */
  private static Map<String, Object> convertElementNode(Element element, int maxRecursion) {
    Map<String, Object> repr = new HashMap<>();

    if (element.hasAttributes()) {
      NamedNodeMap attrMap = element.getAttributes();
      int attrCount = attrMap.getLength();
      Map<String, String> attributes = new HashMap<>();
      for (int i = 0; i < attrCount; i++) {
        Attr attr = (Attr) attrMap.item(i);
        attributes.put(attr.getName(), attr.getValue());
      }
      if (!attributes.isEmpty()) {
        repr.put("attributes", attributes);
      }
    }

    if (element.hasChildNodes()) {
      NodeList childNodes = element.getChildNodes();
      int childCount = childNodes.getLength();
      List<Object> children = new ArrayList<>(childCount);
      for (int i = 0; i < childCount; i++) {
        // Each nesting level consumes one unit of the recursion budget; children beyond the
        // budget convert to null and are dropped.
        Object converted = convertW3cNode(childNodes.item(i), maxRecursion - 1);
        if (converted != null) {
          children.add(converted);
        }
      }
      if (!children.isEmpty()) {
        repr.put("children", children);
      }
    }

    return repr;
  }

  /** Convert a Text node to its trimmed string content, or null when it is blank. */
  private static String convertTextNode(Text textNode) {
    String textContent = textNode.getTextContent();
    if (textContent == null) {
      return null;
    }
    String trimmed = textContent.trim();
    return trimmed.isEmpty() ? null : trimmed;
  }

  /**
   * Check if an object holds XML content: either a W3C DOM node (Document, Element or any other
   * Node) or a String that looks like XML according to {@link #isXmlContent(String)}.
   *
   * @param obj the object to check
   * @return true if the object contains XML content, false otherwise (including for null)
   */
  public static boolean isXmlContent(Object obj) {
    // Document and Element are both Node subtypes, so a single check covers all DOM objects.
    if (obj instanceof Node) {
      return true;
    }
    if (obj instanceof String) {
      return isXmlContent((String) obj);
    }
    return false;
  }

  /**
   * Check if a string contains XML content by looking for XML declaration or root element.
   *
   * <p>This is a cheap heuristic on the trimmed content, not a parse: the string is considered XML
   * when it starts with {@code <?xml}, or when it starts with {@code <}, ends with {@code >} and
   * contains at least one closing tag ({@code </}) or self-closing tag ({@code />}).
   *
   * @param content the string content to check
   * @return true if the string contains XML content, false otherwise
   */
  public static boolean isXmlContent(String content) {
    if (content == null) {
      return false;
    }
    String trimmed = content.trim();
    if (trimmed.isEmpty()) {
      return false;
    }

    // Explicitly exclude JSON content
    if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
      return false;
    }

    // Check for XML declaration
    if (trimmed.startsWith("<?xml")) {
      return true;
    }

    // Check for XML element (must start with < and end with >, and contain at least one closing
    // tag or self-closing tag)
    return trimmed.startsWith("<")
        && trimmed.endsWith(">")
        && (trimmed.contains("</") || trimmed.contains("/>"));
  }

  /**
   * Process XML content (strings or DOM objects) for WAF compatibility using the default recursion
   * depth. This ensures XML attack payloads are properly detected by the WAF.
   *
   * @param xmlObj the XML object to process (can be Document, Element, Node, or String)
   * @return processed XML structure compatible with WAF analysis, or null if processing fails
   */
  public static Object processXmlForWaf(Object xmlObj) {
    return processXmlForWaf(xmlObj, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Process XML content (strings or DOM objects) for WAF compatibility. This ensures XML attack
   * payloads are properly detected by the WAF.
   *
   * @param xmlObj the XML object to process (can be Document, Element, Node, or String)
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return processed XML structure compatible with WAF analysis, or null if the input is null, of
   *     an unsupported type, or cannot be parsed
   */
  public static Object processXmlForWaf(Object xmlObj, int maxRecursion) {
    // Handle W3C DOM objects directly
    if (xmlObj instanceof Document) {
      return convertDocument((Document) xmlObj, maxRecursion);
    }
    if (xmlObj instanceof Node) {
      // Covers Element as well as any other node type
      return convertW3cNode((Node) xmlObj, maxRecursion);
    }

    // Handle XML strings by parsing them first; parse failures yield null
    if (xmlObj instanceof String) {
      return handleXmlString((String) xmlObj, maxRecursion);
    }

    return null;
  }

  /**
   * Convert XML string to WAF-compatible format using the default recursion depth. This ensures XML
   * attack payloads are properly detected by the WAF.
   *
   * @param xmlContent the XML string content to parse
   * @return parsed XML structure compatible with WAF analysis, or null if the content is null or
   *     blank
   * @throws Exception if XML parsing fails
   */
  public static Object parseXmlStringToWafFormat(String xmlContent) throws Exception {
    return parseXmlStringToWafFormat(xmlContent, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert XML string to WAF-compatible format following Spring framework pattern. This ensures
   * XML attack payloads are properly detected by the WAF.
   *
   * <p>The content is trimmed before parsing so that input accepted by {@link
   * #isXmlContent(String)} (which also trims) is not rejected merely because whitespace precedes
   * the XML declaration.
   *
   * @param xmlContent the XML string content to parse
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return parsed XML structure compatible with WAF analysis, or null if the content is null or
   *     blank
   * @throws Exception if XML parsing fails, including when the content declares a DOCTYPE
   */
  public static Object parseXmlStringToWafFormat(String xmlContent, int maxRecursion)
      throws Exception {
    if (xmlContent == null) {
      return null;
    }
    String trimmed = xmlContent.trim();
    if (trimmed.isEmpty()) {
      return null;
    }

    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    // Security settings to prevent XXE attacks during parsing
    factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    factory.setExpandEntityReferences(false);

    DocumentBuilder builder = factory.newDocumentBuilder();
    Document document = builder.parse(new InputSource(new StringReader(trimmed)));

    return convertDocument(document, maxRecursion);
  }

  /**
   * Convert XML string to WAF-compatible format using the default recursion depth. This is a
   * convenience method that wraps parseXmlStringToWafFormat and handles exceptions internally.
   *
   * @param xmlContent the XML string content to handle
   * @return parsed XML structure compatible with WAF analysis, or null if parsing fails
   */
  public static Object handleXmlString(String xmlContent) {
    return handleXmlString(xmlContent, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert XML string to WAF-compatible format. This is a convenience method that wraps
   * parseXmlStringToWafFormat and handles exceptions internally.
   *
   * @param xmlContent the XML string content to handle
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return parsed XML structure compatible with WAF analysis, or null if parsing fails
   */
  public static Object handleXmlString(String xmlContent, int maxRecursion) {
    try {
      return parseXmlStringToWafFormat(xmlContent, maxRecursion);
    } catch (Exception ignored) {
      // Return null if parsing fails - let caller handle logging
      return null;
    }
  }
}
0 commit comments