Skip to content

Commit cb66848

Browse files
committed
Adding missing XML response schema collection
Signed-off-by: sezen.leblay <[email protected]>
1 parent 03d997e commit cb66848

File tree

26 files changed

+2125
-64
lines changed

26 files changed

+2125
-64
lines changed
Lines changed: 348 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,348 @@
1+
package datadog.trace.bootstrap.instrumentation;
2+
3+
import java.io.StringReader;
4+
import java.util.ArrayList;
5+
import java.util.Collections;
6+
import java.util.HashMap;
7+
import java.util.List;
8+
import java.util.Map;
9+
import javax.xml.parsers.DocumentBuilder;
10+
import javax.xml.parsers.DocumentBuilderFactory;
11+
import org.w3c.dom.Attr;
12+
import org.w3c.dom.Document;
13+
import org.w3c.dom.Element;
14+
import org.w3c.dom.NamedNodeMap;
15+
import org.w3c.dom.Node;
16+
import org.w3c.dom.NodeList;
17+
import org.w3c.dom.Text;
18+
import org.xml.sax.InputSource;
19+
20+
/**
21+
* Utility class for converting W3C DOM XML structures to Map/List representations that are
22+
* compatible with WAF analysis and schema extraction.
23+
*
24+
* <p>This centralized utility eliminates code duplication across multiple instrumentation modules
25+
* that need to process XML content for AppSec analysis.
26+
*/
27+
public final class XmlDomUtils {
28+
29+
/** Default maximum recursion depth for XML DOM conversion to prevent stack overflow. */
30+
public static final int DEFAULT_MAX_CONVERSION_DEPTH = 15;
31+
32+
private XmlDomUtils() {
33+
// Utility class - prevent instantiation
34+
}
35+
36+
/**
37+
* Convert a W3C DOM Document to a WAF-compatible Map/List structure using the default recursion
38+
* depth.
39+
*
40+
* @param document the XML document to convert
41+
* @return converted structure wrapped in a list for consistency, or null if document is null
42+
*/
43+
public static Object convertDocument(Document document) {
44+
return convertDocument(document, DEFAULT_MAX_CONVERSION_DEPTH);
45+
}
46+
47+
/**
48+
* Convert a W3C DOM Document to a WAF-compatible Map/List structure.
49+
*
50+
* @param document the XML document to convert
51+
* @param maxRecursion maximum recursion depth to prevent stack overflow
52+
* @return converted structure wrapped in a list for consistency, or null if document is null
53+
*/
54+
public static Object convertDocument(Document document, int maxRecursion) {
55+
if (document == null) {
56+
return null;
57+
}
58+
59+
return convertW3cNode(document.getDocumentElement(), maxRecursion);
60+
}
61+
62+
/**
63+
* Convert a W3C DOM Element to a WAF-compatible Map/List structure using the default recursion
64+
* depth.
65+
*
66+
* @param element the XML element to convert
67+
* @return converted structure wrapped in a list for consistency, or null if element is null
68+
*/
69+
public static Object convertElement(Element element) {
70+
return convertElement(element, DEFAULT_MAX_CONVERSION_DEPTH);
71+
}
72+
73+
/**
74+
* Convert a W3C DOM Element to a WAF-compatible Map/List structure.
75+
*
76+
* @param element the XML element to convert
77+
* @param maxRecursion maximum recursion depth to prevent stack overflow
78+
* @return converted structure wrapped in a list for consistency, or null if element is null
79+
*/
80+
public static Object convertElement(Element element, int maxRecursion) {
81+
if (element == null) {
82+
return null;
83+
}
84+
85+
return convertW3cNode(element, maxRecursion);
86+
}
87+
88+
/**
89+
* Convert a W3C DOM Node to a WAF-compatible Map/List structure.
90+
*
91+
* <p>This method recursively processes XML nodes, converting: - Elements to Maps with
92+
* "attributes" and "children" keys - Text nodes to their trimmed string content - Other node
93+
* types are ignored (return null)
94+
*
95+
* @param node the XML node to convert
96+
* @param maxRecursion maximum recursion depth to prevent stack overflow
97+
* @return Map for elements, String for text nodes, null for other types or when maxRecursion <= 0
98+
*/
99+
public static Object convertW3cNode(Node node, int maxRecursion) {
100+
if (node == null || maxRecursion <= 0) {
101+
return null;
102+
}
103+
104+
if (node instanceof Element) {
105+
return convertElementNode((Element) node, maxRecursion);
106+
} else if (node instanceof Text) {
107+
return convertTextNode((Text) node);
108+
}
109+
110+
// Ignore other node types (comments, processing instructions, etc.)
111+
return null;
112+
}
113+
114+
/** Convert an Element node to a Map with attributes and children. */
115+
private static Map<String, Object> convertElementNode(Element element, int maxRecursion) {
116+
Map<String, String> attributes = Collections.emptyMap();
117+
if (element.hasAttributes()) {
118+
attributes = new HashMap<>();
119+
NamedNodeMap attrMap = element.getAttributes();
120+
for (int i = 0; i < attrMap.getLength(); i++) {
121+
Attr item = (Attr) attrMap.item(i);
122+
attributes.put(item.getName(), item.getValue());
123+
}
124+
}
125+
126+
List<Object> children = Collections.emptyList();
127+
if (element.hasChildNodes()) {
128+
NodeList childNodes = element.getChildNodes();
129+
children = new ArrayList<>(childNodes.getLength());
130+
for (int i = 0; i < childNodes.getLength(); i++) {
131+
Node item = childNodes.item(i);
132+
Object childResult = convertW3cNode(item, maxRecursion - 1);
133+
if (childResult != null) {
134+
children.add(childResult);
135+
}
136+
}
137+
}
138+
139+
Map<String, Object> repr = new HashMap<>();
140+
if (!attributes.isEmpty()) {
141+
repr.put("attributes", attributes);
142+
}
143+
if (!children.isEmpty()) {
144+
repr.put("children", children);
145+
}
146+
return repr;
147+
}
148+
149+
/** Convert a Text node to its trimmed string content. */
150+
private static String convertTextNode(Text textNode) {
151+
String textContent = textNode.getTextContent();
152+
if (textContent != null) {
153+
textContent = textContent.trim();
154+
if (!textContent.isEmpty()) {
155+
return textContent;
156+
}
157+
}
158+
return null;
159+
}
160+
161+
/**
162+
* Check if a string contains XML content by examining both strings and DOM objects.
163+
*
164+
* @param obj the object to check
165+
* @return true if the object contains XML content, false otherwise
166+
*/
167+
public static boolean isXmlContent(Object obj) {
168+
if (obj == null) {
169+
return false;
170+
}
171+
172+
// Check for W3C DOM XML objects
173+
if (obj instanceof Document || obj instanceof Element || obj instanceof Node) {
174+
return true;
175+
}
176+
177+
// Check for XML string content
178+
if (obj instanceof String) {
179+
String content = (String) obj;
180+
if (content.trim().isEmpty()) {
181+
return false;
182+
}
183+
String trimmed = content.trim();
184+
185+
// Explicitly exclude JSON content
186+
if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
187+
return false;
188+
}
189+
190+
// Check for XML declaration
191+
if (trimmed.startsWith("<?xml")) {
192+
return true;
193+
}
194+
195+
// Check for XML element (must start with < and end with >, and contain at least one closing
196+
// tag or self-closing tag)
197+
if (trimmed.startsWith("<")
198+
&& trimmed.endsWith(">")
199+
&& (trimmed.contains("</") || trimmed.contains("/>"))) {
200+
return true;
201+
}
202+
}
203+
204+
return false;
205+
}
206+
207+
/**
208+
* Check if a string contains XML content by looking for XML declaration or root element.
209+
*
210+
* @param content the string content to check
211+
* @return true if the string contains XML content, false otherwise
212+
*/
213+
public static boolean isXmlContent(String content) {
214+
if (content == null || content.trim().isEmpty()) {
215+
return false;
216+
}
217+
String trimmed = content.trim();
218+
219+
// Explicitly exclude JSON content
220+
if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
221+
return false;
222+
}
223+
224+
return trimmed.startsWith("<?xml")
225+
|| (trimmed.startsWith("<")
226+
&& trimmed.endsWith(">")
227+
&& (trimmed.contains("</") || trimmed.contains("/>")));
228+
}
229+
230+
/**
231+
* Process XML content (strings or DOM objects) for WAF compatibility using the default recursion
232+
* depth. This ensures XML attack payloads are properly detected by the WAF.
233+
*
234+
* @param xmlObj the XML object to process (can be Document, Element, Node, or String)
235+
* @return processed XML structure compatible with WAF analysis, or null if processing fails
236+
*/
237+
public static Object processXmlForWaf(Object xmlObj) {
238+
return processXmlForWaf(xmlObj, DEFAULT_MAX_CONVERSION_DEPTH);
239+
}
240+
241+
/**
242+
* Process XML content (strings or DOM objects) for WAF compatibility. This ensures XML attack
243+
* payloads are properly detected by the WAF.
244+
*
245+
* @param xmlObj the XML object to process (can be Document, Element, Node, or String)
246+
* @param maxRecursion maximum recursion depth to prevent stack overflow
247+
* @return processed XML structure compatible with WAF analysis, or null if processing fails
248+
*/
249+
public static Object processXmlForWaf(Object xmlObj, int maxRecursion) {
250+
if (xmlObj == null) {
251+
return null;
252+
}
253+
254+
// Handle W3C DOM objects directly
255+
if (xmlObj instanceof Document) {
256+
return convertDocument((Document) xmlObj, maxRecursion);
257+
}
258+
259+
if (xmlObj instanceof Element) {
260+
return convertElement((Element) xmlObj, maxRecursion);
261+
}
262+
263+
if (xmlObj instanceof Node) {
264+
// Return the converted node directly
265+
return convertW3cNode((Node) xmlObj, maxRecursion);
266+
}
267+
268+
// Handle XML strings by parsing them first
269+
if (xmlObj instanceof String) {
270+
try {
271+
return parseXmlStringToWafFormat((String) xmlObj, maxRecursion);
272+
} catch (Exception e) {
273+
// Return null if parsing fails - let caller handle logging
274+
return null;
275+
}
276+
}
277+
278+
return null;
279+
}
280+
281+
/**
282+
* Convert XML string to WAF-compatible format using the default recursion depth. This ensures XML
283+
* attack payloads are properly detected by the WAF.
284+
*
285+
* @param xmlContent the XML string content to parse
286+
* @return parsed XML structure compatible with WAF analysis
287+
* @throws Exception if XML parsing fails
288+
*/
289+
public static Object parseXmlStringToWafFormat(String xmlContent) throws Exception {
290+
return parseXmlStringToWafFormat(xmlContent, DEFAULT_MAX_CONVERSION_DEPTH);
291+
}
292+
293+
/**
294+
* Convert XML string to WAF-compatible format following Spring framework pattern. This ensures
295+
* XML attack payloads are properly detected by the WAF.
296+
*
297+
* @param xmlContent the XML string content to parse
298+
* @param maxRecursion maximum recursion depth to prevent stack overflow
299+
* @return parsed XML structure compatible with WAF analysis
300+
* @throws Exception if XML parsing fails
301+
*/
302+
public static Object parseXmlStringToWafFormat(String xmlContent, int maxRecursion)
303+
throws Exception {
304+
if (xmlContent == null || xmlContent.trim().isEmpty()) {
305+
return null;
306+
}
307+
308+
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
309+
// Security settings to prevent XXE attacks during parsing
310+
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
311+
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
312+
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
313+
factory.setExpandEntityReferences(false);
314+
315+
DocumentBuilder builder = factory.newDocumentBuilder();
316+
Document document = builder.parse(new InputSource(new StringReader(xmlContent)));
317+
318+
return convertDocument(document, maxRecursion);
319+
}
320+
321+
/**
322+
* Convert XML string to WAF-compatible format using the default recursion depth. This is a
323+
* convenience method that wraps parseXmlStringToWafFormat and handles exceptions internally.
324+
*
325+
* @param xmlContent the XML string content to handle
326+
* @return parsed XML structure compatible with WAF analysis, or null if parsing fails
327+
*/
328+
public static Object handleXmlString(String xmlContent) {
329+
return handleXmlString(xmlContent, DEFAULT_MAX_CONVERSION_DEPTH);
330+
}
331+
332+
/**
333+
* Convert XML string to WAF-compatible format. This is a convenience method that wraps
334+
* parseXmlStringToWafFormat and handles exceptions internally.
335+
*
336+
* @param xmlContent the XML string content to handle
337+
* @param maxRecursion maximum recursion depth to prevent stack overflow
338+
* @return parsed XML structure compatible with WAF analysis, or null if parsing fails
339+
*/
340+
public static Object handleXmlString(String xmlContent, int maxRecursion) {
341+
try {
342+
return parseXmlStringToWafFormat(xmlContent, maxRecursion);
343+
} catch (Exception e) {
344+
// Return null if parsing fails - let caller handle logging
345+
return null;
346+
}
347+
}
348+
}

0 commit comments

Comments
 (0)