Skip to content

Commit 6c61fc4

Browse files
Prevent CRLF injections described in CVE-2019-12387
Author: markrwilliams Reviewers: glyph Fixes: ticket:9647 Twisted's HTTP client APIs were vulnerable to maliciously constructed HTTP methods, hosts, and/or paths, URI components such as paths and query parameters. These vulnerabilities were beyond the header name and value injection vulnerabilities addressed in: https://twistedmatrix.com/trac/ticket/9420 #999 The following client APIs will raise a ValueError if given a method, host, or URI that includes newlines or other disallowed characters: - twisted.web.client.Agent.request - twisted.web.client.ProxyAgent.request - twisted.web.client.Request.__init__ - twisted.web.client.Request.writeTo ProxyAgent is patched separately from Agent because unlike other agents (e.g. CookieAgent) it is not implemented as an Agent wrapper. Request.__init__ checks its method and URI so that errors occur closer to their originating input. Request.method and Request.uri are both public APIs, however, so Request.writeTo (via Request._writeHeaders) also checks the validity of both before writing anything to the wire. Additionally, the following deprecated client APIs have also been patched: - twisted.web.client.HTTPPageGetter.__init__ - twisted.web.client.HTTPPageDownloader.__init__ - twisted.web.client.HTTPClientFactory.__init__ - twisted.web.client.HTTPClientFactory.setURL - twisted.web.client.HTTPDownloader.__init__ - twisted.web.client.HTTPDownloader.setURL - twisted.web.client.getPage - twisted.web.client.downloadPage These have been patched prior to their removal so that they won't be vulnerable in the last Twisted release that includes them. They represent a best effort, because testing every combination of these public APIs would require more code than deprecated APIs warrant. In all cases URI components, including hostnames, are restricted to the characters allowed in path components. This mirrors the CPython patch (for bpo-30458) that addresses equivalent vulnerabilities: python/cpython@bb8071a HTTP methods, however, are checked against the set of characters described in RFC-7230.
1 parent d0bcf0b commit 6c61fc4

File tree

6 files changed

+725
-11
lines changed

6 files changed

+725
-11
lines changed

src/twisted/web/_newclient.py

Lines changed: 81 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
from __future__ import division, absolute_import
3030
__metaclass__ = type
3131

32+
import re
33+
3234
from zope.interface import implementer
3335

3436
from twisted.python.compat import networkString
@@ -579,6 +581,74 @@ def connectionLost(self, reason):
579581

580582

581583

584+
_VALID_METHOD = re.compile(
585+
br"\A[%s]+\Z" % (
586+
bytes().join(
587+
(
588+
b"!", b"#", b"$", b"%", b"&", b"'", b"*",
589+
b"+", b"-", b".", b"^", b"_", b"`", b"|", b"~",
590+
b"\x30-\x39",
591+
b"\x41-\x5a",
592+
b"\x61-\x7A",
593+
),
594+
),
595+
),
596+
)
597+
598+
599+
600+
def _ensureValidMethod(method):
601+
"""
602+
An HTTP method is an HTTP token, which consists of any visible
603+
ASCII character that is not a delimiter (i.e. one of
604+
C{"(),/:;<=>?@[\\]{}}.)
605+
606+
@param method: the method to check
607+
@type method: L{bytes}
608+
609+
@return: the method if it is valid
610+
@rtype: L{bytes}
611+
612+
@raise ValueError: if the method is not valid
613+
614+
@see: U{https://tools.ietf.org/html/rfc7230#section-3.1.1},
615+
U{https://tools.ietf.org/html/rfc7230#section-3.2.6},
616+
U{https://tools.ietf.org/html/rfc5234#appendix-B.1}
617+
"""
618+
if _VALID_METHOD.match(method):
619+
return method
620+
raise ValueError("Invalid method {!r}".format(method))
621+
622+
623+
624+
_VALID_URI = re.compile(br'\A[\x21-\x7e]+\Z')
625+
626+
627+
628+
def _ensureValidURI(uri):
629+
"""
630+
A valid URI cannot contain control characters (i.e., characters
631+
between 0-32, inclusive and 127) or non-ASCII characters (i.e.,
632+
characters with values between 128-255, inclusive).
633+
634+
@param uri: the URI to check
635+
@type uri: L{bytes}
636+
637+
@return: the URI if it is valid
638+
@rtype: L{bytes}
639+
640+
@raise ValueError: if the URI is not valid
641+
642+
@see: U{https://tools.ietf.org/html/rfc3986#section-3.3},
643+
U{https://tools.ietf.org/html/rfc3986#appendix-A},
644+
U{https://tools.ietf.org/html/rfc5234#appendix-B.1}
645+
"""
646+
if _VALID_URI.match(uri):
647+
return uri
648+
raise ValueError("Invalid URI {!r}".format(uri))
649+
650+
651+
582652
@implementer(IClientRequest)
583653
class Request:
584654
"""
@@ -618,8 +688,8 @@ def __init__(self, method, uri, headers, bodyProducer, persistent=False):
618688
connection, defaults to C{False}.
619689
@type persistent: L{bool}
620690
"""
621-
self.method = method
622-
self.uri = uri
691+
self.method = _ensureValidMethod(method)
692+
self.uri = _ensureValidURI(uri)
623693
self.headers = headers
624694
self.bodyProducer = bodyProducer
625695
self.persistent = persistent
@@ -664,8 +734,15 @@ def _writeHeaders(self, transport, TEorCL):
664734
# method would probably be good. It would be nice if this method
665735
# weren't limited to issuing HTTP/1.1 requests.
666736
requestLines = []
667-
requestLines.append(b' '.join([self.method, self.uri,
668-
b'HTTP/1.1\r\n']))
737+
requestLines.append(
738+
b' '.join(
739+
[
740+
_ensureValidMethod(self.method),
741+
_ensureValidURI(self.uri),
742+
b'HTTP/1.1\r\n',
743+
]
744+
),
745+
)
669746
if not self.persistent:
670747
requestLines.append(b'Connection: close\r\n')
671748
if TEorCL is not None:

src/twisted/web/client.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ def urlunparse(parts):
4747
from twisted.web.http_headers import Headers
4848
from twisted.logger import Logger
4949

50+
from twisted.web._newclient import _ensureValidURI, _ensureValidMethod
51+
52+
5053

5154
class PartialDownloadError(error.Error):
5255
"""
@@ -78,11 +81,13 @@ class HTTPPageGetter(http.HTTPClient):
7881

7982
_completelyDone = True
8083

81-
_specialHeaders = set((b'host', b'user-agent', b'cookie', b'content-length'))
84+
_specialHeaders = set(
85+
(b'host', b'user-agent', b'cookie', b'content-length'),
86+
)
8287

8388
def connectionMade(self):
84-
method = getattr(self.factory, 'method', b'GET')
85-
self.sendCommand(method, self.factory.path)
89+
method = _ensureValidMethod(getattr(self.factory, 'method', b'GET'))
90+
self.sendCommand(method, _ensureValidURI(self.factory.path))
8691
if self.factory.scheme == b'http' and self.factory.port != 80:
8792
host = self.factory.host + b':' + intToBytes(self.factory.port)
8893
elif self.factory.scheme == b'https' and self.factory.port != 443:
@@ -362,7 +367,7 @@ def __init__(self, url, method=b'GET', postdata=None, headers=None,
362367
# just in case a broken http/1.1 decides to keep connection alive
363368
self.headers.setdefault(b"connection", b"close")
364369
self.postdata = postdata
365-
self.method = method
370+
self.method = _ensureValidMethod(method)
366371

367372
self.setURL(url)
368373

@@ -389,6 +394,7 @@ def __repr__(self):
389394
return "<%s: %s>" % (self.__class__.__name__, self.url)
390395

391396
def setURL(self, url):
397+
_ensureValidURI(url.strip())
392398
self.url = url
393399
uri = URI.fromBytes(url)
394400
if uri.scheme and uri.host:
@@ -733,7 +739,7 @@ def _makeGetterFactory(url, factoryFactory, contextFactory=None,
733739
734740
@return: The factory created by C{factoryFactory}
735741
"""
736-
uri = URI.fromBytes(url)
742+
uri = URI.fromBytes(_ensureValidURI(url.strip()))
737743
factory = factoryFactory(url, *args, **kwargs)
738744
if uri.scheme == b'https':
739745
from twisted.internet import ssl
@@ -1493,6 +1499,9 @@ def _requestWithEndpoint(self, key, endpoint, method, parsedURI,
14931499
if not isinstance(method, bytes):
14941500
raise TypeError('method={!r} is {}, but must be bytes'.format(
14951501
method, type(method)))
1502+
1503+
method = _ensureValidMethod(method)
1504+
14961505
# Create minimal headers, if necessary:
14971506
if headers is None:
14981507
headers = Headers()
@@ -1717,6 +1726,7 @@ def request(self, method, uri, headers=None, bodyProducer=None):
17171726
17181727
@see: L{twisted.web.iweb.IAgent.request}
17191728
"""
1729+
uri = _ensureValidURI(uri.strip())
17201730
parsedURI = URI.fromBytes(uri)
17211731
try:
17221732
endpoint = self._getEndpoint(parsedURI)
@@ -1750,6 +1760,8 @@ def request(self, method, uri, headers=None, bodyProducer=None):
17501760
"""
17511761
Issue a new request via the configured proxy.
17521762
"""
1763+
uri = _ensureValidURI(uri.strip())
1764+
17531765
# Cache *all* connections under the same key, since we are only
17541766
# connecting to a single destination, the proxy:
17551767
key = ("http-proxy", self._proxyEndpoint)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
All HTTP clients in twisted.web.client now raise a ValueError when called with a method and/or URL that contain invalid characters. This mitigates CVE-2019-12387. Thanks to Alex Brasetvik for reporting this vulnerability.
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
"""
2+
Helpers for URI and method injection tests.
3+
4+
@see: U{CVE-2019-12387}
5+
"""
6+
7+
import string
8+
9+
10+
UNPRINTABLE_ASCII = (
11+
frozenset(range(0, 128)) -
12+
frozenset(bytearray(string.printable, 'ascii'))
13+
)
14+
15+
NONASCII = frozenset(range(128, 256))
16+
17+
18+
19+
class MethodInjectionTestsMixin(object):
20+
"""
21+
A mixin that runs HTTP method injection tests. Define
22+
L{MethodInjectionTestsMixin.attemptRequestWithMaliciousMethod} in
23+
a L{twisted.trial.unittest.SynchronousTestCase} subclass to test
24+
how HTTP client code behaves when presented with malicious HTTP
25+
methods.
26+
27+
@see: U{CVE-2019-12387}
28+
"""
29+
30+
def attemptRequestWithMaliciousMethod(self, method):
31+
"""
32+
Attempt to send a request with the given method. This should
33+
synchronously raise a L{ValueError} if either is invalid.
34+
35+
@param method: the method (e.g. C{GET\x00})
36+
37+
@param uri: the URI
38+
39+
@type method:
40+
"""
41+
raise NotImplementedError()
42+
43+
44+
def test_methodWithCLRFRejected(self):
45+
"""
46+
Issuing a request with a method that contains a carriage
47+
return and line feed fails with a L{ValueError}.
48+
"""
49+
with self.assertRaises(ValueError) as cm:
50+
method = b"GET\r\nX-Injected-Header: value"
51+
self.attemptRequestWithMaliciousMethod(method)
52+
self.assertRegex(str(cm.exception), "^Invalid method")
53+
54+
55+
def test_methodWithUnprintableASCIIRejected(self):
56+
"""
57+
Issuing a request with a method that contains unprintable
58+
ASCII characters fails with a L{ValueError}.
59+
"""
60+
for c in UNPRINTABLE_ASCII:
61+
method = b"GET%s" % (bytearray([c]),)
62+
with self.assertRaises(ValueError) as cm:
63+
self.attemptRequestWithMaliciousMethod(method)
64+
self.assertRegex(str(cm.exception), "^Invalid method")
65+
66+
67+
def test_methodWithNonASCIIRejected(self):
68+
"""
69+
Issuing a request with a method that contains non-ASCII
70+
characters fails with a L{ValueError}.
71+
"""
72+
for c in NONASCII:
73+
method = b"GET%s" % (bytearray([c]),)
74+
with self.assertRaises(ValueError) as cm:
75+
self.attemptRequestWithMaliciousMethod(method)
76+
self.assertRegex(str(cm.exception), "^Invalid method")
77+
78+
79+
80+
class URIInjectionTestsMixin(object):
81+
"""
82+
A mixin that runs HTTP URI injection tests. Define
83+
L{MethodInjectionTestsMixin.attemptRequestWithMaliciousURI} in a
84+
L{twisted.trial.unittest.SynchronousTestCase} subclass to test how
85+
HTTP client code behaves when presented with malicious HTTP
86+
URIs.
87+
"""
88+
89+
def attemptRequestWithMaliciousURI(self, method):
90+
"""
91+
Attempt to send a request with the given URI. This should
92+
synchronously raise a L{ValueError} if either is invalid.
93+
94+
@param uri: the URI.
95+
96+
@type method:
97+
"""
98+
raise NotImplementedError()
99+
100+
101+
def test_hostWithCRLFRejected(self):
102+
"""
103+
Issuing a request with a URI whose host contains a carriage
104+
return and line feed fails with a L{ValueError}.
105+
"""
106+
with self.assertRaises(ValueError) as cm:
107+
uri = b"http://twisted\r\n.invalid/path"
108+
self.attemptRequestWithMaliciousURI(uri)
109+
self.assertRegex(str(cm.exception), "^Invalid URI")
110+
111+
112+
def test_hostWithWithUnprintableASCIIRejected(self):
113+
"""
114+
Issuing a request with a URI whose host contains unprintable
115+
ASCII characters fails with a L{ValueError}.
116+
"""
117+
for c in UNPRINTABLE_ASCII:
118+
uri = b"http://twisted%s.invalid/OK" % (bytearray([c]),)
119+
with self.assertRaises(ValueError) as cm:
120+
self.attemptRequestWithMaliciousURI(uri)
121+
self.assertRegex(str(cm.exception), "^Invalid URI")
122+
123+
124+
def test_hostWithNonASCIIRejected(self):
125+
"""
126+
Issuing a request with a URI whose host contains non-ASCII
127+
characters fails with a L{ValueError}.
128+
"""
129+
for c in NONASCII:
130+
uri = b"http://twisted%s.invalid/OK" % (bytearray([c]),)
131+
with self.assertRaises(ValueError) as cm:
132+
self.attemptRequestWithMaliciousURI(uri)
133+
self.assertRegex(str(cm.exception), "^Invalid URI")
134+
135+
136+
def test_pathWithCRLFRejected(self):
137+
"""
138+
Issuing a request with a URI whose path contains a carriage
139+
return and line feed fails with a L{ValueError}.
140+
"""
141+
with self.assertRaises(ValueError) as cm:
142+
uri = b"http://twisted.invalid/\r\npath"
143+
self.attemptRequestWithMaliciousURI(uri)
144+
self.assertRegex(str(cm.exception), "^Invalid URI")
145+
146+
147+
def test_pathWithWithUnprintableASCIIRejected(self):
148+
"""
149+
Issuing a request with a URI whose path contains unprintable
150+
ASCII characters fails with a L{ValueError}.
151+
"""
152+
for c in UNPRINTABLE_ASCII:
153+
uri = b"http://twisted.invalid/OK%s" % (bytearray([c]),)
154+
with self.assertRaises(ValueError) as cm:
155+
self.attemptRequestWithMaliciousURI(uri)
156+
self.assertRegex(str(cm.exception), "^Invalid URI")
157+
158+
159+
def test_pathWithNonASCIIRejected(self):
160+
"""
161+
Issuing a request with a URI whose path contains non-ASCII
162+
characters fails with a L{ValueError}.
163+
"""
164+
for c in NONASCII:
165+
uri = b"http://twisted.invalid/OK%s" % (bytearray([c]),)
166+
with self.assertRaises(ValueError) as cm:
167+
self.attemptRequestWithMaliciousURI(uri)
168+
self.assertRegex(str(cm.exception), "^Invalid URI")

0 commit comments

Comments
 (0)