Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/11173.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed cookie unquoting to properly handle octal escape sequences in cookie values (e.g., ``\012`` for newline) by vendoring the correct ``_unquote`` implementation from Python's ``http.cookies`` module -- by :user:`bdraco`.
47 changes: 38 additions & 9 deletions aiohttp/_cookie_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,20 +108,49 @@ def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
return mrsl_val


# Matches one backslash escape inside a quoted cookie value: either a
# three-digit octal code in the range \000-\377 (group 1), or a backslash
# followed by any single character other than a newline (group 2).
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(m: re.Match[str]) -> str:
    """
    Replace function for _unquote_sub regex substitution.

    Handles escaped characters in cookie values:
    - Octal sequences are converted to their character representation
    - Other escaped characters are unescaped by removing the backslash
    """
    # Group 1 is only set for a three-digit octal escape such as \012.
    if m[1]:
        return chr(int(m[1], 8))
    # Otherwise group 2 holds the single escaped character; drop the backslash.
    return m[2]


def _unquote(value: str) -> str:
    """
    Unquote a cookie value.

    Vendored from http.cookies._unquote to ensure compatibility.

    Values that are not wrapped in a pair of double quotes are returned
    unchanged. Otherwise the surrounding quotes are stripped and backslash
    escapes are decoded in a single left-to-right pass.

    Note: The original implementation checked for None, but we've removed
    that check since all callers already ensure the value is not None.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    if len(value) < 2:
        return value
    if value[0] != '"' or value[-1] != '"':
        return value

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    value = value[1:-1]

    # Check for special sequences. Examples:
    #    \012 --> \n
    #    \"   --> "
    #
    return _unquote_sub(_unquote_replace, value)


def parse_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:
Expand Down
1 change: 1 addition & 0 deletions docs/spelling_wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ uvloop
uWSGI
vcvarsall
vendored
vendoring
waituntil
wakeup
wakeups
Expand Down
240 changes: 239 additions & 1 deletion tests/test_cookie_helpers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
"""Tests for internal cookie helper functions."""

from http.cookies import CookieError, Morsel, SimpleCookie
from http.cookies import (
CookieError,
Morsel,
SimpleCookie,
_unquote as simplecookie_unquote,
)

import pytest

from aiohttp import _cookie_helpers as helpers
from aiohttp._cookie_helpers import (
_unquote,
parse_cookie_headers,
preserve_morsel_with_coded_value,
)
Expand Down Expand Up @@ -1029,3 +1035,235 @@ def test_parse_cookie_headers_date_formats_with_attributes() -> None:
assert result[1][1]["expires"] == "Wednesday, 09-Jun-30 10:18:14 GMT"
assert result[1][1]["domain"] == ".example.com"
assert result[1][1]["samesite"] == "Strict"


@pytest.mark.parametrize(
    ("input_str", "expected"),
    [
        # Inputs that are not wrapped in a pair of double quotes
        # are expected to come back exactly as given.
        ("simple", "simple"),
        ("with spaces", "with spaces"),
        ("", ""),
        ('"', '"'),  # A lone quote is too short to be a quoted string
        ('some"text', 'some"text'),  # Quote is in the middle only
        ('text"with"quotes', 'text"with"quotes'),
    ],
)
def test_unquote_basic(input_str: str, expected: str) -> None:
    """Verify that _unquote leaves non-quoted input unchanged."""
    result = _unquote(input_str)
    assert result == expected


@pytest.mark.parametrize(
    ("input_str", "expected"),
    [
        # Plain content between the enclosing quotes
        ('"quoted"', "quoted"),
        ('"with spaces"', "with spaces"),
        ('""', ""),  # Nothing between the quotes
        # Punctuation that needs no escaping
        ('"hello, world!"', "hello, world!"),
        ('"path=/test"', "path=/test"),
    ],
)
def test_unquote_quoted_strings(input_str: str, expected: str) -> None:
    """Verify that _unquote strips the enclosing double quotes."""
    result = _unquote(input_str)
    assert result == expected


@pytest.mark.parametrize(
    ("input_str", "expected"),
    [
        # A backslash-escaped quote becomes a bare quote
        (r'"say \"hello\""', 'say "hello"'),
        (r'"nested \"quotes\" here"', 'nested "quotes" here'),
        # Several escaped quotes, including at both ends of the content
        (r'"\"start\" middle \"end\""', '"start" middle "end"'),
    ],
)
def test_unquote_escaped_quotes(input_str: str, expected: str) -> None:
    """Verify that _unquote unescapes backslash-escaped double quotes."""
    result = _unquote(input_str)
    assert result == expected


@pytest.mark.parametrize(
    ("input_str", "expected"),
    [
        # Doubled backslashes collapse to a single one
        (r'"path\\to\\file"', "path\\to\\file"),
        # Escaped backslash directly before the closing quote
        (r'"end with slash\\"', "end with slash\\"),
        # Escaped backslashes combined with escaped quotes
        (r'"path\\to\\\"file\""', 'path\\to\\"file"'),
    ],
)
def test_unquote_escaped_backslashes(input_str: str, expected: str) -> None:
    """Verify that _unquote unescapes backslash-escaped backslashes."""
    result = _unquote(input_str)
    assert result == expected


@pytest.mark.parametrize(
    ("input_str", "expected"),
    [
        # Frequently seen control and whitespace characters
        (r'"\012"', "\n"),  # line feed
        (r'"\011"', "\t"),  # horizontal tab
        (r'"\015"', "\r"),  # carriage return
        (r'"\040"', " "),  # space
        # Octal escapes embedded in ordinary text
        (r'"line1\012line2"', "line1\nline2"),
        (r'"tab\011separated"', "tab\tseparated"),
        # Back-to-back octal escapes
        (r'"\012\011\015"', "\n\t\r"),
        # Octal escapes interleaved with literal characters
        (r'"hello\040world\041"', "hello world!"),
    ],
)
def test_unquote_octal_sequences(input_str: str, expected: str) -> None:
    """Verify that _unquote decodes three-digit octal escapes."""
    result = _unquote(input_str)
    assert result == expected


@pytest.mark.parametrize(
    ("input_str", "expected"),
    [
        # Lower and upper ends of the accepted octal range
        (r'"\000"', "\x00"),  # NUL
        (r'"\001"', "\x01"),
        (r'"\177"', "\x7f"),  # DEL
        (r'"\200"', "\x80"),  # first value above 7-bit ASCII
        (r'"\377"', "\xff"),  # largest accepted octal escape
        # Values above \377 are not octal escapes: only the backslash in
        # front of the first digit is dropped, the digits stay literal
        (r'"\400"', "400"),  # 0o400 == 256
        (r'"\777"', "777"),  # 0o777 == 511
    ],
)
def test_unquote_octal_full_range(input_str: str, expected: str) -> None:
    """Verify _unquote at the octal range boundaries and just beyond them."""
    result = _unquote(input_str)
    assert result == expected


@pytest.mark.parametrize(
    ("input_str", "expected"),
    [
        # Escaped quotes, escaped backslashes and octal codes together
        (r'"say \"hello\"\012new line"', 'say "hello"\nnew line'),
        (r'"path\\to\\file\011\011data"', "path\\to\\file\t\tdata"),
        # Quote and backslash written as octal codes
        (r'"\042quoted\042 and \134backslash\134"', '"quoted" and \\backslash\\'),
        # A backslash before an ordinary letter is simply removed
        (r'"\a\b\c"', "abc"),
    ],
)
def test_unquote_mixed_escapes(input_str: str, expected: str) -> None:
    """Verify that _unquote handles different escape kinds in one value."""
    result = _unquote(input_str)
    assert result == expected


@pytest.mark.parametrize(
    ("input_str", "expected"),
    [
        # Opening quote with no closing quote
        ('"not closed', '"not closed'),
        # Closing quote with no opening quote
        ('not opened"', 'not opened"'),
        # Runs of quotes: only the outermost pair is stripped
        ('"""', '"'),
        ('""""', '""'),
        # Content ends in a backslash with no character left to escape
        (r'"ends with\"', "ends with\\"),
        # Same situation with shorter content
        (r'"test\"', "test\\"),
        # Content made up solely of escaped quotes
        (r'"\"\"\""', '"""'),
    ],
)
def test_unquote_edge_cases(input_str: str, expected: str) -> None:
    """Verify _unquote on unbalanced quotes and dangling backslashes."""
    result = _unquote(input_str)
    assert result == expected


@pytest.mark.parametrize(
    ("input_str", "expected"),
    [
        # JSON-style payload with escaped quotes
        (r'"{\"user\":\"john\",\"id\":123}"', '{"user":"john","id":123}'),
        # Percent-encoding inside quotes is left alone
        ('"hello%20world"', "hello%20world"),
        # Windows-style path with escaped backslashes
        (r'"C:\\Users\\John\\Documents"', "C:\\Users\\John\\Documents"),
        # Session-like value with a nested quoted part
        (
            r'"session_data=\"user123\";expires=2024"',
            'session_data="user123";expires=2024',
        ),
    ],
)
def test_unquote_real_world_examples(input_str: str, expected: str) -> None:
    """Verify _unquote on values shaped like real cookie payloads."""
    result = _unquote(input_str)
    assert result == expected


@pytest.mark.parametrize(
    "test_value",
    [
        '""',
        '"simple"',
        r'"with \"quotes\""',
        r'"with \\backslash\\"',
        r'"\012newline"',
        r'"complex\042quote\134slash\012"',
        '"not-quoted',
        'also-not-quoted"',
        r'"mixed\011\042\134test"',
    ],
)
def test_unquote_compatibility_with_simplecookie(test_value: str) -> None:
    """Verify that _unquote agrees with http.cookies._unquote."""
    # Evaluate both implementations once and reuse the results in the
    # failure message.
    ours = _unquote(test_value)
    stdlib = simplecookie_unquote(test_value)
    assert ours == stdlib, (
        f"Mismatch for {test_value!r}: "
        f"our={ours!r}, "
        f"SimpleCookie={stdlib!r}"
    )


@pytest.mark.parametrize(
    ("header", "expected_name", "expected_value", "expected_coded"),
    [
        # Headers whose quoted values contain octal escape sequences
        (r'name="\012newline\012"', "name", "\nnewline\n", r'"\012newline\012"'),
        (
            r'tab="\011separated\011values"',
            "tab",
            "\tseparated\tvalues",
            r'"\011separated\011values"',
        ),
        (
            r'mixed="hello\040world\041"',
            "mixed",
            "hello world!",
            r'"hello\040world\041"',
        ),
        (
            r'complex="\042quoted\042 text with \012 newline"',
            "complex",
            '"quoted" text with \n newline',
            r'"\042quoted\042 text with \012 newline"',
        ),
    ],
)
def test_parse_cookie_headers_uses_unquote_with_octal(
    header: str, expected_name: str, expected_value: str, expected_coded: str
) -> None:
    """Verify octal decoding of values parsed by parse_cookie_headers.

    The decoded text is expected in ``value`` while ``coded_value`` keeps
    the quoted string exactly as it appeared in the header.
    """
    parsed = parse_cookie_headers([header])

    # A single header carrying one cookie yields exactly one entry.
    assert len(parsed) == 1
    name, morsel = parsed[0]

    # The value has its octal escapes decoded.
    assert name == expected_name
    assert morsel.value == expected_value

    # The coded value still holds the original quoted text.
    assert morsel.coded_value == expected_coded
Loading