Skip to content

Commit 2680259

Browse files
authored
Detect invalid license expressions containing () (#879)
The following condensed list of expressions are all currently mislabelled as valid by `canonicalize_license_expression()`. ( ) or MIT MIT and ( ) ( ) and MIT or MIT ( ) with ( MIT ) ( ) with ( ) or MIT MIT with ( ) with ( ) or MIT The trick of converting license expressions to Python statements runs afoul of `()` being an empty tuple (i.e. valid syntax and falsy). Depending on how `()` is combined with real `False`s, it may or may not get caught by the subsequent `eval(python_expression) is False` check: MIT or () -> False or () -> () # Caught () or MIT -> () or False -> False # Not caught Consider a license expression invalid if a `)` comes immediately after a `(` token. This also removes the one case where the `eval()` would return something other than `False` so that `eval()` can now be downgraded to a `compile()` (which I find very anxiety relieving even though I can't think of any way the old `eval()` could ever have been exposed to more than `False`s or `()`s).
1 parent f89652b commit 2680259

File tree

2 files changed

+18
-8
lines changed

2 files changed

+18
-8
lines changed

src/packaging/licenses/__init__.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,10 @@ def canonicalize_license_expression(
8080

8181
tokens = license_expression.split()
8282

83-
# Rather than implementing boolean logic, we create an expression that Python can
84-
# parse. Everything that is not involved with the grammar itself is treated as
85-
# `False` and the expression should evaluate as such.
83+
# Rather than implementing a parenthesis/boolean logic parser, create an
84+
# expression that Python can parse. Everything that is not involved with the
85+
# grammar itself is replaced with the placeholder `False` and the resultant
86+
# expression should become a valid Python expression.
8687
python_tokens = []
8788
for token in tokens:
8889
if token not in {"or", "and", "with", "(", ")"}:
@@ -92,16 +93,16 @@ def canonicalize_license_expression(
9293
elif token == "(" and python_tokens and python_tokens[-1] not in {"or", "and"}:
9394
message = f"Invalid license expression: {raw_license_expression!r}"
9495
raise InvalidLicenseExpression(message)
96+
elif token == ")" and python_tokens and python_tokens[-1] == "(":
97+
message = f"Invalid license expression: {raw_license_expression!r}"
98+
raise InvalidLicenseExpression(message)
9599
else:
96100
python_tokens.append(token)
97101

98102
python_expression = " ".join(python_tokens)
99103
try:
100-
invalid = eval(python_expression, globals(), locals())
101-
except Exception:
102-
invalid = True
103-
104-
if invalid is not False:
104+
compile(python_expression, "", "eval")
105+
except SyntaxError:
105106
message = f"Invalid license expression: {raw_license_expression!r}"
106107
raise InvalidLicenseExpression(message) from None
107108

tests/test_metadata.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,11 +710,20 @@ def test_valid_license_expression(self, license_expression, expected):
710710
"with mit",
711711
"(mit",
712712
"mit)",
713+
") mit",
714+
"mit (",
713715
"mit or or apache-2.0",
714716
# Missing an operator before `(`.
715717
"mit or apache-2.0 (bsd-3-clause and MPL-2.0)",
716718
# "2-BSD-Clause is not a valid license.
717719
"Apache-2.0 OR 2-BSD-Clause",
720+
# Empty parenthesis.
721+
"()",
722+
"( ) or mit",
723+
"mit and ( )",
724+
"( ) or mit and ( )",
725+
"( ) with ( ) or mit",
726+
"mit with ( ) with ( ) or mit",
718727
],
719728
)
720729
def test_invalid_license_expression(self, license_expression):

0 commit comments

Comments
 (0)