Skip to content

Commit fa3cfc5

Browse files
committed
New lint: HIGHER_DATA_SENSITIVITY_REQUIRED for when an event extra key could potentially contain sensitive data
1 parent 90a1ad4 commit fa3cfc5

File tree

4 files changed

+223
-0
lines changed

4 files changed

+223
-0
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
## Unreleased
44

5+
- New lint: `HIGHER_DATA_SENSITIVITY_REQUIRED` for when an event extra key could potentially contain sensitive data ([bug 1973017](https://bugzilla.mozilla.org/show_bug.cgi?id=1973017))
6+
57
## 17.2.0
68

79
- Forbid redefinition of metrics, categories, or pings within the same YAML document ([bug 1921089](https://bugzilla.mozilla.org/show_bug.cgi?id=1921089))

glean_parser/lint.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,36 @@ def check_empty_datareview(
320320
yield "List of data reviews should not contain empty strings or TODO markers."
321321

322322

323+
def check_event_extras_potential_data_sensitivity_required(
324+
metric: metrics.Metric, parser_config: Dict[str, Any]
325+
) -> LintGenerator:
326+
# Only looking at event metrics
327+
if not isinstance(metric, metrics.Event):
328+
return
329+
330+
# TODO(bug 1890648): Not all metrics have `data_sensitivity` defined.
331+
has_data_sensitivity = hasattr(metric, "data_sensitivity")
332+
# If already marked as "highly sensitive" no need for further checks
333+
if has_data_sensitivity and any(
334+
[
335+
sensitivity == metrics.DataSensitivity.highly_sensitive
336+
for sensitivity in metric.data_sensitivity
337+
]
338+
):
339+
return
340+
341+
# List of potentially sensitive extra key names we want to flag.
342+
potential_sensitive_names = ["url", "uri"]
343+
name_list = ", ".join(potential_sensitive_names)
344+
345+
for extra_key in metric.extra_keys.keys():
346+
if extra_key in potential_sensitive_names:
347+
yield (
348+
f"`{extra_key}` could potentially be used to collect sensitive data. Increase the metric's data sensitivity or disable the lint."
349+
+ f" (This lint applies for the following extra key names: {name_list})"
350+
)
351+
352+
323353
def check_redundant_ping(
324354
pings: pings.Ping, parser_config: Dict[str, Any]
325355
) -> LintGenerator:
@@ -432,6 +462,10 @@ def check_name_too_similar(
432462
"METRIC_ON_EVENTS_LIFETIME": (check_metric_on_events_lifetime, CheckType.error),
433463
"UNEXPECTED_UNIT": (check_unexpected_unit, CheckType.warning),
434464
"EMPTY_DATAREVIEW": (check_empty_datareview, CheckType.warning),
465+
"HIGHER_DATA_SENSITIVITY_REQUIRED": (
466+
check_event_extras_potential_data_sensitivity_required,
467+
CheckType.warning,
468+
),
435469
}
436470

437471

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Any copyright is dedicated to the Public Domain.
2+
# https://creativecommons.org/publicdomain/zero/1.0/
3+
4+
---
5+
$schema: moz://mozilla.org/schemas/glean/metrics/2-0-0
6+
7+
event:
8+
low_sensitivity:
9+
type: event
10+
description: |
11+
Just testing events
12+
bugs:
13+
- https://bugzilla.mozilla.org/show_bug.cgi?id=1973017
14+
data_reviews:
15+
- http://example.com/reviews
16+
notification_emails:
17+
18+
extra_keys:
19+
url:
20+
type: string
21+
description: "This is key one"
22+
expires: never
23+
data_sensitivity:
24+
- technical
25+
26+
no_data_sensitivity:
27+
type: event
28+
description: |
29+
Just testing events
30+
bugs:
31+
- https://bugzilla.mozilla.org/show_bug.cgi?id=1973017
32+
data_reviews:
33+
- http://example.com/reviews
34+
notification_emails:
35+
36+
extra_keys:
37+
url:
38+
type: string
39+
description: "This is key one"
40+
expires: never
41+
42+
correct_sensitivity:
43+
type: event
44+
description: |
45+
Just testing events
46+
bugs:
47+
- https://bugzilla.mozilla.org/show_bug.cgi?id=1973017
48+
data_reviews:
49+
- http://example.com/reviews
50+
notification_emails:
51+
52+
extra_keys:
53+
url:
54+
type: string
55+
description: "This is key one"
56+
expires: never
57+
data_sensitivity:
58+
- highly_sensitive
59+
60+
exempt:
61+
type: event
62+
description: |
63+
Just testing events
64+
bugs:
65+
- https://bugzilla.mozilla.org/show_bug.cgi?id=1973017
66+
data_reviews:
67+
- http://example.com/reviews
68+
notification_emails:
69+
70+
extra_keys:
71+
url:
72+
type: string
73+
description: "This is key one"
74+
expires: never
75+
data_sensitivity:
76+
- technical
77+
no_lint:
78+
- HIGHER_DATA_SENSITIVITY_REQUIRED

tests/test_lint.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,3 +644,112 @@ def test_unit_on_metrics(metric, num_nits):
644644
assert len(nits) == num_nits
645645
if num_nits > 0:
646646
assert set(["UNEXPECTED_UNIT"]) == set(v.check_name for v in nits)
647+
648+
649+
@pytest.mark.parametrize(
650+
"metric, num_nits",
651+
[
652+
(
653+
{
654+
"metric": {
655+
"type": "event",
656+
"extra_keys": {
657+
"id": {"type": "string", "description": "description"}
658+
},
659+
}
660+
},
661+
0,
662+
),
663+
(
664+
{
665+
"metric": {
666+
"type": "event",
667+
"extra_keys": {
668+
"url": {"type": "string", "description": "description"}
669+
},
670+
}
671+
},
672+
1,
673+
),
674+
(
675+
{
676+
"metric": {
677+
"type": "event",
678+
"extra_keys": {
679+
"url": {"type": "string", "description": "description"}
680+
},
681+
"no_lint": ["HIGHER_DATA_SENSITIVITY_REQUIRED"],
682+
}
683+
},
684+
0,
685+
),
686+
(
687+
{
688+
"metric": {
689+
"type": "event",
690+
"extra_keys": {
691+
"url": {"type": "string", "description": "description"}
692+
},
693+
"data_sensitivity": ["technical"],
694+
}
695+
},
696+
1,
697+
),
698+
(
699+
{
700+
"metric": {
701+
"type": "event",
702+
"extra_keys": {
703+
"url": {"type": "string", "description": "description"}
704+
},
705+
"data_sensitivity": ["highly_sensitive"],
706+
}
707+
},
708+
0,
709+
),
710+
(
711+
{
712+
"metric": {
713+
"type": "event",
714+
"extra_keys": {
715+
"url": {"type": "string", "description": "description"}
716+
},
717+
"data_sensitivity": ["technical", "highly_sensitive"],
718+
}
719+
},
720+
0,
721+
),
722+
],
723+
)
724+
def test_events_data_sensitivity(metric, num_nits):
725+
content = {"category": metric}
726+
content = util.add_required(content)
727+
all_metrics = parser.parse_objects(content)
728+
729+
errs = list(all_metrics)
730+
assert len(errs) == 0
731+
732+
nits = lint.lint_metrics(all_metrics.value)
733+
734+
assert len(nits) == num_nits
735+
if num_nits > 0:
736+
assert set(["HIGHER_DATA_SENSITIVITY_REQUIRED"]) == set(
737+
v.check_name for v in nits
738+
)
739+
740+
741+
def test_events_data_sensitivity_from_file():
742+
"""Test that the 'glinter' reports issues with the old event API."""
743+
all_metrics = parser.parse_objects([ROOT / "data" / "events_data_sensitivity.yaml"])
744+
errs = list(all_metrics)
745+
assert len(errs) == 0
746+
747+
nits = lint.lint_metrics(all_metrics.value, parser_config={})
748+
assert len(nits) == 2
749+
assert nits[0].check_name == "HIGHER_DATA_SENSITIVITY_REQUIRED"
750+
assert nits[0].name == "event.low_sensitivity"
751+
assert "data sensitivity" in nits[0].msg
752+
753+
assert nits[1].check_name == "HIGHER_DATA_SENSITIVITY_REQUIRED"
754+
assert nits[1].name == "event.no_data_sensitivity"
755+
assert "data sensitivity" in nits[1].msg

0 commit comments

Comments
 (0)