Skip to content

Commit dbc7021

Browse files
fix(biome_js_analyze): fix useValidLang rejecting BCP 47 language tags with script subtags (#8118)
1 parent 01c2981 commit dbc7021

File tree

8 files changed

+207
-34
lines changed

8 files changed

+207
-34
lines changed

.changeset/angry-carpets-switch.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
---
2+
"@biomejs/biome": patch
3+
---
4+
5+
Fixed [#8117](https://github.com/biomejs/biome/issues/8117): [`useValidLang`](https://biomejs.dev/linter/rules/use-valid-lang/) now accepts valid [BCP 47 language tags](https://developer.mozilla.org/en-US/docs/Glossary/BCP_47_language_tag) with script subtags.
6+
7+
**Valid:**
8+
9+
```html
10+
<html lang="zh-Hans-CN"></html>
11+
```

crates/biome_aria_metadata/build.rs

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,21 @@ const ISO_COUNTRIES: &[&str] = &[
3232

3333
const ISO_LANGUAGES: &[&str] = &[
3434
"ab", "aa", "af", "sq", "am", "ar", "an", "hy", "as", "ay", "az", "ba", "eu", "bn", "dz", "bh",
35-
"bi", "br", "bg", "my", "be", "km", "ca", "zh", "zh-Hans", "zh-Hant", "co", "hr", "cs", "da",
36-
"nl", "en", "eo", "et", "fo", "fa", "fj", "fi", "fr", "fy", "gl", "gd", "gv", "ka", "de", "el",
37-
"kl", "gn", "gu", "ht", "ha", "he", "iw", "hi", "hu", "is", "io", "id", "in", "ia", "ie", "iu",
38-
"ik", "ga", "it", "ja", "jv", "kn", "ks", "kk", "rw", "ky", "rn", "ko", "ku", "lo", "la", "lv",
39-
"li", "ln", "lt", "mk", "mg", "ms", "ml", "mt", "mi", "mr", "mo", "mn", "na", "ne", "no", "nb",
40-
"nn", "oc", "or", "om", "ps", "pl", "pt", "pa", "qu", "rm", "ro", "ru", "sm", "sg", "sa", "sr",
41-
"sh", "st", "tn", "sn", "ii", "sd", "si", "ss", "sk", "sl", "so", "es", "su", "sw", "sv", "tl",
42-
"tg", "ta", "tt", "te", "th", "bo", "ti", "to", "ts", "tr", "tk", "tw", "ug", "uk", "ur", "uz",
43-
"vi", "vo", "wa", "cy", "wo", "xh", "yi", "ji", "yo", "zu",
35+
"bi", "br", "bg", "my", "be", "km", "ca", "zh", "co", "hr", "cs", "da", "nl", "en", "eo", "et",
36+
"fo", "fa", "fj", "fi", "fr", "fy", "gl", "gd", "gv", "ka", "de", "el", "kl", "gn", "gu", "ht",
37+
"ha", "he", "iw", "hi", "hu", "is", "io", "id", "in", "ia", "ie", "iu", "ik", "ga", "it", "ja",
38+
"jv", "kn", "ks", "kk", "rw", "ky", "rn", "ko", "ku", "lo", "la", "lv", "li", "ln", "lt", "mk",
39+
"mg", "ms", "ml", "mt", "mi", "mr", "mo", "mn", "na", "ne", "no", "nb", "nn", "oc", "or", "om",
40+
"ps", "pl", "pt", "pa", "qu", "rm", "ro", "ru", "sm", "sg", "sa", "sr", "sh", "st", "tn", "sn",
41+
"ii", "sd", "si", "ss", "sk", "sl", "so", "es", "su", "sw", "sv", "tl", "tg", "ta", "tt", "te",
42+
"th", "bo", "ti", "to", "ts", "tr", "tk", "tw", "ug", "uk", "ur", "uz", "vi", "vo", "wa", "cy",
43+
"wo", "xh", "yi", "ji", "yo", "zu",
44+
];
45+
46+
const ISO_SCRIPTS: &[&str] = &[
47+
"Arab", "Armn", "Beng", "Cyrl", "Deva", "Ethi", "Grek", "Gujr", "Guru", "Hang", "Hani", "Hans",
48+
"Hant", "Hebr", "Hira", "Kana", "Khmr", "Laoo", "Latn", "Mlym", "Mymr", "Orya", "Sinh", "Taml",
49+
"Telu", "Thai", "Tibt", "Zyyy",
4450
];
4551

4652
#[derive(Debug, Default, biome_deserialize_macros::Merge, serde::Deserialize)]
@@ -266,12 +272,14 @@ fn main() -> io::Result<()> {
266272

267273
let iso_countries = generate_enums(ISO_COUNTRIES, "IsoCountries");
268274
let iso_languages = generate_enums(ISO_LANGUAGES, "IsoLanguages");
275+
let iso_scripts = generate_enums(ISO_SCRIPTS, "IsoScripts");
269276

270277
let tokens = quote! {
271278
#aria_attributes
272279
#aria_roles
273280
#iso_countries
274281
#iso_languages
282+
#iso_scripts
275283
};
276284
let ast = tokens.to_string();
277285

crates/biome_aria_metadata/src/lib.rs

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,23 @@ pub const ISO_COUNTRIES: [&str; 233] = [
2020
"UZ", "VU", "VE", "VN", "WF", "EH", "YE", "ZM", "ZW",
2121
];
2222

23-
pub const ISO_LANGUAGES: [&str; 152] = [
23+
pub const ISO_LANGUAGES: [&str; 150] = [
2424
"ab", "aa", "af", "sq", "am", "ar", "an", "hy", "as", "ay", "az", "ba", "eu", "bn", "dz", "bh",
25-
"bi", "br", "bg", "my", "be", "km", "ca", "zh", "zh-Hans", "zh-Hant", "co", "hr", "cs", "da",
26-
"nl", "en", "eo", "et", "fo", "fa", "fj", "fi", "fr", "fy", "gl", "gd", "gv", "ka", "de", "el",
27-
"kl", "gn", "gu", "ht", "ha", "he", "iw", "hi", "hu", "is", "io", "id", "in", "ia", "ie", "iu",
28-
"ik", "ga", "it", "ja", "jv", "kn", "ks", "kk", "rw", "ky", "rn", "ko", "ku", "lo", "la", "lv",
29-
"li", "ln", "lt", "mk", "mg", "ms", "ml", "mt", "mi", "mr", "mo", "mn", "na", "ne", "no", "nb",
30-
"nn", "oc", "or", "om", "ps", "pl", "pt", "pa", "qu", "rm", "ro", "ru", "sm", "sg", "sa", "sr",
31-
"sh", "st", "tn", "sn", "ii", "sd", "si", "ss", "sk", "sl", "so", "es", "su", "sw", "sv", "tl",
32-
"tg", "ta", "tt", "te", "th", "bo", "ti", "to", "ts", "tr", "tk", "tw", "ug", "uk", "ur", "uz",
33-
"vi", "vo", "wa", "cy", "wo", "xh", "yi", "ji", "yo", "zu",
25+
"bi", "br", "bg", "my", "be", "km", "ca", "zh", "co", "hr", "cs", "da", "nl", "en", "eo", "et",
26+
"fo", "fa", "fj", "fi", "fr", "fy", "gl", "gd", "gv", "ka", "de", "el", "kl", "gn", "gu", "ht",
27+
"ha", "he", "iw", "hi", "hu", "is", "io", "id", "in", "ia", "ie", "iu", "ik", "ga", "it", "ja",
28+
"jv", "kn", "ks", "kk", "rw", "ky", "rn", "ko", "ku", "lo", "la", "lv", "li", "ln", "lt", "mk",
29+
"mg", "ms", "ml", "mt", "mi", "mr", "mo", "mn", "na", "ne", "no", "nb", "nn", "oc", "or", "om",
30+
"ps", "pl", "pt", "pa", "qu", "rm", "ro", "ru", "sm", "sg", "sa", "sr", "sh", "st", "tn", "sn",
31+
"ii", "sd", "si", "ss", "sk", "sl", "so", "es", "su", "sw", "sv", "tl", "tg", "ta", "tt", "te",
32+
"th", "bo", "ti", "to", "ts", "tr", "tk", "tw", "ug", "uk", "ur", "uz", "vi", "vo", "wa", "cy",
33+
"wo", "xh", "yi", "ji", "yo", "zu",
34+
];
35+
36+
pub const ISO_SCRIPTS: [&str; 28] = [
37+
"Arab", "Armn", "Beng", "Cyrl", "Deva", "Ethi", "Grek", "Gujr", "Guru", "Hang", "Hani", "Hans",
38+
"Hant", "Hebr", "Hira", "Kana", "Khmr", "Laoo", "Latn", "Mlym", "Mymr", "Orya", "Sinh", "Taml",
39+
"Telu", "Thai", "Tibt", "Zyyy",
3440
];
3541

3642
/// Returns a list of valid ISO countries
@@ -43,6 +49,11 @@ pub fn is_valid_language(language: &str) -> bool {
4349
IsoLanguages::from_str(language).is_ok()
4450
}
4551

52+
/// Returns whether the given script code is a valid ISO script
53+
pub fn is_valid_script(script: &str) -> bool {
54+
IsoScripts::from_str(script).is_ok()
55+
}
56+
4657
/// An array of all available countries
4758
pub fn countries() -> &'static [&'static str] {
4859
&ISO_COUNTRIES
@@ -53,6 +64,11 @@ pub fn languages() -> &'static [&'static str] {
5364
&ISO_LANGUAGES
5465
}
5566

67+
/// An array of all available scripts
68+
pub fn scripts() -> &'static [&'static str] {
69+
&ISO_SCRIPTS
70+
}
71+
5672
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
5773
pub enum AriaAttributeKind {
5874
Property,

crates/biome_js_analyze/src/lint/a11y/use_valid_lang.rs

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use biome_analyze::context::RuleContext;
22
use biome_analyze::{Ast, Rule, RuleDiagnostic, RuleSource, declare_lint_rule};
3-
use biome_aria_metadata::{is_valid_country, is_valid_language};
3+
use biome_aria_metadata::{is_valid_country, is_valid_language, is_valid_script};
44
use biome_console::markup;
55
use biome_diagnostics::Severity;
66
use biome_js_syntax::jsx_ext::AnyJsxElement;
@@ -43,6 +43,7 @@ declare_lint_rule! {
4343
enum InvalidKind {
4444
Language,
4545
Country,
46+
Script,
4647
Value,
4748
}
4849

@@ -66,27 +67,64 @@ impl Rule for UseValidLang {
6667
let attribute_static_value = attribute_value.as_static_value()?;
6768
let attribute_text = attribute_static_value.text();
6869
let mut split_value = attribute_text.split('-');
69-
match (split_value.next(), split_value.next()) {
70-
(Some(language), Some(country)) => {
71-
if !is_valid_language(language) {
70+
match (split_value.next(), split_value.next(), split_value.next()) {
71+
(Some(language), Some(script), Some(country)) => {
72+
if split_value.next().is_some() {
73+
return Some(UseValidLangState {
74+
attribute_range: attribute_value.range(),
75+
invalid_kind: InvalidKind::Value,
76+
});
77+
} else if !is_valid_language(language) {
7278
return Some(UseValidLangState {
7379
attribute_range: attribute_value.range(),
7480
invalid_kind: InvalidKind::Language,
7581
});
82+
} else if !is_valid_script(script) {
83+
return Some(UseValidLangState {
84+
attribute_range: attribute_value.range(),
85+
invalid_kind: InvalidKind::Script,
86+
});
7687
} else if !is_valid_country(country) {
7788
return Some(UseValidLangState {
7889
attribute_range: attribute_value.range(),
7990
invalid_kind: InvalidKind::Country,
8091
});
81-
} else if split_value.next().is_some() {
92+
}
93+
}
94+
95+
(Some(language), Some(script_or_country), None) => {
96+
if !is_valid_language(language) {
8297
return Some(UseValidLangState {
8398
attribute_range: attribute_value.range(),
84-
invalid_kind: InvalidKind::Value,
99+
invalid_kind: InvalidKind::Language,
85100
});
101+
} else if !is_valid_script(script_or_country)
102+
&& !is_valid_country(script_or_country)
103+
{
104+
match script_or_country.len() {
105+
4 => {
106+
return Some(UseValidLangState {
107+
attribute_range: attribute_value.range(),
108+
invalid_kind: InvalidKind::Script,
109+
});
110+
}
111+
2 | 3 => {
112+
return Some(UseValidLangState {
113+
attribute_range: attribute_value.range(),
114+
invalid_kind: InvalidKind::Country,
115+
});
116+
}
117+
_ => {
118+
return Some(UseValidLangState {
119+
attribute_range: attribute_value.range(),
120+
invalid_kind: InvalidKind::Value,
121+
});
122+
}
123+
}
86124
}
87125
}
88126

89-
(Some(language), None) => {
127+
(Some(language), None, None) => {
90128
if !is_valid_language(language) {
91129
return Some(UseValidLangState {
92130
attribute_range: attribute_value.range(),
@@ -130,6 +168,16 @@ impl Rule for UseValidLang {
130168

131169
diagnostic.footer_list("Some of valid countries:", countries)
132170
}
171+
InvalidKind::Script => {
172+
let scripts = biome_aria_metadata::scripts();
173+
let scripts = if scripts.len() > 15 {
174+
&scripts[..15]
175+
} else {
176+
scripts
177+
};
178+
179+
diagnostic.footer_list("Some of valid scripts:", scripts)
180+
}
133181
InvalidKind::Value => diagnostic,
134182
};
135183
Some(diagnostic)
crates/biome_js_analyze/tests/specs/a11y/useValidLang/invalid.jsx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
let a = <html lang="lorem" />;
22
let a = <html lang="en-babab" />;
33
let a = <html lang="en-GB-something" />;
4+
let a = <html lang="zh-Xxxx" />;
5+
let a = <html lang="zh-Hans-ZZ" />;
6+
let a = <html lang="en-US-GB-Extra" />;

crates/biome_js_analyze/tests/specs/a11y/useValidLang/invalid.jsx.snap

Lines changed: 91 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
---
22
source: crates/biome_js_analyze/tests/spec_tests.rs
33
expression: invalid.jsx
4-
snapshot_kind: text
54
---
65
# Input
76
```jsx
87
let a = <html lang="lorem" />;
98
let a = <html lang="en-babab" />;
109
let a = <html lang="en-GB-something" />;
10+
let a = <html lang="zh-Xxxx" />;
11+
let a = <html lang="zh-Hans-ZZ" />;
12+
let a = <html lang="en-US-GB-Extra" />;
1113
1214
```
1315

@@ -52,7 +54,88 @@ invalid.jsx:2:20 lint/a11y/useValidLang ━━━━━━━━━━━━━
5254
> 2 │ let a = <html lang="en-babab" />;
5355
│ ^^^^^^^^^^
5456
3 │ let a = <html lang="en-GB-something" />;
55-
4 │
57+
4 │ let a = <html lang="zh-Xxxx" />;
58+
59+
60+
```
61+
62+
```
63+
invalid.jsx:3:20 lint/a11y/useValidLang ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
64+
65+
× Provide a valid value for the lang attribute.
66+
67+
1 │ let a = <html lang="lorem" />;
68+
2 │ let a = <html lang="en-babab" />;
69+
> 3 │ let a = <html lang="en-GB-something" />;
70+
│ ^^^^^^^^^^^^^^^^^
71+
4 │ let a = <html lang="zh-Xxxx" />;
72+
5 │ let a = <html lang="zh-Hans-ZZ" />;
73+
74+
i Some of valid scripts:
75+
76+
- Arab
77+
- Armn
78+
- Beng
79+
- Cyrl
80+
- Deva
81+
- Ethi
82+
- Grek
83+
- Gujr
84+
- Guru
85+
- Hang
86+
- Hani
87+
- Hans
88+
- Hant
89+
- Hebr
90+
- Hira
91+
92+
93+
```
94+
95+
```
96+
invalid.jsx:4:20 lint/a11y/useValidLang ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
97+
98+
× Provide a valid value for the lang attribute.
99+
100+
2 │ let a = <html lang="en-babab" />;
101+
3 │ let a = <html lang="en-GB-something" />;
102+
> 4 │ let a = <html lang="zh-Xxxx" />;
103+
│ ^^^^^^^^^
104+
5 │ let a = <html lang="zh-Hans-ZZ" />;
105+
6 │ let a = <html lang="en-US-GB-Extra" />;
106+
107+
i Some of valid scripts:
108+
109+
- Arab
110+
- Armn
111+
- Beng
112+
- Cyrl
113+
- Deva
114+
- Ethi
115+
- Grek
116+
- Gujr
117+
- Guru
118+
- Hang
119+
- Hani
120+
- Hans
121+
- Hant
122+
- Hebr
123+
- Hira
124+
125+
126+
```
127+
128+
```
129+
invalid.jsx:5:20 lint/a11y/useValidLang ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
130+
131+
× Provide a valid value for the lang attribute.
132+
133+
3 │ let a = <html lang="en-GB-something" />;
134+
4 │ let a = <html lang="zh-Xxxx" />;
135+
> 5 │ let a = <html lang="zh-Hans-ZZ" />;
136+
│ ^^^^^^^^^^^^
137+
6 │ let a = <html lang="en-US-GB-Extra" />;
138+
7 │
56139
57140
i Some of valid countries:
58141
@@ -76,15 +159,15 @@ invalid.jsx:2:20 lint/a11y/useValidLang ━━━━━━━━━━━━━
76159
```
77160

78161
```
79-
invalid.jsx:3:20 lint/a11y/useValidLang ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
162+
invalid.jsx:6:20 lint/a11y/useValidLang ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
80163
81164
× Provide a valid value for the lang attribute.
82165
83-
1 │ let a = <html lang="lorem" />;
84-
2 │ let a = <html lang="en-babab" />;
85-
> 3 │ let a = <html lang="en-GB-something" />;
86-
│ ^^^^^^^^^^^^^^^^^
87-
4 │
166+
4 │ let a = <html lang="zh-Xxxx" />;
167+
5 │ let a = <html lang="zh-Hans-ZZ" />;
168+
> 6 │ let a = <html lang="en-US-GB-Extra" />;
169+
│ ^^^^^^^^^^^^^^^^
170+
7 │
88171
89172
90173
```

crates/biome_js_analyze/tests/specs/a11y/useValidLang/valid.jsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ let a = <html lang="en"></html>;
55
let a = <html lang={lang}></html>;
66
let a = <html lang="nb"></html>;
77
let a = <html lang="nn"></html>;
8+
let a = <html lang="zh-Hant"></html>;
9+
let a = <html lang="zh-Hans-CN"></html>;

crates/biome_js_analyze/tests/specs/a11y/useValidLang/valid.jsx.snap

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,7 @@ let a = <html lang="en"></html>;
1111
let a = <html lang={lang}></html>;
1212
let a = <html lang="nb"></html>;
1313
let a = <html lang="nn"></html>;
14+
let a = <html lang="zh-Hant"></html>;
15+
let a = <html lang="zh-Hans-CN"></html>;
1416
1517
```

0 commit comments

Comments
 (0)