Skip to content

Commit 148c3e7

Browse files
[Refac] Refactor the behavior of the jsonschema converter to speed it up. (#417)
Notice that the same schema often contains identical string structures. For example, `{"type":"string", "maxLength": 1024}` can appear in a schema many times. This PR builds a single rule for each distinct string spec and reuses it, which can improve efficiency in some cases. --------- Signed-off-by: Yuchuan <[email protected]>
1 parent bf2d7d2 commit 148c3e7

File tree

3 files changed

+89
-50
lines changed

3 files changed

+89
-50
lines changed

cpp/json_schema_converter.cc

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88

99
#include <climits>
1010
#include <cstdint>
11+
#include <functional>
1112
#include <iostream>
1213
#include <optional>
1314
#include <sstream>
1415
#include <string>
16+
#include <unordered_map>
1517
#include <unordered_set>
1618
#include <utility>
1719
#include <vector>
@@ -400,6 +402,22 @@ class JSONSchemaConverter {
400402
int min_length = 0;
401403
int max_length = -1;
402404
std::pair<std::string, std::string> wrapper;
405+
bool operator==(const StringSpec& other) const {
406+
return pattern == other.pattern && min_length == other.min_length &&
407+
max_length == other.max_length && wrapper == other.wrapper;
408+
}
409+
};
410+
411+
struct StringSpecHash {
412+
size_t operator()(const StringSpec& spec) const {
413+
return HashCombine(
414+
std::hash<std::string>()(spec.pattern),
415+
spec.min_length,
416+
spec.max_length,
417+
std::hash<std::string>()(spec.wrapper.first),
418+
std::hash<std::string>()(spec.wrapper.second)
419+
);
420+
}
403421
};
404422

405423
Result<StringSpec, SchemaError> ParseStringSchema(
@@ -558,6 +576,8 @@ class JSONSchemaConverter {
558576
std::unordered_map<std::string, std::string> uri_to_rule_cache_;
559577
// The maximum number of whitespaces allowed when any_whitespace_ is true.
560578
std::optional<int> max_whitespace_cnt_;
579+
// The map from string spec to the rule name.
580+
std::unordered_map<StringSpec, std::string, StringSpecHash> string_spec_to_rule_name_and_context_;
561581

562582
const std::string kWhiteSpace =
563583
max_whitespace_cnt_.has_value()
@@ -1956,22 +1976,48 @@ std::string JSONSchemaConverter::VisitString(
19561976
XGRAMMAR_LOG(FATAL) << std::move(string_spec_result).UnwrapErr().what();
19571977
}
19581978
auto string_spec = std::move(string_spec_result).Unwrap();
1959-
std::string result;
1979+
1980+
// Check if we have already generated a rule for this string spec.
1981+
if (string_spec_to_rule_name_and_context_.find(string_spec) !=
1982+
string_spec_to_rule_name_and_context_.end()) {
1983+
const auto& existing_rule_name = string_spec_to_rule_name_and_context_.at(string_spec);
1984+
return existing_rule_name;
1985+
}
1986+
1987+
if (string_spec.pattern == "[\"] " + kBasicStringSub && string_spec.min_length == 0 &&
1988+
string_spec.max_length == -1 && string_spec.wrapper.first.empty() &&
1989+
string_spec.wrapper.second.empty()) {
1990+
// It's the creation of the basic string rule.
1991+
string_spec_to_rule_name_and_context_[string_spec] = kBasicString;
1992+
return string_spec.pattern;
1993+
}
1994+
1995+
if (string_spec.pattern == kXMLString && string_spec.min_length == 0 &&
1996+
string_spec.max_length == -1 && string_spec.wrapper.first.empty() &&
1997+
string_spec.wrapper.second.empty()) {
1998+
string_spec_to_rule_name_and_context_[string_spec] = kXMLString;
1999+
return kXMLString;
2000+
}
2001+
2002+
// Generate a new rule name for this string spec.
2003+
std::string spec_context;
19602004
if (!string_spec.wrapper.first.empty()) {
1961-
result += "\"" + string_spec.wrapper.first + "\" ";
2005+
spec_context += "\"" + string_spec.wrapper.first + "\" ";
19622006
}
1963-
result += string_spec.pattern;
2007+
spec_context += string_spec.pattern;
19642008
if (string_spec.min_length != 0 || string_spec.max_length != -1) {
19652009
std::string repetition_range;
19662010
repetition_range +=
19672011
"{" + std::to_string(string_spec.min_length) + "," +
19682012
(string_spec.max_length == -1 ? "" : std::to_string(string_spec.max_length)) + "}";
1969-
result += repetition_range;
2013+
spec_context += repetition_range;
19702014
}
19712015
if (!string_spec.wrapper.second.empty()) {
1972-
result += " \"" + string_spec.wrapper.second + "\"";
2016+
spec_context += " \"" + string_spec.wrapper.second + "\"";
19732017
}
1974-
return result;
2018+
std::string spec_rule_name = ebnf_script_creator_.AddRule("string", spec_context);
2019+
string_spec_to_rule_name_and_context_[string_spec] = spec_rule_name;
2020+
return spec_rule_name;
19752021
}
19762022

19772023
std::string JSONSchemaConverter::VisitBoolean(
@@ -3173,6 +3219,7 @@ Result<JSONSchemaConverter::StringSpec, SchemaError> JSONSchemaConverter::ParseS
31733219
}
31743220
default: {
31753221
XGRAMMAR_LOG(FATAL) << "Unsupported JSON Format type: " << static_cast<int>(json_format);
3222+
XGRAMMAR_UNREACHABLE();
31763223
}
31773224
}
31783225
}

tests/python/test_function_calling_converter.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -338,9 +338,12 @@ def test_string_format_length_schema(input_str: str, accepted: bool):
338338
basic_null ::= "null"
339339
basic_array ::= (("[" [ \n\t]* basic_any ([ \n\t]* "," [ \n\t]* basic_any)* [ \n\t]* "]") | ("[" [ \n\t]* "]"))
340340
basic_object ::= ("{" [ \n\t]* basic_string [ \n\t]* ":" [ \n\t]* basic_any ([ \n\t]* "," [ \n\t]* basic_string [ \n\t]* ":" [ \n\t]* basic_any)* [ \n\t]* "}") | "{" [ \n\t]* "}"
341-
root_prop_0 ::= [^<>&\r\n]{1,}
342-
root_prop_1_prop_0 ::= "\"" [0-9]{5} "\""
343-
root_prop_1_prop_1 ::= "\"" ( ( [a-zA-Z0-9_!#$%&'*+/=?^`{|}~-]+ ( "." [a-zA-Z0-9_!#$%&'*+/=?^`{|}~-]+ )* ) | "\\" "\"" ( "\\" [ -~] | [ !#-[\]-~] )* "\\" "\"" ) "@" ( [A-Za-z0-9] ( [\-A-Za-z0-9]* [A-Za-z0-9] )? ) ( ( "." [A-Za-z0-9] [\-A-Za-z0-9]* [A-Za-z0-9] )* ) "\""
341+
string ::= [^<>&\r\n]{1,}
342+
root_prop_0 ::= string
343+
string_0 ::= "\"" [0-9]{5} "\""
344+
root_prop_1_prop_0 ::= string_0
345+
string_1 ::= "\"" ( ( [a-zA-Z0-9_!#$%&'*+/=?^`{|}~-]+ ( "." [a-zA-Z0-9_!#$%&'*+/=?^`{|}~-]+ )* ) | "\\" "\"" ( "\\" [ -~] | [ !#-[\]-~] )* "\\" "\"" ) "@" ( [A-Za-z0-9] ( [\-A-Za-z0-9]* [A-Za-z0-9] )? ) ( ( "." [A-Za-z0-9] [\-A-Za-z0-9]* [A-Za-z0-9] )* ) "\""
346+
root_prop_1_prop_1 ::= string_1
344347
root_prop_1_part_0 ::= [ \n\t]* "," [ \n\t]* "\"email\"" [ \n\t]* ":" [ \n\t]* root_prop_1_prop_1 ""
345348
root_prop_1 ::= "{" [ \n\t]* (("\"phone\"" [ \n\t]* ":" [ \n\t]* root_prop_1_prop_0 root_prop_1_part_0)) [ \n\t]* "}"
346349
root_part_0 ::= [ \n\t]* "<parameter=contact_info>" [ \n\t]* root_prop_1 [ \n\t]* "</parameter>" ""

tests/python/test_json_schema_converter.py

Lines changed: 30 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1565,9 +1565,8 @@ def test_email_format(instance: str, accepted: bool):
15651565
schema = {"type": "string", "format": "email"}
15661566

15671567
expected_grammar = basic_json_rules_ebnf + (
1568-
r"""root ::= "\"" ( ( [a-zA-Z0-9_!#$%&'*+/=?^`{|}~-]+ ( "." [a-zA-Z0-9_!#$%&'*+/=?^`{|}~-]+ )* ) """
1569-
r"""| "\\" "\"" ( "\\" [ -~] | [ !#-[\]-~] )* "\\" "\"" ) "@" ( [A-Za-z0-9] ( [\-A-Za-z0-9]* [A-Za-z0-9] )? ) """
1570-
r"""( ( "." [A-Za-z0-9] [\-A-Za-z0-9]* [A-Za-z0-9] )* ) "\""
1568+
r"""string ::= "\"" ( ( [a-zA-Z0-9_!#$%&'*+/=?^`{|}~-]+ ( "." [a-zA-Z0-9_!#$%&'*+/=?^`{|}~-]+ )* ) | "\\" "\"" ( "\\" [ -~] | [ !#-[\]-~] )* "\\" "\"" ) "@" ( [A-Za-z0-9] ( [\-A-Za-z0-9]* [A-Za-z0-9] )? ) ( ( "." [A-Za-z0-9] [\-A-Za-z0-9]* [A-Za-z0-9] )* ) "\""
1569+
root ::= string
15711570
"""
15721571
)
15731572
check_schema_with_grammar(schema, expected_grammar)
@@ -1591,7 +1590,8 @@ def test_date_format(instance: str, accepted: bool):
15911590
schema = {"type": "string", "format": "date"}
15921591

15931592
expected_grammar = basic_json_rules_ebnf + (
1594-
r"""root ::= "\"" ( [0-9]{4} "-" ( "0" [1-9] | "1" [0-2] ) "-" ( "0" [1-9] | [1-2] [0-9] | "3" [01] ) ) "\""
1593+
r"""string ::= "\"" ( [0-9]{4} "-" ( "0" [1-9] | "1" [0-2] ) "-" ( "0" [1-9] | [1-2] [0-9] | "3" [01] ) ) "\""
1594+
root ::= string
15951595
"""
15961596
)
15971597
check_schema_with_grammar(schema, expected_grammar)
@@ -1627,7 +1627,8 @@ def test_time_format(instance: str, accepted: bool):
16271627
schema = {"type": "string", "format": "time"}
16281628

16291629
expected_grammar = basic_json_rules_ebnf + (
1630-
r"""root ::= "\"" ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] ":" ( [0-5] [0-9] | "6" "0" ) ( "." [0-9]+ )? ( "Z" | [+-] ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] ) "\""
1630+
r"""string ::= "\"" ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] ":" ( [0-5] [0-9] | "6" "0" ) ( "." [0-9]+ )? ( "Z" | [+-] ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] ) "\""
1631+
root ::= string
16311632
"""
16321633
)
16331634
check_schema_with_grammar(schema, expected_grammar)
@@ -1667,7 +1668,8 @@ def test_duration_format(instance: str, accepted: bool):
16671668
schema = {"type": "string", "format": "duration"}
16681669

16691670
expected_grammar = basic_json_rules_ebnf + (
1670-
r"""root ::= "\"" "P" ( ( [0-9]+ "D" | [0-9]+ "M" ( [0-9]+ "D" )? | [0-9]+ "Y" ( [0-9]+ "M" ( [0-9]+ "D" )? )? ) ( "T" ( [0-9]+ "S" | [0-9]+ "M" ( [0-9]+ "S" )? | [0-9]+ "H" ( [0-9]+ "M" ( [0-9]+ "S" )? )? ) )? | "T" ( [0-9]+ "S" | [0-9]+ "M" ( [0-9]+ "S" )? | [0-9]+ "H" ( [0-9]+ "M" ( [0-9]+ "S" )? )? ) | [0-9]+ "W" ) "\""
1671+
r"""string ::= "\"" "P" ( ( [0-9]+ "D" | [0-9]+ "M" ( [0-9]+ "D" )? | [0-9]+ "Y" ( [0-9]+ "M" ( [0-9]+ "D" )? )? ) ( "T" ( [0-9]+ "S" | [0-9]+ "M" ( [0-9]+ "S" )? | [0-9]+ "H" ( [0-9]+ "M" ( [0-9]+ "S" )? )? ) )? | "T" ( [0-9]+ "S" | [0-9]+ "M" ( [0-9]+ "S" )? | [0-9]+ "H" ( [0-9]+ "M" ( [0-9]+ "S" )? )? ) | [0-9]+ "W" ) "\""
1672+
root ::= string
16711673
"""
16721674
)
16731675
check_schema_with_grammar(schema, expected_grammar)
@@ -1734,15 +1736,8 @@ def test_ipv6_format(instance: str, accepted: bool):
17341736
schema = {"type": "string", "format": "ipv6"}
17351737

17361738
expected_grammar = basic_json_rules_ebnf + (
1737-
r"""root ::= "\"" ( ( [0-9a-fA-F]{1,4} ":" ){7,7} [0-9a-fA-F]{1,4} | ( [0-9a-fA-F]{1,4} ":" ){1,7} ":" """
1738-
r"""| ( [0-9a-fA-F]{1,4} ":" ){1,6} ":" [0-9a-fA-F]{1,4} | ( [0-9a-fA-F]{1,4} ":" ){1,5} ( ":" [0-9a-fA-F]{1,4} ){1,2} """
1739-
r"""| ( [0-9a-fA-F]{1,4} ":" ){1,4} ( ":" [0-9a-fA-F]{1,4} ){1,3} | ( [0-9a-fA-F]{1,4} ":" ){1,3} """
1740-
r"""( ":" [0-9a-fA-F]{1,4} ){1,4} | ( [0-9a-fA-F]{1,4} ":" ){1,2} ( ":" [0-9a-fA-F]{1,4} ){1,5} | """
1741-
r"""[0-9a-fA-F]{1,4} ":" ( ( ":" [0-9a-fA-F]{1,4} ){1,6} ) | ":" ( ( ":" [0-9a-fA-F]{1,4} ){1,7} | ":" ) """
1742-
r"""| ":" ":" ( "f" "f" "f" "f" ( ":" "0"{1,4} ){0,1} ":" ){0,1} ( ( "2" "5" [0-5] | ( "2" [0-4] """
1743-
r"""| "1"{0,1} [0-9] ){0,1} [0-9] ) "." ){3,3} ( "2" "5" [0-5] | ( "2" [0-4] | "1"{0,1} [0-9] ){0,1} [0-9] ) """
1744-
r"""| ( [0-9a-fA-F]{1,4} ":" ){1,4} ":" ( ( "2" "5" [0-5] | ( "2" [0-4] | "1"{0,1} [0-9] ){0,1} [0-9] ) "." ){3,3} """
1745-
r"""( "2" "5" [0-5] | ( "2" [0-4] | "1"{0,1} [0-9] ){0,1} [0-9] ) ) "\""
1739+
r"""string ::= "\"" ( ( [0-9a-fA-F]{1,4} ":" ){7,7} [0-9a-fA-F]{1,4} | ( [0-9a-fA-F]{1,4} ":" ){1,7} ":" | ( [0-9a-fA-F]{1,4} ":" ){1,6} ":" [0-9a-fA-F]{1,4} | ( [0-9a-fA-F]{1,4} ":" ){1,5} ( ":" [0-9a-fA-F]{1,4} ){1,2} | ( [0-9a-fA-F]{1,4} ":" ){1,4} ( ":" [0-9a-fA-F]{1,4} ){1,3} | ( [0-9a-fA-F]{1,4} ":" ){1,3} ( ":" [0-9a-fA-F]{1,4} ){1,4} | ( [0-9a-fA-F]{1,4} ":" ){1,2} ( ":" [0-9a-fA-F]{1,4} ){1,5} | [0-9a-fA-F]{1,4} ":" ( ( ":" [0-9a-fA-F]{1,4} ){1,6} ) | ":" ( ( ":" [0-9a-fA-F]{1,4} ){1,7} | ":" ) | ":" ":" ( "f" "f" "f" "f" ( ":" "0"{1,4} ){0,1} ":" ){0,1} ( ( "2" "5" [0-5] | ( "2" [0-4] | "1"{0,1} [0-9] ){0,1} [0-9] ) "." ){3,3} ( "2" "5" [0-5] | ( "2" [0-4] | "1"{0,1} [0-9] ){0,1} [0-9] ) | ( [0-9a-fA-F]{1,4} ":" ){1,4} ":" ( ( "2" "5" [0-5] | ( "2" [0-4] | "1"{0,1} [0-9] ){0,1} [0-9] ) "." ){3,3} ( "2" "5" [0-5] | ( "2" [0-4] | "1"{0,1} [0-9] ){0,1} [0-9] ) ) "\""
1740+
root ::= string
17461741
"""
17471742
)
17481743
check_schema_with_grammar(schema, expected_grammar)
@@ -1771,7 +1766,8 @@ def test_ipv4_format(instance: str, accepted: bool):
17711766
schema = {"type": "string", "format": "ipv4"}
17721767

17731768
expected_grammar = basic_json_rules_ebnf + (
1774-
r"""root ::= "\"" ( ( "2" "5" [0-5] | "2" [0-4] [0-9] | [0-1]? [0-9]? [0-9] ) "." ){3} ( "2" "5" [0-5] | "2" [0-4] [0-9] | [0-1]? [0-9]? [0-9] ) "\""
1769+
r"""string ::= "\"" ( ( "2" "5" [0-5] | "2" [0-4] [0-9] | [0-1]? [0-9]? [0-9] ) "." ){3} ( "2" "5" [0-5] | "2" [0-4] [0-9] | [0-1]? [0-9]? [0-9] ) "\""
1770+
root ::= string
17751771
"""
17761772
)
17771773
check_schema_with_grammar(schema, expected_grammar)
@@ -1809,7 +1805,8 @@ def test_hostname_format(instance: str, accepted: bool):
18091805
schema = {"type": "string", "format": "hostname"}
18101806

18111807
expected_grammar = basic_json_rules_ebnf + (
1812-
r"""root ::= "\"" ( [a-z0-9] ( [a-z0-9-]* [a-z0-9] )? ) ( "." [a-z0-9] ( [a-z0-9-]* [a-z0-9] )? )* "\""
1808+
r"""string ::= "\"" ( [a-z0-9] ( [a-z0-9-]* [a-z0-9] )? ) ( "." [a-z0-9] ( [a-z0-9-]* [a-z0-9] )? )* "\""
1809+
root ::= string
18131810
"""
18141811
)
18151812
check_schema_with_grammar(schema, expected_grammar)
@@ -1841,7 +1838,8 @@ def test_uuid_format(instance: str, accepted: bool):
18411838
schema = {"type": "string", "format": "uuid"}
18421839

18431840
expected_grammar = basic_json_rules_ebnf + (
1844-
r"""root ::= "\"" [0-9A-Fa-f]{8} "-" [0-9A-Fa-f]{4} "-" [0-9A-Fa-f]{4} "-" [0-9A-Fa-f]{4} "-" [0-9A-Fa-f]{12} "\""
1841+
r"""string ::= "\"" [0-9A-Fa-f]{8} "-" [0-9A-Fa-f]{4} "-" [0-9A-Fa-f]{4} "-" [0-9A-Fa-f]{4} "-" [0-9A-Fa-f]{12} "\""
1842+
root ::= string
18451843
"""
18461844
)
18471845
check_schema_with_grammar(schema, expected_grammar)
@@ -1876,12 +1874,8 @@ def test_uri_format(instance: str, accepted: bool):
18761874
schema = {"type": "string", "format": "uri"}
18771875

18781876
expected_grammar = basic_json_rules_ebnf + (
1879-
r"""root ::= "\"" [a-zA-Z] [a-zA-Z+.-]* ":" ( "/" "/" ( ( [a-zA-Z0-9_.~!$&'()*+,;=:-] | "%" """
1880-
r"""[0-9A-Fa-f] [0-9A-Fa-f] )* "@" )? ( [a-zA-Z0-9_.~!$&'()*+,;=-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* """
1881-
r"""( ":" [0-9]* )? ( "/" ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )* | "/"? ( """
1882-
r"""( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )+ ( "/" ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] """
1883-
r"""| "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )* )? ) ( "\?" ( [a-zA-Z0-9_.~!$&'()*+,;=:@/\?-] | "%" [0-9A-Fa-f] """
1884-
r"""[0-9A-Fa-f] )* )? ( "#" ( [a-zA-Z0-9_.~!$&'()*+,;=:@/\?-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )? "\""
1877+
r"""string ::= "\"" [a-zA-Z] [a-zA-Z+.-]* ":" ( "/" "/" ( ( [a-zA-Z0-9_.~!$&'()*+,;=:-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* "@" )? ( [a-zA-Z0-9_.~!$&'()*+,;=-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* ( ":" [0-9]* )? ( "/" ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )* | "/"? ( ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )+ ( "/" ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )* )? ) ( "\?" ( [a-zA-Z0-9_.~!$&'()*+,;=:@/\?-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )? ( "#" ( [a-zA-Z0-9_.~!$&'()*+,;=:@/\?-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )? "\""
1878+
root ::= string
18851879
"""
18861880
)
18871881
check_schema_with_grammar(schema, expected_grammar)
@@ -1916,14 +1910,8 @@ def test_uri_reference_format(instance: str, accepted: bool):
19161910
schema = {"type": "string", "format": "uri-reference"}
19171911

19181912
expected_grammar = basic_json_rules_ebnf + (
1919-
r"""root ::= "\"" ( "/" "/" ( ( [a-zA-Z0-9_.~!$&'()*+,;=:-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] """
1920-
r""")* "@" )? ( [a-zA-Z0-9_.~!$&'()*+,;=-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* ( ":" [0-9]* )? """
1921-
r"""( "/" ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )* | "/" ( """
1922-
r"""( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )+ ( "/" ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] """
1923-
r"""| "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )* )? | ( [a-zA-Z0-9_.~!$&'()*+,;=@-] | "%" """
1924-
r"""[0-9A-Fa-f] [0-9A-Fa-f] )+ ( "/" ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )* )? """
1925-
r"""( "\?" ( [a-zA-Z0-9_.~!$&'()*+,;=:@/\?-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )? ( "#" """
1926-
r"""( [a-zA-Z0-9_.~!$&'()*+,;=:@/\?-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )? "\""
1913+
r"""string ::= "\"" ( "/" "/" ( ( [a-zA-Z0-9_.~!$&'()*+,;=:-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* "@" )? ( [a-zA-Z0-9_.~!$&'()*+,;=-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* ( ":" [0-9]* )? ( "/" ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )* | "/" ( ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )+ ( "/" ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )* )? | ( [a-zA-Z0-9_.~!$&'()*+,;=@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )+ ( "/" ( [a-zA-Z0-9_.~!$&'()*+,;=:@-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )* )? ( "\?" ( [a-zA-Z0-9_.~!$&'()*+,;=:@/\?-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )? ( "#" ( [a-zA-Z0-9_.~!$&'()*+,;=:@/\?-] | "%" [0-9A-Fa-f] [0-9A-Fa-f] )* )? "\""
1914+
root ::= string
19271915
"""
19281916
)
19291917
check_schema_with_grammar(schema, expected_grammar)
@@ -1962,11 +1950,8 @@ def test_uri_template_format(instance: str, accepted: bool):
19621950
schema = {"type": "string", "format": "uri-template"}
19631951

19641952
expected_grammar = basic_json_rules_ebnf + (
1965-
r"""root ::= "\"" ( ( [!#-$&(-;=\?-[\]_a-z~] | "%" [0-9A-Fa-f] [0-9A-Fa-f] ) | "{" """
1966-
r"""( [+#./;\?&=,!@|] )? ( [a-zA-Z0-9_] | "%" [0-9A-Fa-f] [0-9A-Fa-f] ) ( "."? """
1967-
r"""( [a-zA-Z0-9_] | "%" [0-9A-Fa-f] [0-9A-Fa-f] ) )* ( ":" [1-9] [0-9]? [0-9]? [0-9]? """
1968-
r"""| "*" )? ( "," ( [a-zA-Z0-9_] | "%" [0-9A-Fa-f] [0-9A-Fa-f] ) ( "."? ( [a-zA-Z0-9_] """
1969-
r"""| "%" [0-9A-Fa-f] [0-9A-Fa-f] ) )* ( ":" [1-9] [0-9]? [0-9]? [0-9]? | "*" )? )* "}" )* "\""
1953+
r"""string ::= "\"" ( ( [!#-$&(-;=\?-[\]_a-z~] | "%" [0-9A-Fa-f] [0-9A-Fa-f] ) | "{" ( [+#./;\?&=,!@|] )? ( [a-zA-Z0-9_] | "%" [0-9A-Fa-f] [0-9A-Fa-f] ) ( "."? ( [a-zA-Z0-9_] | "%" [0-9A-Fa-f] [0-9A-Fa-f] ) )* ( ":" [1-9] [0-9]? [0-9]? [0-9]? | "*" )? ( "," ( [a-zA-Z0-9_] | "%" [0-9A-Fa-f] [0-9A-Fa-f] ) ( "."? ( [a-zA-Z0-9_] | "%" [0-9A-Fa-f] [0-9A-Fa-f] ) )* ( ":" [1-9] [0-9]? [0-9]? [0-9]? | "*" )? )* "}" )* "\""
1954+
root ::= string
19701955
"""
19711956
)
19721957
check_schema_with_grammar(schema, expected_grammar)
@@ -1990,7 +1975,8 @@ def test_json_pointer_format(instance: str, accepted: bool):
19901975
schema = {"type": "string", "format": "json-pointer"}
19911976

19921977
expected_grammar = basic_json_rules_ebnf + (
1993-
r"""root ::= "\"" ( "/" ( [\0-.] | [0-}] | [\x7f-\U0010ffff] | "~" [01] )* )* "\""
1978+
r"""string ::= "\"" ( "/" ( [\0-.] | [0-}] | [\x7f-\U0010ffff] | "~" [01] )* )* "\""
1979+
root ::= string
19941980
"""
19951981
)
19961982
check_schema_with_grammar(schema, expected_grammar)
@@ -2017,7 +2003,8 @@ def test_relative_json_pointer_format(instance: str, accepted: bool):
20172003
schema = {"type": "string", "format": "relative-json-pointer"}
20182004

20192005
expected_grammar = basic_json_rules_ebnf + (
2020-
r"""root ::= "\"" ( "0" | [1-9] [0-9]* ) ( "#" | ( "/" ( [\0-.] | [0-}] | [\x7f-\U0010ffff] | "~" [01] )* )* ) "\""
2006+
r"""string ::= "\"" ( "0" | [1-9] [0-9]* ) ( "#" | ( "/" ( [\0-.] | [0-}] | [\x7f-\U0010ffff] | "~" [01] )* )* ) "\""
2007+
root ::= string
20212008
"""
20222009
)
20232010
check_schema_with_grammar(schema, expected_grammar)
@@ -2029,7 +2016,8 @@ def test_min_max_length():
20292016
schema = {"type": "string", "minLength": 1, "maxLength": 10}
20302017

20312018
ebnf_grammar = basic_json_rules_ebnf + (
2032-
r"""root ::= "\"" [^"\\\r\n]{1,10} "\""
2019+
r"""string ::= "\"" [^"\\\r\n]{1,10} "\""
2020+
root ::= string
20332021
"""
20342022
)
20352023

@@ -2053,7 +2041,8 @@ def test_type_array():
20532041

20542042
ebnf_grammar = basic_json_rules_ebnf + (
20552043
r"""root_integer ::= ( ( [1-9] | "1" "0" ) )
2056-
root_string ::= "\"" [^"\\\r\n]{1,10} "\""
2044+
string ::= "\"" [^"\\\r\n]{1,10} "\""
2045+
root_string ::= string
20572046
root ::= root_integer | root_string
20582047
"""
20592048
)

0 commit comments

Comments
 (0)