Skip to content

Commit bfcfa3d

Browse files
[Fix] Fix the incorrect acceptance of some invalid characters in JSON strings. (#318)
This PR fixes the bug reported in #286. Previously, the characters `[\0-\x1f]` were incorrectly accepted inside JSON strings by the grammar generated from the input JSON schema; they should be recognized as invalid there. This PR excludes them from the `basic_string_sub` rule and updates the expected grammars in the tests accordingly. --------- Co-authored-by: Yixin Dong <[email protected]>
1 parent 9fecc61 commit bfcfa3d

File tree

3 files changed

+7
-7
lines changed

3 files changed

+7
-7
lines changed

cpp/json_schema_converter.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -555,8 +555,8 @@ void JSONSchemaConverter::AddHelperRules() {
555555
);
556556
ebnf_script_creator_.AddRule(
557557
kBasicStringSub,
558-
"(\"\\\"\" | [^\"\\\\\\r\\n] " + kBasicStringSub + " | \"\\\\\" " + kBasicEscape + " " +
559-
kBasicStringSub + ") (= [ \\n\\t]* [,}\\]:])"
558+
"(\"\\\"\" | [^\\0-\\x1f\\\"\\\\\\r\\n] " + kBasicStringSub + " | \"\\\\\" " + kBasicEscape +
559+
" " + kBasicStringSub + ") (= [ \\n\\t]* [,}\\]:])"
560560
);
561561
}
562562

tests/python/test_grammar_matcher_structural_tag.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,20 +120,20 @@ def test_tag_dispatch_mask_generation_correctness():
120120
expected_grammar_test_structural_tag = r"""root ::= TagDispatch(("<function=f", trigger_rule_0), ("<function=g", trigger_rule_1))
121121
trigger_rule_0 ::= (("1>" root_1 "</function>") | ("2>" root_2 "</function>"))
122122
basic_escape ::= (([\"\\/bfnrt]) | ("u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9])) (=(basic_string_sub))
123-
basic_string_sub ::= (("\"") | ([^\"\\\r\n] basic_string_sub) | ("\\" basic_escape basic_string_sub)) (=([ \n\t]* [,}\]:]))
123+
basic_string_sub ::= (("\"") | ([^\0-\x1f\"\\\r\n] basic_string_sub) | ("\\" basic_escape basic_string_sub)) (=([ \n\t]* [,}\]:]))
124124
basic_integer ::= (("0") | (basic_integer_1 [1-9] [0-9]*)) (=([ \n\t]* "}"))
125125
basic_string ::= (("\"" basic_string_sub)) (=([ \n\t]* "," [ \n\t]* "\"arg2\"" [ \n\t]* ":" [ \n\t]* basic_integer [ \n\t]* "}"))
126126
root_1 ::= (("{" [ \n\t]* "\"arg1\"" [ \n\t]* ":" [ \n\t]* basic_string [ \n\t]* "," [ \n\t]* "\"arg2\"" [ \n\t]* ":" [ \n\t]* basic_integer [ \n\t]* "}"))
127127
basic_integer_1 ::= ("" | ("-")) (=([1-9] [0-9]*))
128128
basic_escape_1 ::= (([\"\\/bfnrt]) | ("u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9])) (=(basic_string_sub_1))
129-
basic_string_sub_1 ::= (("\"") | ([^\"\\\r\n] basic_string_sub_1) | ("\\" basic_escape_1 basic_string_sub_1)) (=([ \n\t]* [,}\]:]))
129+
basic_string_sub_1 ::= (("\"") | ([^\0-\x1f\"\\\r\n] basic_string_sub_1) | ("\\" basic_escape_1 basic_string_sub_1)) (=([ \n\t]* [,}\]:]))
130130
basic_integer_2 ::= (("0") | (basic_integer_1_1 [1-9] [0-9]*)) (=([ \n\t]* "}"))
131131
basic_string_1 ::= (("\"" basic_string_sub_1)) (=([ \n\t]* "," [ \n\t]* "\"arg2\"" [ \n\t]* ":" [ \n\t]* basic_integer_2 [ \n\t]* "}"))
132132
root_2 ::= (("{" [ \n\t]* "\"arg1\"" [ \n\t]* ":" [ \n\t]* basic_string_1 [ \n\t]* "," [ \n\t]* "\"arg2\"" [ \n\t]* ":" [ \n\t]* basic_integer_2 [ \n\t]* "}"))
133133
basic_integer_1_1 ::= ("" | ("-")) (=([1-9] [0-9]*))
134134
trigger_rule_1 ::= ((">" root_3 "</function>"))
135135
basic_escape_2 ::= (([\"\\/bfnrt]) | ("u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9])) (=(basic_string_sub_2))
136-
basic_string_sub_2 ::= (("\"") | ([^\"\\\r\n] basic_string_sub_2) | ("\\" basic_escape_2 basic_string_sub_2)) (=([ \n\t]* [,}\]:]))
136+
basic_string_sub_2 ::= (("\"") | ([^\0-\x1f\"\\\r\n] basic_string_sub_2) | ("\\" basic_escape_2 basic_string_sub_2)) (=([ \n\t]* [,}\]:]))
137137
basic_number ::= ((basic_number_7 basic_number_3 basic_number_6)) (=([ \n\t]* "," [ \n\t]* "\"arg4\"" [ \n\t]* ":" [ \n\t]* root_prop_1 [ \n\t]* "}"))
138138
basic_string_2 ::= (("\"" basic_string_sub_2))
139139
root_prop_1 ::= (("[" [ \n\t]* basic_string_2 root_prop_1_1 [ \n\t]* "]") | ("[" [ \n\t]* "]")) (=([ \n\t]* "}"))

tests/python/test_json_schema_converter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
)
1616

1717
basic_json_rules_ebnf = r"""basic_escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
18-
basic_string_sub ::= ("\"" | [^"\\\r\n] basic_string_sub | "\\" basic_escape basic_string_sub) (= [ \n\t]* [,}\]:])
18+
basic_string_sub ::= ("\"" | [^\0-\x1f\"\\\r\n] basic_string_sub | "\\" basic_escape basic_string_sub) (= [ \n\t]* [,}\]:])
1919
basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
2020
basic_integer ::= ("0" | "-"? [1-9] [0-9]*)
2121
basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
@@ -27,7 +27,7 @@
2727
"""
2828

2929
basic_json_rules_ebnf_no_space = r"""basic_escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
30-
basic_string_sub ::= ("\"" | [^"\\\r\n] basic_string_sub | "\\" basic_escape basic_string_sub) (= [ \n\t]* [,}\]:])
30+
basic_string_sub ::= ("\"" | [^\0-\x1f\"\\\r\n] basic_string_sub | "\\" basic_escape basic_string_sub) (= [ \n\t]* [,}\]:])
3131
basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
3232
basic_integer ::= ("0" | "-"? [1-9] [0-9]*)
3333
basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?

0 commit comments

Comments
 (0)