Skip to content

Commit 2fefe24

Browse files
[Test] Add more utf-8 tests for structural tags. (#424)
This PR adds more tests for structural tags. Signed-off-by: Yuchuan <[email protected]>
1 parent 1b81f7c commit 2fefe24

File tree

1 file changed

+115
-0
lines changed

1 file changed

+115
-0
lines changed

tests/python/test_structural_tag_converter.py

Lines changed: 115 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1540,5 +1540,120 @@ def test_structural_tag_error(stag_format: Dict[str, Any]):
15401540
xgr.Grammar.from_structural_tag(structural_tag)
15411541

15421542

1543+
# Parametrization cases for the UTF-8 structural tag test.
# Each entry is a tuple of:
#   (structural tag format dict, instance string, expected is_accepted flag)
# All formats and instances deliberately contain non-ASCII (CJK / emoji) text.
utf8_stag_format_and_instance_accepted = [
    # const_string: a matching non-ASCII literal vs. an unrelated ASCII string.
    ({"type": "const_string", "value": "你好"}, "你好", True),
    ({"type": "const_string", "value": "你好"}, "hello", False),
    # any_text: a single emoji as the instance.
    ({"type": "any_text"}, "😊", True),
    # sequence: non-ASCII literal, JSON string, non-ASCII literal.
    (
        {
            "type": "sequence",
            "elements": [
                {"type": "const_string", "value": "开始"},
                {"type": "json_schema", "json_schema": {"type": "string"}},
                {"type": "const_string", "value": "结束"},
            ],
        },
        '开始"中间"结束',
        True,
    ),
    # Same sequence; this instance has no quoted string and no trailing "结束".
    (
        {
            "type": "sequence",
            "elements": [
                {"type": "const_string", "value": "开始"},
                {"type": "json_schema", "json_schema": {"type": "string"}},
                {"type": "const_string", "value": "结束"},
            ],
        },
        "开始中间内容",
        False,
    ),
    # tag: non-ASCII begin/end markers around free text.
    (
        {"type": "tag", "begin": "标签开始", "content": {"type": "any_text"}, "end": "标签结束"},
        "标签开始一些内容标签结束",
        True,
    ),
    # Same tag; this instance does not contain the end marker.
    (
        {"type": "tag", "begin": "标签开始", "content": {"type": "any_text"}, "end": "标签结束"},
        "标签开始一些内容",
        False,
    ),
    # or: the instance equals one of the listed alternatives.
    (
        {
            "type": "or",
            "elements": [
                {"type": "const_string", "value": "选项一"},
                {"type": "const_string", "value": "选项二"},
            ],
        },
        "选项一",
        True,
    ),
    # Same alternatives; this instance equals neither of them.
    (
        {
            "type": "or",
            "elements": [
                {"type": "const_string", "value": "选项一"},
                {"type": "const_string", "value": "选项二"},
            ],
        },
        "选项三",
        False,
    ),
    # tags_with_separator: two tags joined by the non-ASCII separator.
    (
        {
            "type": "tags_with_separator",
            "tags": [{"begin": "项开始", "content": {"type": "any_text"}, "end": "项结束"}],
            "separator": "分隔符",
        },
        "项开始内容1项结束分隔符项开始内容2项结束",
        True,
    ),
    # Same format; the two tags are adjacent with no separator between them.
    (
        {
            "type": "tags_with_separator",
            "tags": [{"begin": "项开始", "content": {"type": "any_text"}, "end": "项结束"}],
            "separator": "分隔符",
        },
        "项开始内容1项结束项开始内容2项结束",
        False,
    ),
    # json_schema: object with a non-ASCII property name and value.
    (
        {
            "type": "json_schema",
            "json_schema": {
                "type": "object",
                "properties": {"字段": {"type": "string"}},
                "required": ["字段"],
                "additionalProperties": False,
            },
        },
        '{"字段": "值"}',
        True,
    ),
    # qwen_xml_parameter: non-ASCII parameter name and value in XML form.
    (
        {
            "type": "qwen_xml_parameter",
            "json_schema": {
                "type": "object",
                "properties": {"参数": {"type": "string"}},
                "required": ["参数"],
                "additionalProperties": False,
            },
        },
        "<parameter=参数>值</parameter>",
        True,
    ),
]
1648+
1649+
1650+
@pytest.mark.parametrize(
    "stag_format, instance, is_accepted", utf8_stag_format_and_instance_accepted
)
def test_basic_structural_tag_utf8(
    stag_format: Dict[str, Any], instance: str, is_accepted: bool
) -> None:
    """Test structural tag with UTF-8 characters.

    Runs once per entry of ``utf8_stag_format_and_instance_accepted``.

    Args:
        stag_format: The structural tag format dict under test.
        instance: The candidate string containing non-ASCII text.
        is_accepted: The expected acceptance result for ``instance``.
    """
    # The actual check is delegated to the shared helper defined elsewhere in
    # this test module; the three values are forwarded unchanged.
    check_stag_with_instance(stag_format, instance, is_accepted)
1656+
1657+
15431658
# Allow running this test module directly as a script; the command-line
# arguments are handed to pytest as-is.
if __name__ == "__main__":
    pytest.main(sys.argv)

0 commit comments

Comments
 (0)