@@ -1540,5 +1540,120 @@ def test_structural_tag_error(stag_format: Dict[str, Any]):
1540
1540
xgr .Grammar .from_structural_tag (structural_tag )
1541
1541
1542
1542
1543
def _utf8_const(value: str) -> Dict[str, Any]:
    """Return a ``const_string`` format whose literal is ``value``."""
    return {"type": "const_string", "value": value}


def _utf8_any_text() -> Dict[str, Any]:
    """Return a fresh ``any_text`` format."""
    return {"type": "any_text"}


def _utf8_quoted_between(begin: str, end: str) -> Dict[str, Any]:
    """Return a ``sequence``: ``begin`` literal, a JSON string, ``end`` literal."""
    return {
        "type": "sequence",
        "elements": [
            _utf8_const(begin),
            {"type": "json_schema", "json_schema": {"type": "string"}},
            _utf8_const(end),
        ],
    }


def _utf8_tag(begin: str, end: str) -> Dict[str, Any]:
    """Return a ``tag`` format wrapping ``any_text`` between ``begin`` and ``end``."""
    return {"type": "tag", "begin": begin, "content": _utf8_any_text(), "end": end}


def _utf8_choice(*values: str) -> Dict[str, Any]:
    """Return an ``or`` format over one ``const_string`` per entry of ``values``."""
    return {"type": "or", "elements": [_utf8_const(value) for value in values]}


def _utf8_separated_items() -> Dict[str, Any]:
    """Return the ``tags_with_separator`` format shared by the separator cases."""
    return {
        "type": "tags_with_separator",
        "tags": [{"begin": "项开始", "content": _utf8_any_text(), "end": "项结束"}],
        "separator": "分隔符",
    }


def _utf8_single_string_object(field: str) -> Dict[str, Any]:
    """Return an object schema with exactly one required string property ``field``."""
    return {
        "type": "object",
        "properties": {field: {"type": "string"}},
        "required": [field],
        "additionalProperties": False,
    }


# Each case is (structural tag format, candidate instance, expected acceptance).
# Every format and instance contains non-ASCII text (CJK or emoji), except the
# ASCII-only instance "hello" in the const_string rejection case.
utf8_stag_format_and_instance_accepted = [
    # const_string: exact match vs. a different string.
    (_utf8_const("你好"), "你好", True),
    (_utf8_const("你好"), "hello", False),
    # any_text with an emoji instance.
    (_utf8_any_text(), "😊", True),
    # sequence: quoted JSON string framed by two literals vs. unquoted/unterminated.
    (_utf8_quoted_between("开始", "结束"), '开始"中间"结束', True),
    (_utf8_quoted_between("开始", "结束"), "开始中间内容", False),
    # tag: with and without the closing marker.
    (_utf8_tag("标签开始", "标签结束"), "标签开始一些内容标签结束", True),
    (_utf8_tag("标签开始", "标签结束"), "标签开始一些内容", False),
    # or: a listed option vs. an unlisted one.
    (_utf8_choice("选项一", "选项二"), "选项一", True),
    (_utf8_choice("选项一", "选项二"), "选项三", False),
    # tags_with_separator: separator present vs. missing between the two tags.
    (_utf8_separated_items(), "项开始内容1项结束分隔符项开始内容2项结束", True),
    (_utf8_separated_items(), "项开始内容1项结束项开始内容2项结束", False),
    # json_schema with a non-ASCII property name.
    (
        {"type": "json_schema", "json_schema": _utf8_single_string_object("字段")},
        '{"字段": "值"}',
        True,
    ),
    # qwen_xml_parameter with a non-ASCII parameter name.
    (
        {"type": "qwen_xml_parameter", "json_schema": _utf8_single_string_object("参数")},
        "<parameter=参数>值</parameter>",
        True,
    ),
]
1648
+
1649
+
1650
@pytest.mark.parametrize(
    "stag_format, instance, is_accepted",
    utf8_stag_format_and_instance_accepted,
)
def test_basic_structural_tag_utf8(
    stag_format: Dict[str, Any],
    instance: str,
    is_accepted: bool,
):
    """Run one UTF-8 structural tag case through ``check_stag_with_instance``.

    Args:
        stag_format: The structural tag format under test.
        instance: The candidate string, containing non-ASCII text in most cases.
        is_accepted: The expected acceptance flag forwarded to the helper.
    """
    # All checking is delegated; this wrapper only supplies the UTF-8 cases.
    check_stag_with_instance(stag_format, instance, is_accepted)
1656
+
1657
+
1543
1658
if __name__ == "__main__":
    # Executing this file directly hands the unmodified command line to pytest.
    cli_args = sys.argv
    pytest.main(cli_args)
0 commit comments