|
1 | 1 | import json |
| 2 | +import re |
2 | 3 | from pathlib import Path |
3 | 4 |
|
4 | 5 | import pytest |
@@ -200,7 +201,7 @@ def test_ner_config(cfg_string, request): |
200 | 201 | labels = split_labels(labels) |
201 | 202 | task = pipe.task |
202 | 203 | assert isinstance(task, Labeled) |
203 | | - assert task.labels == tuple(labels) |
| 204 | + assert sorted(task.labels) == sorted(tuple(labels)) |
204 | 205 | assert pipe.labels == task.labels |
205 | 206 | assert nlp.pipe_labels["llm"] == list(task.labels) |
206 | 207 |
|
@@ -827,3 +828,55 @@ def test_ner_to_disk(noop_config, tmp_path: Path): |
827 | 828 | nlp2.from_disk(path) |
828 | 829 |
|
829 | 830 | assert task1._label_dict == task2._label_dict == labels |
| 831 | + |
| 832 | + |
def test_label_inconsistency():
    """Check that example labels missing from the task's label list trigger a warning.

    The task is configured with the labels PERSON and LOCATION, and its few-shot
    examples are read from ``examples/ner_inconsistent.yml``. Assembling the pipeline
    is expected to emit a ``UserWarning`` that names TECH as a label found in the
    examples but absent from the configuration. Afterwards, both prompt examples held
    by the task are expected to carry entities for LOCATION and PERSON only.
    """
    examples_path = Path(__file__).parent / "examples" / "ner_inconsistent.yml"
    cfg = f"""
    [nlp]
    lang = "en"
    pipeline = ["llm"]

    [components]

    [components.llm]
    factory = "llm"

    [components.llm.task]
    @llm_tasks = "spacy.NER.v2"
    labels = ["PERSON", "LOCATION"]

    [components.llm.task.examples]
    @misc = "spacy.FewShotReader.v1"
    path = {str(examples_path)}

    [components.llm.model]
    @llm_models = "test.NoOpModel.v1"
    """

    expected_warning = (
        "Examples contain labels that are not specified in the task configuration. The latter contains the "
        "following labels: ['LOCATION', 'PERSON']. Labels in examples missing from the task configuration: "
        "['TECH']. Please ensure your label specification and example labels are consistent."
    )
    config = Config().from_str(cfg)
    # The message contains brackets and dots, so it is escaped to be matched literally.
    with pytest.warns(UserWarning, match=re.escape(expected_warning)):
        nlp = assemble_from_config(config)

    # Both examples are expected to expose the same entities, restricted to the
    # labels that were configured on the task.
    expected_entities = {
        "LOCATION": ["hill"],
        "PERSON": ["Jack", "Jill"],
    }
    expected_texts = (
        "Jack and Jill went up the hill.",
        "Jack and Jill went up the hill and spaCy is a great tool.",
    )

    prompt_examples = nlp.get_pipe("llm")._task._prompt_examples
    assert len(prompt_examples) == 2
    for idx, expected_text in enumerate(expected_texts):
        assert prompt_examples[idx].text == expected_text
        assert prompt_examples[idx].entities == expected_entities
0 commit comments