1+ import warnings
12from typing import Callable , Dict , Iterable , List , Optional , Tuple , Type
23
34import jinja2
@@ -36,12 +37,14 @@ def __init__(
3637 self ._case_sensitive_matching = case_sensitive_matching
3738 self ._single_match = single_match
3839
39- self ._check_label_consistency ()
40+ if self ._examples :
41+ self ._examples = self ._check_label_consistency ()
4042
41- def _check_label_consistency (self ) -> None :
42- """Checks consistency of labels between examples and defined labels."""
43- if not self ._examples :
44- return
43+ def _check_label_consistency (self ) -> List [SpanExample ]:
44+ """Checks consistency of labels between examples and defined labels. Emits warning on inconsistency.
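45+ RETURNS (List[SpanExample]): Examples restricted to labels declared in the task configuration. Examples left without any entities are discarded.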
46+ """
47+ assert self ._examples
4548 example_labels = {
4649 self ._normalizer (key ): key
4750 for example in self ._examples
@@ -52,13 +55,30 @@ def _check_label_consistency(self) -> None:
5255 for key in (set (example_labels .keys ()) - set (self ._label_dict .keys ()))
5356 }
5457 if not set (example_labels .keys ()) <= set (self ._label_dict .keys ()):
55- raise ValueError (
58+ warnings . warn (
5659 f"Examples contain labels that are not specified in the task configuration. The latter contains the "
5760 f"following labels: { sorted (list (set (self ._label_dict .values ())))} . Labels in examples missing from "
5861 f"the task configuration: { sorted (list (unspecified_labels ))} . Please ensure your label specification "
5962 f"and example labels are consistent."
6063 )
6164
65+ # Return examples without non-declared labels. If an example only has undeclared labels, it is discarded.
66+ return [
67+ example
68+ for example in [
69+ SpanExample (
70+ text = example .text ,
71+ entities = {
72+ label : entities
73+ for label , entities in example .entities .items ()
74+ if self ._normalizer (label ) in self ._label_dict
75+ },
76+ )
77+ for example in self ._examples
78+ ]
79+ if len (example .entities )
80+ ]
81+
@property
def labels(self) -> Tuple[str, ...]:
    """Return the values of the internal label dictionary as a tuple.

    RETURNS (Tuple[str, ...]): The values of `self._label_dict`, in the dictionary's iteration order.
    """
    declared_labels = self._label_dict.values()
    return tuple(declared_labels)
0 commit comments