Skip to content

Commit 22a14f7

Browse files
committed
allow missing fields
Original fix from #33
1 parent 97e28da commit 22a14f7

File tree

3 files changed

+57
-6
lines changed

3 files changed

+57
-6
lines changed

src/spacy_conll/formatter.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@
3535
},
3636
)
3737
def create_conll_formatter(
38-
nlp: Language,
39-
name: str,
38+
nlp: Language, # qa: ignore
39+
name: str, # qa: ignore
4040
conversion_maps: Optional[Dict[str, Dict[str, str]]] = None,
4141
ext_names: Optional[Dict[str, str]] = None,
4242
field_names: Dict[str, str] = None,
@@ -200,12 +200,12 @@ def _set_token_conll(self, token: Token, token_idx: int = 1) -> Token:
200200
token_conll = (
201201
token_idx,
202202
token.text,
203-
token.lemma_,
204-
token.pos_,
205-
token.tag_,
203+
token.lemma_ if token.lemma_ else "_",
204+
token.pos_ if token.pos_ else "_",
205+
token.tag_ if token.tag_ else "_",
206206
str(token.morph) if token.has_morph and str(token.morph) else "_",
207207
head_idx,
208-
token.dep_,
208+
token.dep_ if token.dep_ else "_",
209209
token._.conll_deps_graphs_field,
210210
token._.conll_misc_field,
211211
)

tests/conftest.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from pathlib import Path
22

33
import pytest
4+
from spacy import Vocab
5+
from spacy.tokens import Doc, Token
46
from spacy.tokens.underscore import Underscore
57
from spacy_conll import init_parser
68

@@ -156,3 +158,26 @@ def conllparser_parse_conllfile(spacy_vanila):
156158
return ConllParser(spacy_vanila).parse_conll_as_spacy(
157159
Path(__file__).parent.joinpath("en_ewt-ud-dev.conllu-sample.txt"), input_encoding="utf-8"
158160
)
161+
162+
163+
@pytest.fixture
164+
def spacy_vocab():
165+
return Vocab(strings=["hello", "world"])
166+
167+
168+
@pytest.fixture
169+
def spacy_doc(spacy_vocab):
170+
words = ["hello", "world", "!"]
171+
spaces = [True, False, False]
172+
sent_starts = [True, False, False]
173+
return Doc(
174+
spacy_vocab,
175+
words=words,
176+
spaces=spaces,
177+
sent_starts=sent_starts,
178+
)
179+
180+
181+
@pytest.fixture
182+
def spacy_token(spacy_vocab, spacy_doc):
183+
return Token(spacy_vocab, spacy_doc, 1)

tests/test_formatter.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from collections import OrderedDict
2+
3+
from spacy.tokens import Token
4+
from spacy_conll.formatter import ConllFormatter
5+
6+
7+
def test_set_token_conll(spacy_token: Token):
8+
"""Test for https://github.com/BramVanroy/spacy_conll/issues/29"""
9+
formatter = ConllFormatter()
10+
assert formatter._set_token_conll(spacy_token)._.get("conll") == OrderedDict(
11+
[
12+
("ID", 1),
13+
("FORM", "world"),
14+
("LEMMA", "_"),
15+
("UPOS", "_"),
16+
("XPOS", "_"),
17+
("FEATS", "_"),
18+
("HEAD", 2),
19+
("DEPREL", "_"),
20+
("DEPS", "_"),
21+
("MISC", "SpaceAfter=No"),
22+
]
23+
)
24+
assert (
25+
formatter._set_token_conll(spacy_token)._.get("conll_str") == "1\tworld\t_\t_\t_\t_\t2\t_\t_\tSpaceAfter=No\n"
26+
)

0 commit comments

Comments
 (0)