Skip to content

Commit 1ab04c6

Browse files
committed
refactor: better information hiding
1 parent f70d238 commit 1ab04c6

19 files changed

+675
-575
lines changed

README.md

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Transliteration means representing Cyrillic data (mainly names and geographic lo
88

99
```python
1010
>>> import iuliia
11-
>>> iuliia.translate("Юлия Щеглова", schema=iuliia.WIKIPEDIA)
11+
>>> iuliia.WIKIPEDIA.translate("Юлия Щеглова")
1212
'Yuliya Shcheglova'
1313
```
1414

@@ -31,6 +31,22 @@ pip install iuliia
3131

3232
## Usage
3333

34+
Transliterate using a specified schema:
35+
36+
```python
37+
>>> source = "Юлия Щеглова"
38+
>>> iuliia.ICAO_DOC_9303.translate(source)
39+
'Iuliia Shcheglova'
40+
```
41+
42+
Or pick a schema by name:
43+
44+
```python
45+
>>> schema = iuliia.Schemas.get("wikipedia")
46+
>>> schema.translate(source)
47+
'Yuliya Shcheglova'
48+
```
49+
3450
List all supported schemas:
3551

3652
```python
@@ -70,22 +86,6 @@ yandex_maps Yandex.Maps transliteration schema
7086
yandex_money Yandex.Money transliteration schema
7187
```
7288

73-
Transliterate using specified schema:
74-
75-
```python
76-
>>> source = "Юлия Щеглова"
77-
>>> iuliia.translate(source, schema=iuliia.ICAO_DOC_9303)
78-
'Iuliia Shcheglova'
79-
```
80-
81-
Or pick schema by name
82-
83-
```python
84-
>>> schema = iuliia.Schemas.get("wikipedia")
85-
>>> iuliia.translate(source, schema)
86-
'Yuliya Shcheglova'
87-
```
88-
8989
Command line:
9090

9191
```sh

iuliia/__init__.py

Lines changed: 61 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,38 +2,69 @@
22
Transliterate Cyrillic → Latin in every possible way
33
"""
44

5-
# flake8: noqa
65
from .engine import translate
76
from .schema import Schema
87
from .schemas import Schemas
98

10-
__all__ = [] # type: ignore
9+
__all__ = [
10+
"translate",
11+
"Schema",
12+
"Schemas",
13+
"ALA_LC",
14+
"ALA_LC_ALT",
15+
"BGN_PCGN",
16+
"BGN_PCGN_ALT",
17+
"BS_2979",
18+
"BS_2979_ALT",
19+
"GOST_16876",
20+
"GOST_16876_ALT",
21+
"GOST_52290",
22+
"GOST_52535",
23+
"GOST_7034",
24+
"GOST_779",
25+
"GOST_779_ALT",
26+
"ICAO_DOC_9303",
27+
"ISO_9_1954",
28+
"ISO_9_1968",
29+
"ISO_9_1968_ALT",
30+
"MOSMETRO",
31+
"MVD_310",
32+
"MVD_310_FR",
33+
"MVD_782",
34+
"SCIENTIFIC",
35+
"TELEGRAM",
36+
"UNGEGN_1987",
37+
"UZ",
38+
"WIKIPEDIA",
39+
"YANDEX_MAPS",
40+
"YANDEX_MONEY",
41+
]
1142

12-
ALA_LC = Schemas.ala_lc.value # type: ignore
13-
ALA_LC_ALT = Schemas.ala_lc_alt.value # type: ignore
14-
BGN_PCGN = Schemas.bgn_pcgn.value # type: ignore
15-
BGN_PCGN_ALT = Schemas.bgn_pcgn_alt.value # type: ignore
16-
BS_2979 = Schemas.bs_2979.value # type: ignore
17-
BS_2979_ALT = Schemas.bs_2979_alt.value # type: ignore
18-
GOST_16876 = Schemas.gost_16876.value # type: ignore
19-
GOST_16876_ALT = Schemas.gost_16876_alt.value # type: ignore
20-
GOST_52290 = Schemas.gost_52290.value # type: ignore
21-
GOST_52535 = Schemas.gost_52535.value # type: ignore
22-
GOST_7034 = Schemas.gost_7034.value # type: ignore
23-
GOST_779 = Schemas.gost_779.value # type: ignore
24-
GOST_779_ALT = Schemas.gost_779_alt.value # type: ignore
25-
ICAO_DOC_9303 = Schemas.icao_doc_9303.value # type: ignore
26-
ISO_9_1954 = Schemas.iso_9_1954.value # type: ignore
27-
ISO_9_1968 = Schemas.iso_9_1968.value # type: ignore
28-
ISO_9_1968_ALT = Schemas.iso_9_1968_alt.value # type: ignore
29-
MOSMETRO = Schemas.mosmetro.value # type: ignore
30-
MVD_310 = Schemas.mvd_310.value # type: ignore
31-
MVD_310_FR = Schemas.mvd_310_fr.value # type: ignore
32-
MVD_782 = Schemas.mvd_782.value # type: ignore
33-
SCIENTIFIC = Schemas.scientific.value # type: ignore
34-
TELEGRAM = Schemas.telegram.value # type: ignore
35-
UNGEGN_1987 = Schemas.ungegn_1987.value # type: ignore
36-
UZ = Schemas.uz.value # type: ignore
37-
WIKIPEDIA = Schemas.wikipedia.value # type: ignore
38-
YANDEX_MAPS = Schemas.yandex_maps.value # type: ignore
39-
YANDEX_MONEY = Schemas.yandex_money.value # type: ignore
43+
ALA_LC = Schemas.ala_lc.value
44+
ALA_LC_ALT = Schemas.ala_lc_alt.value
45+
BGN_PCGN = Schemas.bgn_pcgn.value
46+
BGN_PCGN_ALT = Schemas.bgn_pcgn_alt.value
47+
BS_2979 = Schemas.bs_2979.value
48+
BS_2979_ALT = Schemas.bs_2979_alt.value
49+
GOST_16876 = Schemas.gost_16876.value
50+
GOST_16876_ALT = Schemas.gost_16876_alt.value
51+
GOST_52290 = Schemas.gost_52290.value
52+
GOST_52535 = Schemas.gost_52535.value
53+
GOST_7034 = Schemas.gost_7034.value
54+
GOST_779 = Schemas.gost_779.value
55+
GOST_779_ALT = Schemas.gost_779_alt.value
56+
ICAO_DOC_9303 = Schemas.icao_doc_9303.value
57+
ISO_9_1954 = Schemas.iso_9_1954.value
58+
ISO_9_1968 = Schemas.iso_9_1968.value
59+
ISO_9_1968_ALT = Schemas.iso_9_1968_alt.value
60+
MOSMETRO = Schemas.mosmetro.value
61+
MVD_310 = Schemas.mvd_310.value
62+
MVD_310_FR = Schemas.mvd_310_fr.value
63+
MVD_782 = Schemas.mvd_782.value
64+
SCIENTIFIC = Schemas.scientific.value
65+
TELEGRAM = Schemas.telegram.value
66+
UNGEGN_1987 = Schemas.ungegn_1987.value
67+
UZ = Schemas.uz.value
68+
WIKIPEDIA = Schemas.wikipedia.value
69+
YANDEX_MAPS = Schemas.yandex_maps.value
70+
YANDEX_MONEY = Schemas.yandex_money.value

iuliia/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def main():
2222
sys.exit(1)
2323

2424
source = sys.argv[2]
25-
result = iuliia.translate(source, schema)
25+
result = schema.translate(source)
2626
print(result)
2727

2828

iuliia/engine.py

Lines changed: 11 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,79 +1,20 @@
11
"""
2-
Translate engine.
2+
Translate engine (deprecated).
33
"""
44

5-
import re
6-
from typing import Iterator
5+
import warnings
76
from .schema import Schema
87

9-
SPLITTER = re.compile(r"\b")
10-
118

129
def translate(source: str, schema: Schema) -> str:
1310
"""
14-
Translate source Cyrillic string into Latin using specified schema.
15-
Translates sentences word by word, delegating specifics of transliteration
16-
to specified schema.
17-
"""
18-
words = (word for word in SPLITTER.split(source) if word)
19-
translated = (_translate_word(word, schema) for word in words)
20-
return "".join(translated)
21-
22-
23-
def _translate_word(word: str, schema: Schema) -> str:
24-
"""Translate word using specified schema."""
25-
stem, ending = _split_word(word)
26-
translated_ending = schema.translate_ending(ending)
27-
if translated_ending:
28-
# There is a specific translation for the ending,
29-
# so we need to translate the stem and ending separately.
30-
translated = _translate_letters(stem, schema)
31-
translated.append(translated_ending)
32-
else:
33-
# There is no specific translation for the ending,
34-
# so we can translate the whole word at once.
35-
translated = _translate_letters(word, schema)
36-
return "".join(translated)
37-
38-
39-
def _translate_letters(word: str, schema: Schema) -> list[str]:
40-
"""Translate letters of a word using specified schema."""
41-
translated = []
42-
for prev, curr, next_ in _letter_reader(word):
43-
letter = schema.translate_letter(prev, curr, next_)
44-
translated.append(letter)
45-
return translated
46-
47-
48-
def _split_word(word: str) -> tuple[str, str]:
49-
"""
50-
Split word into stem and ending.
51-
Ending is the last two letters of the word.
52-
"""
53-
ending_length = 2
54-
if len(word) > ending_length:
55-
stem = word[:-ending_length]
56-
ending = word[-ending_length:]
57-
else:
58-
stem = word
59-
ending = ""
60-
return stem, ending
61-
62-
63-
def _letter_reader(stem: str) -> Iterator[tuple[str, str, str]]:
64-
"""
65-
Yield letters of a word in (prev, curr, next) tuples.
66-
E.g. for "word" it will yield:
67-
("", "w", "o")
68-
("w", "o", "r")
69-
("o", "r", "d")
70-
("r", "d", "")
11+
Translate the Cyrillic string into Latin using the provided schema.
12+
Delegates transliteration specifics to the schema.
13+
DEPRECATED: Use schema.translate() instead.
7114
"""
72-
if not stem:
73-
return
74-
it = iter(stem)
75-
prev, curr = "", next(it, "")
76-
for next_ in it:
77-
yield prev, curr, next_
78-
prev, curr = curr, next_
79-
yield prev, curr, ""
15+
warnings.warn(
16+
"The 'translate' function is deprecated. Use schema.translate() instead.",
17+
DeprecationWarning,
18+
stacklevel=2,
19+
)
20+
return schema.translate(source)

iuliia/lazy.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
"""
2+
Lazy schema loader.
3+
"""
4+
5+
import json
6+
from pathlib import Path
7+
from .schema import TranslitSchema
8+
from .validator import Validator
9+
10+
11+
class Loader:
12+
"""Loads transliteration schemas from JSON files."""
13+
14+
def __init__(self):
15+
self.base_path = Path(__file__).parent / "schemas"
16+
17+
def load(self, name: str) -> TranslitSchema:
18+
"""Load schema by its name."""
19+
path = self.base_path / f"{name}.json"
20+
if not path.exists():
21+
raise ValueError(f"Schema path does not exist: {path}")
22+
defn = self._load_definition(path)
23+
Validator(defn).run()
24+
return TranslitSchema(
25+
name=defn.get("name", ""),
26+
description=defn.get("description"),
27+
mapping=defn.get("mapping", {}),
28+
prev_mapping=defn.get("prev_mapping"),
29+
next_mapping=defn.get("next_mapping"),
30+
ending_mapping=defn.get("ending_mapping"),
31+
samples=defn.get("samples"),
32+
)
33+
34+
def _load_definition(self, path: str | Path) -> dict:
35+
with open(path, encoding="utf-8") as file:
36+
return json.load(file)
37+
38+
39+
class LazySchema:
40+
"""
41+
Transliteration schema. Translates Cyrillic text into Latin
42+
using a given set of rules (mappings).
43+
Lazy loads schema from JSON file as needed.
44+
"""
45+
46+
def __init__(self, name: str):
47+
self.name = name
48+
self._loader = Loader()
49+
self._schema: TranslitSchema | None = None
50+
51+
@property
52+
def description(self) -> str | None:
53+
"""Schema description."""
54+
if self._schema is None:
55+
self._schema = self._loader.load(self.name)
56+
return self._schema.description
57+
58+
@property
59+
def samples(self) -> list[list[str]]:
60+
"""Schema samples."""
61+
if self._schema is None:
62+
self._schema = self._loader.load(self.name)
63+
return self._schema.samples
64+
65+
def translate(self, source: str) -> str:
66+
"""
67+
Translate source Cyrillic string into Latin.
68+
Translates the source string word by word.
69+
"""
70+
if self._schema is None:
71+
self._schema = self._loader.load(self.name)
72+
return self._schema.translate(source)
73+
74+
def __str__(self):
75+
return self.name
76+
77+
def __repr__(self):
78+
return f"{self.__class__.__name__}('{self.name}')"

iuliia/mapping.py

Lines changed: 0 additions & 60 deletions
This file was deleted.

0 commit comments

Comments
 (0)