Skip to content

Commit 01884f6

Browse files
authored
derive more clones, add to_writer impl, restore lost api (#58)
1 parent afc2d3e commit 01884f6

File tree

14 files changed

+94
-88
lines changed

14 files changed

+94
-88
lines changed

nlprule/src/compile/impls.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use crate::{
2020
id::Category,
2121
DisambiguationRule, Rule,
2222
},
23-
rules::{Rules, RulesLangOptions, RulesOptions},
23+
rules::{Rules, RulesLangOptions},
2424
tokenizer::{
2525
chunk,
2626
multiword::{MultiwordTagger, MultiwordTaggerFields},
@@ -353,10 +353,7 @@ impl Rules {
353353
);
354354
}
355355

356-
Rules {
357-
rules,
358-
options: RulesOptions::default(),
359-
}
356+
Rules { rules }
360357
}
361358
}
362359

nlprule/src/compile/mod.rs

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,35 +61,37 @@ impl BuildFilePaths {
6161
#[derive(Error, Debug)]
6262
#[allow(missing_docs)]
6363
pub enum Error {
64-
#[error("input/output error")]
64+
#[error(transparent)]
6565
Io(#[from] std::io::Error),
66-
#[error("serialization error")]
66+
#[error(transparent)]
6767
Serialization(#[from] bincode::Error),
68-
#[error("JSON deserialization error")]
68+
#[error(transparent)]
69+
NlpruleError(#[from] crate::Error),
70+
#[error(transparent)]
6971
Json(#[from] serde_json::Error),
70-
#[error("error loading SRX")]
72+
#[error(transparent)]
7173
Srx(#[from] srx::Error),
7274
#[error("language options do not exist for '{lang_code}'")]
7375
LanguageOptionsDoNotExist { lang_code: String },
74-
#[error("regex syntax error: {0}")]
76+
#[error(transparent)]
7577
RegexSyntax(#[from] regex_syntax::ast::Error),
7678
#[error("regex compilation error: {0}")]
7779
Regex(Box<dyn std::error::Error + Send + Sync + 'static>),
7880
#[error("unexpected condition: {0}")]
7981
Unexpected(String),
8082
#[error("feature not implemented: {0}")]
8183
Unimplemented(String),
82-
#[error("error parsing to integer: {0}")]
84+
#[error(transparent)]
8385
ParseError(#[from] ParseIntError),
84-
#[error("unknown error")]
86+
#[error("unknown error: {0}")]
8587
Other(#[from] Box<dyn std::error::Error + Send + Sync + 'static>),
8688
}
8789

8890
/// Compiles the binaries from a build directory.
8991
pub fn compile(
9092
build_dir: impl AsRef<Path>,
91-
mut rules_dest: impl io::Write,
92-
mut tokenizer_dest: impl io::Write,
93+
rules_dest: impl io::Write,
94+
tokenizer_dest: impl io::Write,
9395
) -> Result<(), Error> {
9496
let paths = BuildFilePaths::new(&build_dir);
9597

@@ -185,12 +187,11 @@ pub fn compile(
185187
srx::SRX::from_str(&fs::read_to_string(&paths.srx_path)?)?.language_rules(lang_code),
186188
tokenizer_lang_options,
187189
)?;
188-
189-
bincode::serialize_into(&mut tokenizer_dest, &tokenizer)?;
190+
tokenizer.to_writer(tokenizer_dest)?;
190191

191192
info!("Creating grammar rules.");
192193
let rules = Rules::from_xml(&paths.grammar_path, &mut build_info, rules_lang_options);
193-
bincode::serialize_into(&mut rules_dest, &rules)?;
194+
rules.to_writer(rules_dest)?;
194195

195196
// we need to write the regex cache after building the rules, otherwise it isn't fully populated
196197
let f = BufWriter::new(File::create(&paths.regex_cache_path)?);

nlprule/src/filter/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use enum_dispatch::enum_dispatch;
44
use serde::{Deserialize, Serialize};
55

66
#[enum_dispatch]
7-
#[derive(Serialize, Deserialize)]
7+
#[derive(Debug, Serialize, Deserialize, Clone)]
88
pub enum Filter {
99
NoDisambiguationEnglishPartialPosTagFilter,
1010
}
@@ -14,7 +14,7 @@ pub trait Filterable {
1414
fn keep(&self, sentence: &MatchSentence, graph: &MatchGraph) -> bool;
1515
}
1616

17-
#[derive(Serialize, Deserialize)]
17+
#[derive(Debug, Serialize, Deserialize, Clone)]
1818
pub struct NoDisambiguationEnglishPartialPosTagFilter {
1919
pub(crate) id: GraphId,
2020
pub(crate) regexp: Regex,

nlprule/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,9 @@ pub use tokenizer::Tokenizer;
8888
pub enum Error {
8989
#[error(transparent)]
9090
Io(#[from] io::Error),
91+
/// (De)serialization error. Can have occurred during deserialization or during serialization.
9192
#[error(transparent)]
92-
Deserialization(#[from] bincode::Error),
93+
Serialization(#[from] bincode::Error),
9394
#[error(transparent)]
9495
IdError(#[from] rule::id::Error),
9596
}

nlprule/src/rule/disambiguation.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ impl PosFilter {
3939
}
4040
}
4141

42-
#[derive(Serialize, Deserialize)]
42+
#[derive(Debug, Serialize, Deserialize, Clone)]
4343
pub enum Disambiguation {
4444
Remove(Vec<either::Either<owned::WordData, PosFilter>>),
4545
Add(Vec<owned::WordData>),
@@ -210,15 +210,15 @@ impl Disambiguation {
210210
}
211211
}
212212

213-
#[derive(Debug, Deserialize, Serialize)]
213+
#[derive(Debug, Deserialize, Serialize, Clone)]
214214
pub struct DisambiguationChange {
215215
pub text: String,
216216
pub char_span: Range<usize>,
217217
pub before: owned::Word,
218218
pub after: owned::Word,
219219
}
220220

221-
#[derive(Debug, Serialize, Deserialize)]
221+
#[derive(Debug, Serialize, Deserialize, Clone)]
222222
pub enum DisambiguationExample {
223223
Unchanged(String),
224224
Changed(DisambiguationChange),

nlprule/src/rule/engine/composition.rs

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use unicase::UniCase;
88

99
type Context<'a, 't> = (&'a MatchSentence<'t>, &'a MatchGraph<'t>);
1010

11-
#[derive(Debug, Serialize, Deserialize)]
11+
#[derive(Debug, Serialize, Deserialize, Clone)]
1212
pub struct Matcher {
1313
pub matcher: either::Either<either::Either<String, GraphId>, Regex>,
1414
pub negate: bool,
@@ -80,7 +80,7 @@ impl Matcher {
8080
}
8181
}
8282

83-
#[derive(Debug, Serialize, Deserialize)]
83+
#[derive(Debug, Serialize, Deserialize, Clone)]
8484
pub(crate) struct TextMatcher {
8585
pub(crate) matcher: Matcher,
8686
pub(crate) set: Option<DefaultHashSet<WordIdInt>>,
@@ -119,7 +119,7 @@ impl PosMatcher {
119119
}
120120
}
121121

122-
#[derive(Debug, Serialize, Deserialize)]
122+
#[derive(Debug, Serialize, Deserialize, Clone)]
123123
pub struct WordDataMatcher {
124124
pub(crate) pos_matcher: Option<PosMatcher>,
125125
pub(crate) inflect_matcher: Option<TextMatcher>,
@@ -153,7 +153,7 @@ impl WordDataMatcher {
153153
}
154154
}
155155

156-
#[derive(Debug, Serialize, Deserialize)]
156+
#[derive(Debug, Serialize, Deserialize, Clone)]
157157
pub struct Quantifier {
158158
pub min: usize,
159159
pub max: usize,
@@ -165,7 +165,7 @@ pub trait Atomable: Send + Sync {
165165
}
166166

167167
#[enum_dispatch(Atomable)]
168-
#[derive(Debug, Serialize, Deserialize)]
168+
#[derive(Debug, Serialize, Deserialize, Clone)]
169169
pub enum Atom {
170170
ChunkAtom(concrete::ChunkAtom),
171171
SpaceBeforeAtom(concrete::SpaceBeforeAtom),
@@ -183,7 +183,7 @@ pub mod concrete {
183183
use super::{Atomable, Context, Matcher, TextMatcher, WordDataMatcher};
184184
use serde::{Deserialize, Serialize};
185185

186-
#[derive(Debug, Serialize, Deserialize)]
186+
#[derive(Debug, Serialize, Deserialize, Clone)]
187187
pub struct TextAtom {
188188
pub(crate) matcher: TextMatcher,
189189
}
@@ -197,7 +197,7 @@ pub mod concrete {
197197
}
198198
}
199199

200-
#[derive(Debug, Serialize, Deserialize)]
200+
#[derive(Debug, Serialize, Deserialize, Clone)]
201201
pub struct ChunkAtom {
202202
pub(crate) matcher: Matcher,
203203
}
@@ -211,7 +211,7 @@ pub mod concrete {
211211
}
212212
}
213213

214-
#[derive(Debug, Serialize, Deserialize)]
214+
#[derive(Debug, Serialize, Deserialize, Clone)]
215215
pub struct SpaceBeforeAtom {
216216
pub(crate) value: bool,
217217
}
@@ -224,7 +224,7 @@ pub mod concrete {
224224
}
225225
}
226226

227-
#[derive(Debug, Serialize, Deserialize)]
227+
#[derive(Debug, Serialize, Deserialize, Clone)]
228228
pub struct WordDataAtom {
229229
pub(crate) matcher: WordDataMatcher,
230230
pub(crate) case_sensitive: bool,
@@ -241,7 +241,7 @@ pub mod concrete {
241241
}
242242
}
243243

244-
#[derive(Debug, Serialize, Deserialize, Default)]
244+
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
245245
pub struct TrueAtom {}
246246

247247
impl Atomable for TrueAtom {
@@ -250,7 +250,7 @@ impl Atomable for TrueAtom {
250250
}
251251
}
252252

253-
#[derive(Debug, Serialize, Deserialize, Default)]
253+
#[derive(Debug, Serialize, Deserialize, Default, Clone)]
254254
pub struct FalseAtom {}
255255

256256
impl Atomable for FalseAtom {
@@ -259,7 +259,7 @@ impl Atomable for FalseAtom {
259259
}
260260
}
261261

262-
#[derive(Debug, Serialize, Deserialize)]
262+
#[derive(Debug, Serialize, Deserialize, Clone)]
263263
pub struct AndAtom {
264264
pub(crate) atoms: Vec<Atom>,
265265
}
@@ -270,7 +270,7 @@ impl Atomable for AndAtom {
270270
}
271271
}
272272

273-
#[derive(Debug, Serialize, Deserialize)]
273+
#[derive(Debug, Serialize, Deserialize, Clone)]
274274
pub struct OrAtom {
275275
pub(crate) atoms: Vec<Atom>,
276276
}
@@ -281,7 +281,7 @@ impl Atomable for OrAtom {
281281
}
282282
}
283283

284-
#[derive(Debug, Serialize, Deserialize)]
284+
#[derive(Debug, Serialize, Deserialize, Clone)]
285285
pub struct NotAtom {
286286
pub(crate) atom: Box<Atom>,
287287
}
@@ -292,7 +292,7 @@ impl Atomable for NotAtom {
292292
}
293293
}
294294

295-
#[derive(Debug, Serialize, Deserialize)]
295+
#[derive(Debug, Serialize, Deserialize, Clone)]
296296
pub struct OffsetAtom {
297297
pub(crate) atom: Box<Atom>,
298298
pub(crate) offset: isize,
@@ -489,7 +489,7 @@ impl<'t> MatchGraph<'t> {
489489
}
490490
}
491491

492-
#[derive(Serialize, Deserialize, Debug)]
492+
#[derive(Serialize, Deserialize, Debug, Clone)]
493493
pub struct Part {
494494
pub atom: Atom,
495495
pub quantifier: Quantifier,
@@ -498,7 +498,7 @@ pub struct Part {
498498
pub unify: Option<bool>,
499499
}
500500

501-
#[derive(Serialize, Deserialize, Debug)]
501+
#[derive(Serialize, Deserialize, Debug, Clone)]
502502
pub struct Composition {
503503
pub(crate) parts: Vec<Part>,
504504
pub(crate) id_to_idx: DefaultHashMap<GraphId, usize>,

nlprule/src/rule/engine/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ pub mod composition;
77

88
use composition::{Composition, GraphId, Group, MatchGraph, MatchSentence};
99

10-
#[derive(Serialize, Deserialize, Debug)]
10+
#[derive(Serialize, Deserialize, Debug, Clone)]
1111
pub struct TokenEngine {
1212
pub(crate) composition: Composition,
1313
pub(crate) antipatterns: Vec<Composition>,
@@ -52,7 +52,7 @@ impl TokenEngine {
5252
}
5353
}
5454

55-
#[derive(Serialize, Deserialize, Debug)]
55+
#[derive(Serialize, Deserialize, Debug, Clone)]
5656
pub enum Engine {
5757
Token(TokenEngine),
5858
// regex with the `fancy_regex` backend is large on the stack

nlprule/src/rule/grammar.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use crate::types::*;
33
use crate::utils::{self, regex::Regex};
44
use serde::{Deserialize, Serialize};
55

6-
#[derive(Debug, Serialize, Deserialize)]
6+
#[derive(Debug, Serialize, Deserialize, Clone)]
77
pub enum Conversion {
88
Nop,
99
AllLower,
@@ -25,7 +25,7 @@ impl Conversion {
2525
}
2626

2727
/// An example associated with a [Rule][crate::rule::Rule].
28-
#[derive(Debug, Serialize, Deserialize)]
28+
#[derive(Debug, Serialize, Deserialize, Clone)]
2929
pub struct Example {
3030
pub(crate) text: String,
3131
pub(crate) suggestion: Option<Suggestion>,
@@ -45,7 +45,7 @@ impl Example {
4545
}
4646
}
4747

48-
#[derive(Serialize, Deserialize, Debug)]
48+
#[derive(Serialize, Deserialize, Debug, Clone)]
4949
pub struct PosReplacer {
5050
pub(crate) matcher: PosMatcher,
5151
}
@@ -85,7 +85,7 @@ impl PosReplacer {
8585
}
8686
}
8787

88-
#[derive(Serialize, Deserialize, Debug)]
88+
#[derive(Serialize, Deserialize, Debug, Clone)]
8989
pub struct Match {
9090
pub(crate) id: GraphId,
9191
pub(crate) conversion: Conversion,
@@ -118,14 +118,14 @@ impl Match {
118118
}
119119
}
120120

121-
#[derive(Debug, Serialize, Deserialize)]
121+
#[derive(Debug, Serialize, Deserialize, Clone)]
122122
pub enum SynthesizerPart {
123123
Text(String),
124124
// Regex with the `fancy_regex` backend is large on the stack
125125
Match(Box<Match>),
126126
}
127127

128-
#[derive(Debug, Serialize, Deserialize)]
128+
#[derive(Debug, Serialize, Deserialize, Clone)]
129129
pub struct Synthesizer {
130130
pub(crate) use_titlecase_adjust: bool,
131131
pub(crate) parts: Vec<SynthesizerPart>,

nlprule/src/rule/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use self::{
3131
/// A *Unification* makes an otherwise matching pattern invalid if no combination of its filters
3232
/// matches all tokens marked with "unify".
3333
/// Can also be negated.
34-
#[derive(Serialize, Deserialize, Debug)]
34+
#[derive(Serialize, Deserialize, Debug, Clone)]
3535
pub(crate) struct Unification {
3636
pub(crate) mask: Vec<Option<bool>>,
3737
pub(crate) filters: Vec<Vec<PosFilter>>,
@@ -81,7 +81,7 @@ impl Unification {
8181
/// <disambig action="replace"><wd lemma="have" pos="VB"></wd></disambig>
8282
/// </rule>
8383
/// ```
84-
#[derive(Serialize, Deserialize)]
84+
#[derive(Debug, Serialize, Deserialize, Clone)]
8585
pub struct DisambiguationRule {
8686
pub(crate) id: Index,
8787
pub(crate) engine: Engine,
@@ -371,7 +371,7 @@ impl<'a, 't> Iterator for Suggestions<'a, 't> {
371371
/// <example correction="doesn't">He <marker>dosn't</marker> know about it.</example>
372372
/// </rule>
373373
/// ```
374-
#[derive(Serialize, Deserialize, Debug)]
374+
#[derive(Serialize, Deserialize, Debug, Clone)]
375375
pub struct Rule {
376376
pub(crate) id: Index,
377377
pub(crate) engine: Engine,

0 commit comments

Comments
 (0)