@@ -4,12 +4,6 @@ routines (
4
4
mark_regions
5
5
do_possessive
6
6
do_case
7
- do_comparative
8
- do_diminutive
9
- do_augmentative
10
- do_derivational
11
- do_deriv_single
12
- do_aggressive
13
7
)
14
8
15
9
externals ( stem )
@@ -104,140 +98,13 @@ backwardmode (
104
98
)
105
99
)
106
100
)
107
-
108
- define do_derivational as (
109
- [substring] R1 among (
110
- 'obinec'
111
- 'ovisk' 'ovstv' 'ovi{s^}t' 'ovn{i'}k'
112
- '{a'}sek' 'loun' 'nost' 'teln' 'ovec' 'ov{i'}k' 'ovtv' 'ovin' '{s^}tin'
113
- '{a'}rn' 'och' 'ost' 'ovn' 'oun' 'out' 'ou{s^}' 'u{s^}k'
114
- 'kyn' '{c^}an' 'k{a'}{r^}' 'n{e'}{r^}' 'n{i'}k' 'ctv' 'stv'
115
- '{a'}{c^}' 'a{c^}' '{a'}n' 'an' '{a'}{r^}' 'as'
116
- 'ob' 'ot' 'ov' 'o{n^}' 'ul' 'yn'
117
- '{c^}k' '{c^}n' 'dl' 'nk' 'tv' 'tk' 'vk'
118
- (delete)
119
- 'ion{a'}{r^}'
120
- 'inec' 'itel'
121
- 'i{a'}n' 'ist' 'isk' 'i{s^}k' 'itb'
122
- 'ic' 'in' 'it' 'iv'
123
- (
124
- <- 'i'
125
- palatalise
126
- )
127
- 'enic' 'ec' 'en'
128
- (
129
- <- 'e'
130
- palatalise
131
- )
132
- '{e'}{r^}'
133
- (
134
- <- '{e'}'
135
- palatalise
136
- )
137
- '{e^}n'
138
- (
139
- <- '{e^}'
140
- palatalise
141
- )
142
- '{i'}rn'
143
- '{i'}{r^}' '{i'}n'
144
- (
145
- <- '{i'}'
146
- palatalise
147
- )
148
- )
149
- )
150
- define do_deriv_single as (
151
- [substring] among (
152
- 'c' '{c^}' 'k' 'l' 'n' 't'
153
- (delete)
154
- )
155
- )
156
-
157
-
158
- define do_augmentative as (
159
- [substring] among (
160
- 'ajzn' '{a'}k'
161
- (delete)
162
- 'izn' 'isk'
163
- (
164
- <- 'i'
165
- palatalise
166
- )
167
- )
168
- )
169
-
170
- define do_diminutive as (
171
- [substring] among (
172
- 'ou{s^}ek' '{a'}{c^}ek' 'a{c^}ek' 'o{c^}ek' 'u{c^}ek'
173
- 'anek' 'onek' 'unek' '{a'}nek'
174
- 'e{c^}k' '{e'}{c^}k' 'i{c^}k' '{i'}{c^}k' 'enk' '{e'}nk' 'ink' '{i'}nk'
175
- '{a'}{c^}k' 'a{c^}k' 'o{c^}k' 'u{c^}k' 'ank' 'onk' 'unk'
176
- '{a'}tk' '{a'}nk' 'u{s^}k'
177
- 'k'
178
- (delete)
179
- 'e{c^}ek' 'enek' 'ek'
180
- (
181
- <- 'e'
182
- palatalise
183
- )
184
- '{e'}{c^}ek' '{e'}k'
185
- (
186
- <- '{e'}'
187
- palatalise
188
- )
189
- 'i{c^}ek' 'inek' 'ik'
190
- (
191
- <- 'i'
192
- palatalise
193
- )
194
- '{i'}{c^}ek' '{i'}k'
195
- (
196
- <- '{i'}'
197
- palatalise
198
- )
199
- '{a'}k'
200
- (<- '{a'}')
201
- 'ak'
202
- (<- 'a')
203
- 'ok'
204
- (<- 'o')
205
- 'uk'
206
- (<- 'u')
207
- )
208
- )
209
-
210
- define do_comparative as (
211
- [substring] among (
212
- '{e^}j{s^}'
213
- (
214
- <- '{e^}'
215
- palatalise
216
- )
217
- 'ej{s^}'
218
- (
219
- <- 'e'
220
- palatalise
221
- )
222
- )
223
- )
224
-
225
- define do_aggressive as (
226
- do do_comparative
227
- do do_diminutive
228
- do do_augmentative
229
- do_derivational or do_deriv_single
230
- )
231
101
)
232
102
233
103
define stem as (
234
104
do mark_regions
235
105
backwards (
236
106
do_case
237
107
do_possessive
238
- // light and aggressive are the same to this point
239
- // comment next line for light stemmer
240
- // do_aggressive
241
108
)
242
109
)
243
110
@@ -246,4 +113,3 @@ define stem as (
246
113
// Inf. Process. Manage. 45, 6 (November 2009), 714-720.
247
114
// based on Java code by Ljiljana Dolamic:
248
115
// http://members.unine.ch/jacques.savoy/clef/CzechStemmerLight.txt
249
- // http://members.unine.ch/jacques.savoy/clef/CzechStemmerAgressive.txt
0 commit comments