Skip to content

Commit 68c25e3

Browse files
committed
Don't raise when encountering collation/equivalent lookalikes
1 parent dfc84b5 commit 68c25e3

File tree

4 files changed

+21
-13
lines changed

4 files changed

+21
-13
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
### Fixed
44

5+
- fixed error when scanning some unlikely and redundant but valid character set patterns
6+
- e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
57
- fixed ancestry of some error classes related to syntax version lookup
68
- `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
79
- they inherited from Ruby's `::SyntaxError` instead of `Regexp::Syntax::SyntaxError`

lib/regexp_parser/parser.rb

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,6 @@ def set(token)
143143
range(token)
144144
when :intersection
145145
intersection(token)
146-
when :collation, :equivalent
147-
node << Literal.new(token, active_opts)
148146
else
149147
raise UnknownTokenError.new('CharacterSet', token)
150148
end

lib/regexp_parser/scanner/scanner.rl

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
class_posix = ('[:' . '^'? . class_name_posix . ':]');
3838

3939

40-
# these are not supported in ruby, and need verification
40+
# these are not supported in ruby at the moment
4141
collating_sequence = '[.' . (alpha | [\-])+ . '.]';
4242
character_equivalent = '[=' . alpha . '=]';
4343

@@ -228,13 +228,13 @@
228228
emit(type, class_name.to_sym, text)
229229
};
230230

231-
collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
232-
emit(:set, :collation, copy(data, ts, te))
233-
};
234-
235-
character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
236-
emit(:set, :equivalent, copy(data, ts, te))
237-
};
231+
# These are not supported in Ruby at the moment. Enable them if Ruby ever adds support.
232+
# collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error) {
233+
# emit(:set, :collation, copy(data, ts, te))
234+
# };
235+
# character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error) {
236+
# emit(:set, :equivalent, copy(data, ts, te))
237+
# };
238238

239239
meta_char > (set_meta, 1) {
240240
emit(:literal, :literal, copy(data, ts, te))

spec/scanner/sets_spec.rb

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,6 @@
6161
include_examples 'scan', /[[:digit:][:space:]]/, 2 => [:posixclass, :space, '[:space:]', 10, 19]
6262
include_examples 'scan', /[[:^digit:]]/, 1 => [:nonposixclass, :digit, '[:^digit:]', 1, 11]
6363

64-
include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
65-
include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
66-
6764
include_examples 'scan', /[a-d&&g-h]/, 4 => [:set, :intersection, '&&', 4, 6]
6865
include_examples 'scan', /[a&&]/, 2 => [:set, :intersection, '&&', 2, 4]
6966
include_examples 'scan', /[&&z]/, 1 => [:set, :intersection, '&&', 1, 3]
@@ -88,6 +85,17 @@
8885
8 => [:set, :range, '-', 9, 10],
8986
10=> [:set, :close, ']', 11, 12]
9087

88+
# Collations/collating sequences and character equivalents are not enabled
89+
# in Ruby at the moment. If they ever are, enable them in the scanner,
90+
# add them to a new syntax version, and handle them in the parser. Until then,
91+
# expect them to be scanned as regular subsets containing literals.
92+
# include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
93+
# include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
94+
include_examples 'scan', '[a[.a-b.]c]', 2 => [:set, :open, '[', 2, 3]
95+
include_examples 'scan', '[a[.a-b.]c]', 3 => [:literal, :literal, '.', 3, 4]
96+
include_examples 'scan', '[a[=e=]c]', 2 => [:set, :open, '[', 2, 3]
97+
include_examples 'scan', '[a[=e=]c]', 3 => [:literal, :literal, '=', 3, 4]
98+
9199
# multi-byte characters should not affect indices
92100
include_examples 'scan', /[れます]/,
93101
0 => [:set, :open, '[', 0, 1],

0 commit comments

Comments
 (0)