Don't raise when encountering collation/equivalent lookalikes

jaynetics · jaynetics · commit 68c25e35f37a · 2020-12-27T22:37:49.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ### Fixed
 
+- fixed error when scanning some unlikely and redundant but valid character set patterns
+  - e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
 - fixed ancestry of some error classes related to syntax version lookup
   - `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
   - they inherited from Rubys `::SyntaxError` instead of `Regexp::Syntax::SyntaxError`
diff --git a/lib/regexp_parser/parser.rb b/lib/regexp_parser/parser.rb
@@ -143,8 +143,6 @@ def set(token)
       range(token)
     when :intersection
       intersection(token)
-    when :collation, :equivalent
-      node << Literal.new(token, active_opts)
     else
       raise UnknownTokenError.new('CharacterSet', token)
     end
diff --git a/lib/regexp_parser/scanner/scanner.rl b/lib/regexp_parser/scanner/scanner.rl
@@ -37,7 +37,7 @@
   class_posix           = ('[:' . '^'? . class_name_posix . ':]');
 
 
-  # these are not supported in ruby, and need verification
+  # these are not supported in ruby at the moment
   collating_sequence    = '[.' . (alpha | [\-])+ . '.]';
   character_equivalent  = '[=' . alpha . '=]';
 
@@ -228,13 +228,13 @@
       emit(type, class_name.to_sym, text)
     };
 
-    collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
-      emit(:set, :collation, copy(data, ts, te))
-    };
-
-    character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
-      emit(:set, :equivalent, copy(data, ts, te))
-    };
+    # These are not supported in Ruby at the moment. Enable them if Ruby ever supports them.
+    # collating_sequence >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
+    #   emit(:set, :collation, copy(data, ts, te))
+    # };
+    # character_equivalent >(open_bracket, 1) @set_closed @eof(premature_end_error)  {
+    #   emit(:set, :equivalent, copy(data, ts, te))
+    # };
 
     meta_char > (set_meta, 1) {
       emit(:literal, :literal, copy(data, ts, te))
diff --git a/spec/scanner/sets_spec.rb b/spec/scanner/sets_spec.rb
@@ -61,9 +61,6 @@
   include_examples 'scan', /[[:digit:][:space:]]/,  2 => [:posixclass,    :space,    '[:space:]', 10, 19]
   include_examples 'scan', /[[:^digit:]]/,          1 => [:nonposixclass, :digit,    '[:^digit:]', 1, 11]
 
-  include_examples 'scan', /[a[.a-b.]c]/,           2 => [:set,    :collation,       '[.a-b.]',    2,  9]
-  include_examples 'scan', /[a[=e=]c]/,             2 => [:set,    :equivalent,      '[=e=]',      2,  7]
-
   include_examples 'scan', /[a-d&&g-h]/,            4 => [:set,    :intersection,    '&&',         4, 6]
   include_examples 'scan', /[a&&]/,                 2 => [:set,    :intersection,    '&&',         2, 4]
   include_examples 'scan', /[&&z]/,                 1 => [:set,    :intersection,    '&&',         1, 3]
@@ -88,6 +85,17 @@
     8 => [:set,    :range,           '-',          9, 10],
     10=> [:set,    :close,           ']',          11, 12]
 
+  # Collations/collating sequences and character equivalents are not enabled
+  # in Ruby at the moment. If they ever are, enable them in the scanner,
+  # add them to a new syntax version, and handle them in the parser. Until then,
+  # expect them to be scanned as regular subsets containing literals.
+  # include_examples 'scan', /[a[.a-b.]c]/,           2 => [:set,    :collation,       '[.a-b.]',    2,  9]
+  # include_examples 'scan', /[a[=e=]c]/,             2 => [:set,    :equivalent,      '[=e=]',      2,  7]
+  include_examples 'scan', '[a[.a-b.]c]',           2 => [:set,     :open,        '[',      2,  3]
+  include_examples 'scan', '[a[.a-b.]c]',           3 => [:literal, :literal,     '.',      3,  4]
+  include_examples 'scan', '[a[=e=]c]',             2 => [:set,     :open,        '[',      2,  3]
+  include_examples 'scan', '[a[=e=]c]',             3 => [:literal, :literal,     '=',      3,  4]
+
   # multi-byte characters should not affect indices
   include_examples 'scan', /[れます]/,
     0 => [:set,     :open,           '[',          0, 1],