Skip to content

Support UTF-32 escape sequences #276

@jbclements

Description

@jbclements

The pasted file contains a definition for the unicode XID_START character set. Processing it with the current git master causes this error:

jclements-09740:/tmp clements> java -jar /usr/local/lib/antlr-4.0.1-dev-complete.jar ./Bad.g4
Exception in thread "main" java.lang.UnsupportedOperationException: Serialized ATN data element out of range.
    at org.antlr.v4.automata.ATNSerializer.serialize(ATNSerializer.java:324)
    at org.antlr.v4.automata.ATNSerializer.getSerialized(ATNSerializer.java:458)
    at org.antlr.v4.codegen.model.SerializedATN.<init>(SerializedATN.java:46)
    at org.antlr.v4.codegen.model.Lexer.<init>(Lexer.java:72)
    at org.antlr.v4.codegen.OutputModelController.lexer(OutputModelController.java:175)
    at org.antlr.v4.codegen.OutputModelController.buildLexerOutputModel(OutputModelController.java:128)
    at org.antlr.v4.codegen.CodeGenerator.generateLexer(CodeGenerator.java:147)
    at org.antlr.v4.codegen.CodeGenPipeline.process(CodeGenPipeline.java:69)
    at org.antlr.v4.Tool.processNonCombinedGrammar(Tool.java:422)
    at org.antlr.v4.Tool.process(Tool.java:376)
    at org.antlr.v4.Tool.processGrammarsOnCommandLine(Tool.java:343)
    at org.antlr.v4.Tool.main(Tool.java:190)

... and, here's the file Bad.g4. It's 430 lines, just in case my copy buffer lost part of it:

grammar Bad;


XIDSTART :
              '\u0041' .. '\u005a'
            | '\u0061' .. '\u007a'
            | '\u00aa'
            | '\u00b5'
            | '\u00ba'
            | '\u00c0' .. '\u00d6'
            | '\u00d8' .. '\u00f6'
            | '\u00f8' .. '\u01ba'
            | '\u01bb'
            | '\u01bc' .. '\u01bf'
            | '\u01c0' .. '\u01c3'
            | '\u01c4' .. '\u0293'
            | '\u0294'
            | '\u0295' .. '\u02af'
            | '\u02b0' .. '\u02c1'
            | '\u02c6' .. '\u02d1'
            | '\u0ab5' .. '\u0ab9'
            | '\u0abd'
            | '\u0ad0'
            | '\u0ae0' .. '\u0ae1'
            | '\u0b05' .. '\u0b0c'
            | '\u0b0f' .. '\u0b10'
            | '\u0b13' .. '\u0b28'
            | '\u0b2a' .. '\u0b30'
            | '\u0b32' .. '\u0b33'
            | '\u0b35' .. '\u0b39'
            | '\u0b3d'
            | '\u0b5c' .. '\u0b5d'
            | '\u0b5f' .. '\u0b61'
            | '\u0b71'
            | '\u0b83'
            | '\u0b85' .. '\u0b8a'
            | '\u0b8e' .. '\u0b90'
            | '\u0b92' .. '\u0b95'
            | '\u0b99' .. '\u0b9a'
            | '\u0b9c'
            | '\u0b9e' .. '\u0b9f'
            | '\u0ba3' .. '\u0ba4'
            | '\u0ba8' .. '\u0baa'
            | '\u0bae' .. '\u0bb9'
            | '\u0bd0'
            | '\u0c05' .. '\u0c0c'
            | '\u0c0e' .. '\u0c10'
            | '\u0c12' .. '\u0c28'
            | '\u0c2a' .. '\u0c33'
            | '\u0c35' .. '\u0c39'
            | '\u0c3d'
            | '\u0c58' .. '\u0c59'
            | '\u0c60' .. '\u0c61'
            | '\u0c85' .. '\u0c8c'
            | '\u0c8e' .. '\u0c90'
            | '\u0c92' .. '\u0ca8'
            | '\u0caa' .. '\u0cb3'
            | '\u0cb5' .. '\u0cb9'
            | '\u0cbd'
            | '\u0cde'
            | '\u0ce0' .. '\u0ce1'
            | '\u0cf1' .. '\u0cf2'
            | '\u0d05' .. '\u0d0c'
            | '\u0d0e' .. '\u0d10'
            | '\u0d12' .. '\u0d3a'
            | '\u0d3d'
            | '\u0d4e'
            | '\u0d60' .. '\u0d61'
            | '\u0d7a' .. '\u0d7f'
            | '\u0d85' .. '\u0d96'
            | '\u0d9a' .. '\u0db1'
            | '\u0db3' .. '\u0dbb'
            | '\u0dbd'
            | '\u0dc0' .. '\u0dc6'
            | '\u0e01' .. '\u0e30'
            | '\u0e32'
            | '\u0e40' .. '\u0e45'
            | '\u0e46'
            | '\u0e81' .. '\u0e82'
            | '\u0e84'
            | '\u0e87' .. '\u0e88'
            | '\u0e8a'
            | '\u0e8d'
            | '\u0e94' .. '\u0e97'
            | '\u0e99' .. '\u0e9f'
            | '\u0ea1' .. '\u0ea3'
            | '\u0ea5'
            | '\u0ea7'
            | '\u0eaa' .. '\u0eab'
            | '\u0ead' .. '\u0eb0'
            | '\u0eb2'
            | '\u0ebd'
            | '\u0ec0' .. '\u0ec4'
            | '\u0ec6'
            | '\u0edc' .. '\u0edd'
            | '\u0f00'
            | '\u0f40' .. '\u0f47'
            | '\u0f49' .. '\u0f6c'
            | '\u0f88' .. '\u0f8c'
            | '\u1000' .. '\u102a'
            | '\u103f'
            | '\u1050' .. '\u1055'
            | '\u105a' .. '\u105d'
            | '\u1061'
            | '\u1065' .. '\u1066'
            | '\u106e' .. '\u1070'
            | '\u1075' .. '\u1081'
            | '\u108e'
            | '\u10a0' .. '\u10c5'
            | '\u10d0' .. '\u10fa'
            | '\u10fc'
            | '\u1100' .. '\u1248'
            | '\u124a' .. '\u124d'
            | '\u1250' .. '\u1256'
            | '\u1258'
            | '\u125a' .. '\u125d'
            | '\u1260' .. '\u1288'
            | '\u128a' .. '\u128d'
            | '\u1290' .. '\u12b0'
            | '\u12b2' .. '\u12b5'
            | '\u12b8' .. '\u12be'
            | '\u12c0'
            | '\u12c2' .. '\u12c5'
            | '\u12c8' .. '\u12d6'
            | '\u12d8' .. '\u1310'
            | '\u1312' .. '\u1315'
            | '\u1318' .. '\u135a'
            | '\u1380' .. '\u138f'
            | '\u13a0' .. '\u13f4'
            | '\u1401' .. '\u166c'
            | '\u166f' .. '\u167f'
            | '\u1681' .. '\u169a'
            | '\u16a0' .. '\u16ea'
            | '\u16ee' .. '\u16f0'
            | '\u1700' .. '\u170c'
            | '\u170e' .. '\u1711'
            | '\u1720' .. '\u1731'
            | '\u1740' .. '\u1751'
            | '\u1760' .. '\u176c'
            | '\u176e' .. '\u1770'
            | '\u1780' .. '\u17b3'
            | '\u17d7'
            | '\u17dc'
            | '\u1820' .. '\u1842'
            | '\u1843'
            | '\u1844' .. '\u1877'
            | '\u1880' .. '\u18a8'
            | '\u18aa'
            | '\u18b0' .. '\u18f5'
            | '\u1900' .. '\u191c'
            | '\u1950' .. '\u196d'
            | '\u1970' .. '\u1974'
            | '\u1980' .. '\u19ab'
            | '\u19c1' .. '\u19c7'
            | '\u1a00' .. '\u1a16'
            | '\u1a20' .. '\u1a54'
            | '\u1aa7'
            | '\u1b05' .. '\u1b33'
            | '\u1b45' .. '\u1b4b'
            | '\u1b83' .. '\u1ba0'
            | '\u1bae' .. '\u1baf'
            | '\u1bc0' .. '\u1be5'
            | '\u1c00' .. '\u1c23'
            | '\u1c4d' .. '\u1c4f'
            | '\u1c5a' .. '\u1c77'
            | '\u1c78' .. '\u1c7d'
            | '\u1ce9' .. '\u1cec'
            | '\u1cee' .. '\u1cf1'
            | '\u1d00' .. '\u1d2b'
            | '\u1d2c' .. '\u1d61'
            | '\u1d62' .. '\u1d77'
            | '\u1d78'
            | '\u1d79' .. '\u1d9a'
            | '\u1d9b' .. '\u1dbf'
            | '\u1e00' .. '\u1f15'
            | '\u1f18' .. '\u1f1d'
            | '\u1f20' .. '\u1f45'
            | '\u1f48' .. '\u1f4d'
            | '\u1f50' .. '\u1f57'
            | '\u1f59'
            | '\u1f5b'
            | '\u1f5d'
            | '\u1f5f' .. '\u1f7d'
            | '\u1f80' .. '\u1fb4'
            | '\u1fb6' .. '\u1fbc'
            | '\u1fbe'
            | '\u1fc2' .. '\u1fc4'
            | '\u1fc6' .. '\u1fcc'
            | '\u1fd0' .. '\u1fd3'
            | '\u1fd6' .. '\u1fdb'
            | '\u1fe0' .. '\u1fec'
            | '\u1ff2' .. '\u1ff4'
            | '\u1ff6' .. '\u1ffc'
            | '\u2071'
            | '\u207f'
            | '\u2090' .. '\u209c'
            | '\u2102'
            | '\u2107'
            | '\u210a' .. '\u2113'
            | '\u2115'
            | '\u2118'
            | '\u2119' .. '\u211d'
            | '\u2124'
            | '\u2126'
            | '\u2128'
            | '\u212a' .. '\u212d'
            | '\u212e'
            | '\u212f' .. '\u2134'
            | '\u2135' .. '\u2138'
            | '\u2139'
            | '\u213c' .. '\u213f'
            | '\u2145' .. '\u2149'
            | '\u214e'
            | '\u2160' .. '\u2182'
            | '\u2183' .. '\u2184'
            | '\u2185' .. '\u2188'
            | '\u2c00' .. '\u2c2e'
            | '\u2c30' .. '\u2c5e'
            | '\u2c60' .. '\u2c7c'
            | '\u2c7d'
            | '\u2c7e' .. '\u2ce4'
            | '\u2ceb' .. '\u2cee'
            | '\u2d00' .. '\u2d25'
            | '\u2d30' .. '\u2d65'
            | '\u2d6f'
            | '\u2d80' .. '\u2d96'
            | '\u2da0' .. '\u2da6'
            | '\u2da8' .. '\u2dae'
            | '\u2db0' .. '\u2db6'
            | '\u2db8' .. '\u2dbe'
            | '\u2dc0' .. '\u2dc6'
            | '\u2dc8' .. '\u2dce'
            | '\u2dd0' .. '\u2dd6'
            | '\u2dd8' .. '\u2dde'
            | '\u3005'
            | '\u3006'
            | '\u3007'
            | '\u3021' .. '\u3029'
            | '\u3031' .. '\u3035'
            | '\u3038' .. '\u303a'
            | '\u303b'
            | '\u303c'
            | '\u3041' .. '\u3096'
            | '\u309d' .. '\u309e'
            | '\u309f'
            | '\u30a1' .. '\u30fa'
            | '\u30fc' .. '\u30fe'
            | '\u30ff'
            | '\u3105' .. '\u312d'
            | '\u3131' .. '\u318e'
            | '\u31a0' .. '\u31ba'
            | '\u31f0' .. '\u31ff'
            | '\u3400' .. '\u4db5'
            | '\u4e00' .. '\u9fcb'
            | '\ua000' .. '\ua014'
            | '\ua015'
            | '\ua016' .. '\ua48c'
            | '\ua4d0' .. '\ua4f7'
            | '\ua4f8' .. '\ua4fd'
            | '\ua500' .. '\ua60b'
            | '\ua60c'
            | '\ua610' .. '\ua61f'
            | '\ua62a' .. '\ua62b'
            | '\ua640' .. '\ua66d'
            | '\ua66e'
            | '\ua67f'
            | '\ua680' .. '\ua697'
            | '\ua6a0' .. '\ua6e5'
            | '\ua6e6' .. '\ua6ef'
            | '\ua717' .. '\ua71f'
            | '\ua722' .. '\ua76f'
            | '\ua770'
            | '\ua771' .. '\ua787'
            | '\ua788'
            | '\ua78b' .. '\ua78e'
            | '\ua790' .. '\ua791'
            | '\ua7a0' .. '\ua7a9'
            | '\ua7fa'
            | '\ua7fb' .. '\ua801'
            | '\ua803' .. '\ua805'
            | '\ua807' .. '\ua80a'
            | '\ua80c' .. '\ua822'
            | '\ua840' .. '\ua873'
            | '\ua882' .. '\ua8b3'
            | '\ua8f2' .. '\ua8f7'
            | '\ua8fb'
            | '\ua90a' .. '\ua925'
            | '\ua930' .. '\ua946'
            | '\ua960' .. '\ua97c'
            | '\ua984' .. '\ua9b2'
            | '\ua9cf'
            | '\uaa00' .. '\uaa28'
            | '\uaa40' .. '\uaa42'
            | '\uaa44' .. '\uaa4b'
            | '\uaa60' .. '\uaa6f'
            | '\uaa70'
            | '\uaa71' .. '\uaa76'
            | '\uaa7a'
            | '\uaa80' .. '\uaaaf'
            | '\uaab1'
            | '\uaab5' .. '\uaab6'
            | '\uaab9' .. '\uaabd'
            | '\uaac0'
            | '\uaac2'
            | '\uaadb' .. '\uaadc'
            | '\uaadd'
            | '\uab01' .. '\uab06'
            | '\uab09' .. '\uab0e'
            | '\uab11' .. '\uab16'
            | '\uab20' .. '\uab26'
            | '\uab28' .. '\uab2e'
            | '\uabc0' .. '\uabe2'
            | '\uac00' .. '\ud7a3'
            | '\ud7b0' .. '\ud7c6'
            | '\ud7cb' .. '\ud7fb'
            | '\uf900' .. '\ufa2d'
            | '\ufa30' .. '\ufa6d'
            | '\ufa70' .. '\ufad9'
            | '\ufb00' .. '\ufb06'
            | '\ufb13' .. '\ufb17'
            | '\ufb1d'
            | '\ufb1f' .. '\ufb28'
            | '\ufb2a' .. '\ufb36'
            | '\ufb38' .. '\ufb3c'
            | '\ufb3e'
            | '\ufb40' .. '\ufb41'
            | '\ufb43' .. '\ufb44'
            | '\ufb46' .. '\ufbb1'
            | '\ufbd3' .. '\ufc5d'
            | '\ufc64' .. '\ufd3d'
            | '\ufd50' .. '\ufd8f'
            | '\ufd92' .. '\ufdc7'
            | '\ufdf0' .. '\ufdf9'
            | '\ufe71'
            | '\ufe73'
            | '\ufe77'
            | '\ufe79'
            | '\ufe7b'
            | '\ufe7d'
            | '\ufe7f' .. '\ufefc'
            | '\uff21' .. '\uff3a'
            | '\uff41' .. '\uff5a'
            | '\uff66' .. '\uff6f'
            | '\uff70'
            | '\uff71' .. '\uff9d'
            | '\uffa0' .. '\uffbe'
            | '\uffc2' .. '\uffc7'
            | '\uffca' .. '\uffcf'
            | '\uffd2' .. '\uffd7'
            | '\uffda' .. '\uffdc'
            | '\U00010000' .. '\U0001000b'
            | '\U0001000d' .. '\U00010026'
            | '\U00010028' .. '\U0001003a'
            | '\U0001003c' .. '\U0001003d'
            | '\U0001003f' .. '\U0001004d'
            | '\U00010050' .. '\U0001005d'
            | '\U00010080' .. '\U000100fa'
            | '\U00010140' .. '\U00010174'
            | '\U00010280' .. '\U0001029c'
            | '\U000102a0' .. '\U000102d0'
            | '\U00010300' .. '\U0001031e'
            | '\U00010330' .. '\U00010340'
            | '\U00010341'
            | '\U00010342' .. '\U00010349'
            | '\U0001034a'
            | '\U00010380' .. '\U0001039d'
            | '\U000103a0' .. '\U000103c3'
            | '\U000103c8' .. '\U000103cf'
            | '\U000103d1' .. '\U000103d5'
            | '\U00010400' .. '\U0001044f'
            | '\U00010450' .. '\U0001049d'
            | '\U00010800' .. '\U00010805'
            | '\U00010808'
            | '\U0001080a' .. '\U00010835'
            | '\U00010837' .. '\U00010838'
            | '\U0001083c'
            | '\U0001083f' .. '\U00010855'
            | '\U00010900' .. '\U00010915'
            | '\U00010920' .. '\U00010939'
            | '\U00010a00'
            | '\U00010a10' .. '\U00010a13'
            | '\U00010a15' .. '\U00010a17'
            | '\U00010a19' .. '\U00010a33'
            | '\U00010a60' .. '\U00010a7c'
            | '\U00010b00' .. '\U00010b35'
            | '\U00010b40' .. '\U00010b55'
            | '\U00010b60' .. '\U00010b72'
            | '\U00010c00' .. '\U00010c48'
            | '\U00011003' .. '\U00011037'
            | '\U00011083' .. '\U000110af'
            | '\U00012000' .. '\U0001236e'
            | '\U00012400' .. '\U00012462'
            | '\U00013000' .. '\U0001342e'
            | '\U00016800' .. '\U00016a38'
            | '\U0001b000' .. '\U0001b001'
            | '\U0001d400' .. '\U0001d454'
            | '\U0001d456' .. '\U0001d49c'
            | '\U0001d49e' .. '\U0001d49f'
            | '\U0001d4a2'
            | '\U0001d4a5' .. '\U0001d4a6'
            | '\U0001d4a9' .. '\U0001d4ac'
            | '\U0001d4ae' .. '\U0001d4b9'
            | '\U0001d4bb'
            | '\U0001d4bd' .. '\U0001d4c3'
            | '\U0001d4c5' .. '\U0001d505'
            | '\U0001d507' .. '\U0001d50a'
            | '\U0001d50d' .. '\U0001d514'
            | '\U0001d516' .. '\U0001d51c'
            | '\U0001d51e' .. '\U0001d539'
            | '\U0001d53b' .. '\U0001d53e'
            | '\U0001d540' .. '\U0001d544'
            | '\U0001d546'
            | '\U0001d54a' .. '\U0001d550'
            | '\U0001d552' .. '\U0001d6a5'
            | '\U0001d6a8' .. '\U0001d6c0'
            | '\U0001d6c2' .. '\U0001d6da'
            | '\U0001d6dc' .. '\U0001d6fa'
            | '\U0001d6fc' .. '\U0001d714'
            | '\U0001d716' .. '\U0001d734'
            | '\U0001d736' .. '\U0001d74e'
            | '\U0001d750' .. '\U0001d76e'
            | '\U0001d770' .. '\U0001d788'
            | '\U0001d78a' .. '\U0001d7a8'
            | '\U0001d7aa' .. '\U0001d7c2'
            | '\U0001d7c4' .. '\U0001d7cb'
            | '\U00020000' .. '\U0002a6d6'
            | '\U0002a700' .. '\U0002b734'
            | '\U0002b740' .. '\U0002b81d'
            | '\U0002f800' .. '\U0002fa1d'
          ;

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions