Skip to content

Commit 924b3d1

Browse files
authored
Merge pull request #1724 from KvanTTT/more-accurate-error-messages
More accurate error messages
2 parents d3ce604 + fbc49f2 commit 924b3d1

File tree

6 files changed

+63
-50
lines changed

6 files changed

+63
-50
lines changed

tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -390,12 +390,14 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() {
390390
"TOKEN_RANGE_2: [A-FD-J];\n" +
391391
"TOKEN_RANGE_3: 'Z' | 'K'..'R' | 'O'..'V';\n" +
392392
"TOKEN_RANGE_4: 'g'..'l' | [g-l];\n" +
393-
"TOKEN_RANGE_WITHOUT_COLLISION: '_' | [a-zA-Z];",
394-
395-
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars \"a-f\" used multiple times in set [aa-f]\n" +
396-
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars \"D-J\" used multiple times in set [A-FD-J]\n" +
397-
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars \"O-V\" used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" +
398-
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars \"g\" used multiple times in set {'g'..'l'}\n"
393+
"TOKEN_RANGE_WITHOUT_COLLISION: '_' | [a-zA-Z];\n" +
394+
"TOKEN_RANGE_WITH_ESCAPED_CHARS: [\\n-\\r] | '\\n'..'\\r';",
395+
396+
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:2:18: chars 'a'..'f' used multiple times in set [aa-f]\n" +
397+
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:3:18: chars 'D'..'J' used multiple times in set [A-FD-J]\n" +
398+
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4:4:13: chars 'O'..'V' used multiple times in set 'Z' | 'K'..'R' | 'O'..'V'\n" +
399+
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars 'g' used multiple times in set 'g'..'l'\n" +
400+
"warning(" + ErrorType.CHARACTERS_COLLISION_IN_SET.code + "): L.g4::: chars '\\n' used multiple times in set '\\n'..'\\r'\n"
399401
};
400402

401403
testErrors(test, false);

tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -522,7 +522,7 @@ public void testSetUp() throws Exception {
522522
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence\n" +
523523
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence\n" +
524524
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
525-
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: [f-a]\n" +
525+
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
526526
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" +
527527
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:30: invalid charset (range without start or end): [-z]\n" +
528528
"error(" + ErrorType.INVALID_CHAR_SET.code + "): Test.g4:7:37: invalid charset (range without start or end): [a-]\n" +

tool/src/org/antlr/v4/automata/ATNOptimizer.java

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66

77
package org.antlr.v4.automata;
88

9+
import org.antlr.v4.codegen.model.MatchSet;
10+
import org.antlr.v4.runtime.Token;
911
import org.antlr.v4.runtime.atn.ATN;
1012
import org.antlr.v4.runtime.atn.ATNState;
1113
import org.antlr.v4.runtime.atn.AtomTransition;
@@ -25,6 +27,7 @@
2527
import org.antlr.v4.tool.Rule;
2628

2729
import java.util.ArrayList;
30+
import java.util.Iterator;
2831
import java.util.List;
2932

3033
/**
@@ -111,8 +114,8 @@ private static void optimizeSets(Grammar g, ATN atn) {
111114
// TODO: Token is missing (i.e. position in source will not be displayed).
112115
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName,
113116
null,
114-
String.valueOf(Character.toChars(v)),
115-
matchSet.toString(true));
117+
CharSupport.getANTLRCharLiteralForChar(v),
118+
CharSupport.getIntervalSetEscapedString(matchSet));
116119
break;
117120
}
118121
}

tool/src/org/antlr/v4/automata/LexerATNFactory.java

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -517,7 +517,7 @@ private CharSetParseState applyPrevStateAndMoveToCodePoint(
517517
ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED,
518518
g.fileName,
519519
charSetAST.getToken(),
520-
CharSupport.toRange(state.prevCodePoint, codePoint, CharSupport.ToRangeMode.BRACKETED));
520+
CharSupport.getRangeEscapedString(state.prevCodePoint, codePoint));
521521
}
522522
checkSetCollision(charSetAST, set, state.prevCodePoint, codePoint);
523523
set.add(state.prevCodePoint, codePoint);
@@ -571,10 +571,7 @@ private void applyPrevState(GrammarAST charSetAST, IntervalSet set, CharSetParse
571571
}
572572

573573
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int el) {
574-
if (set.contains(el)) {
575-
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
576-
el, ast.getText());
577-
}
574+
checkSetCollision(ast, set, el, el);
578575
}
579576

580577
protected void checkSetCollision(GrammarAST ast, IntervalSet set, int a, int b) {
@@ -601,7 +598,7 @@ protected void checkSetCollision(GrammarAST ast, IntervalSet set, int a, int b)
601598
setText = sb.toString();
602599
}
603600
g.tool.errMgr.grammarError(ErrorType.CHARACTERS_COLLISION_IN_SET, g.fileName, ast.getToken(),
604-
CharSupport.toRange(a, b, CharSupport.ToRangeMode.NOT_BRACKETED), setText);
601+
CharSupport.getRangeEscapedString(a, b), setText);
605602
break;
606603
}
607604
}

tool/src/org/antlr/v4/misc/CharSupport.java

Lines changed: 45 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,11 @@
77
package org.antlr.v4.misc;
88

99
import org.antlr.v4.runtime.Lexer;
10+
import org.antlr.v4.runtime.Token;
11+
import org.antlr.v4.runtime.misc.Interval;
12+
import org.antlr.v4.runtime.misc.IntervalSet;
13+
14+
import java.util.Iterator;
1015

1116
/** */
1217
public class CharSupport {
@@ -19,11 +24,6 @@ public class CharSupport {
1924
*/
2025
public static String ANTLRLiteralCharValueEscape[] = new String[255];
2126

22-
public enum ToRangeMode {
23-
BRACKETED,
24-
NOT_BRACKETED,
25-
};
26-
2727
static {
2828
ANTLRLiteralEscapedCharValue['n'] = '\n';
2929
ANTLRLiteralEscapedCharValue['r'] = '\r';
@@ -45,28 +45,34 @@ public enum ToRangeMode {
4545
* as \\uXXXX or \\u{XXXXXX} escapes.
4646
*/
4747
public static String getANTLRCharLiteralForChar(int c) {
48-
if ( c< Lexer.MIN_CHAR_VALUE ) {
49-
return "'<INVALID>'";
50-
}
51-
if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
52-
return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
48+
String result;
49+
if ( c < Lexer.MIN_CHAR_VALUE ) {
50+
result = "<INVALID>";
5351
}
54-
if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
55-
!Character.isISOControl((char)c) ) {
56-
if ( c=='\\' ) {
57-
return "'\\\\'";
52+
else {
53+
String charValueEscape = c < ANTLRLiteralCharValueEscape.length ? ANTLRLiteralCharValueEscape[c] : null;
54+
if (charValueEscape != null) {
55+
result = charValueEscape;
5856
}
59-
if ( c=='\'') {
60-
return "'\\''";
57+
else if (Character.UnicodeBlock.of((char) c) == Character.UnicodeBlock.BASIC_LATIN &&
58+
!Character.isISOControl((char) c)) {
59+
if (c == '\\') {
60+
result = "\\\\";
61+
}
62+
else if (c == '\'') {
63+
result = "\\'";
64+
}
65+
else {
66+
result = Character.toString((char) c);
67+
}
68+
}
69+
else if (c <= 0xFFFF) {
70+
result = String.format("\\u%04X", c);
71+
} else {
72+
result = String.format("\\u{%06X}", c);
6173
}
62-
return '\''+Character.toString((char)c)+'\'';
63-
}
64-
if (c <= 0xFFFF) {
65-
return String.format("\\u%04X", c);
66-
}
67-
else {
68-
return String.format("\\u{%06X}", c);
6974
}
75+
return '\'' + result + '\'';
7076
}
7177

7278
/** Given a literal like (the 3 char sequence with single quotes) 'a',
@@ -179,17 +185,22 @@ public static String capitalize(String s) {
179185
return Character.toUpperCase(s.charAt(0)) + s.substring(1);
180186
}
181187

182-
public static String toRange(int codePointStart, int codePointEnd, ToRangeMode mode) {
183-
StringBuilder sb = new StringBuilder();
184-
if (mode == ToRangeMode.BRACKETED) {
185-
sb.append("[");
186-
}
187-
sb.appendCodePoint(codePointStart)
188-
.append("-")
189-
.appendCodePoint(codePointEnd);
190-
if (mode == ToRangeMode.BRACKETED) {
191-
sb.append("]");
188+
public static String getIntervalSetEscapedString(IntervalSet intervalSet) {
189+
StringBuilder buf = new StringBuilder();
190+
Iterator<Interval> iter = intervalSet.getIntervals().iterator();
191+
while (iter.hasNext()) {
192+
Interval interval = iter.next();
193+
buf.append(getRangeEscapedString(interval.a, interval.b));
194+
if (iter.hasNext()) {
195+
buf.append(" | ");
196+
}
192197
}
193-
return sb.toString();
198+
return buf.toString();
199+
}
200+
201+
public static String getRangeEscapedString(int codePointStart, int codePointEnd) {
202+
return codePointStart != codePointEnd
203+
? getANTLRCharLiteralForChar(codePointStart) + ".." + getANTLRCharLiteralForChar(codePointEnd)
204+
: getANTLRCharLiteralForChar(codePointStart);
194205
}
195206
}

tool/src/org/antlr/v4/tool/ErrorType.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1054,7 +1054,7 @@ public enum ErrorType {
10541054
*
10551055
* TODO: Does not work with fragment rules.
10561056
*/
1057-
CHARACTERS_COLLISION_IN_SET(180, "chars \"<arg>\" used multiple times in set <arg2>", ErrorSeverity.WARNING),
1057+
CHARACTERS_COLLISION_IN_SET(180, "chars <arg> used multiple times in set <arg2>", ErrorSeverity.WARNING),
10581058

10591059
/**
10601060
* Compiler Warning 181

0 commit comments

Comments
 (0)