Skip to content

Commit c293e23

Browse files
authored
Merge pull request #3025 from niw/fix_swift_input_stream
Fix Swift runtime ANTLRInputStream that can’t read Unicode scalars
2 parents e4d7f54 + 00e5fae commit c293e23

File tree

4 files changed

+72
-12
lines changed

4 files changed

+72
-12
lines changed

contributors.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ YYYY/MM/DD, github id, Full name, email
286286
2020/11/26, mr-c, Michael R. Crusoe, [email protected]
287287
2020/12/01, maxence-lefebvre, Maxence Lefebvre, [email protected]
288288
2020/12/03, electrum, David Phillips, [email protected]
289+
2021/01/03, niw, Yoshimasa Niwa, [email protected]
289290
2021/01/25, l215884529, Qiheng Liu, [email protected]
290291
2021/02/02, tsotnikov, Taras Sotnikov, [email protected]
291292
2021/02/10, jirislaby, Jiri Slaby, [email protected]

runtime/Swift/Sources/Antlr4/ANTLRFileStream.swift

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,9 @@ public class ANTLRFileStream: ANTLRInputStream {
1313

1414
public init(_ fileName: String, _ encoding: String.Encoding? = nil) throws {
1515
self.fileName = fileName
16-
super.init()
1716
let fileContents = try String(contentsOfFile: fileName, encoding: encoding ?? .utf8)
18-
data = Array(fileContents)
19-
n = data.count
17+
let data = Array(fileContents.unicodeScalars)
18+
super.init(data, data.count)
2019
}
2120

2221
override

runtime/Swift/Sources/Antlr4/ANTLRInputStream.swift

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ public class ANTLRInputStream: CharStream {
1515
///
1616
/// The data being scanned
1717
///
18-
internal var data: [Character]
18+
internal let data: [UnicodeScalar]
1919

2020
///
21-
/// How many characters are actually in the buffer
21+
/// How many unicode scalars are actually in the buffer
2222
///
2323
internal var n: Int
2424

@@ -34,23 +34,33 @@ public class ANTLRInputStream: CharStream {
3434

3535
public init() {
3636
n = 0
37-
data = [Character]()
37+
data = []
3838
}
3939

4040
///
4141
/// Copy the Unicode scalars of the string into a local array
4242
///
4343
public init(_ input: String) {
44-
self.data = Array(input)
44+
self.data = Array(input.unicodeScalars)
4545
self.n = data.count
4646
}
4747

4848
///
4949
/// This is the preferred constructor for strings as no data is copied
5050
///
51-
public init(_ data: [Character], _ numberOfActualCharsInArray: Int) {
51+
public init(_ data: [UnicodeScalar], _ numberOfActualUnicodeScalarsInArray: Int) {
5252
self.data = data
53-
self.n = numberOfActualCharsInArray
53+
self.n = numberOfActualUnicodeScalarsInArray
54+
}
55+
56+
///
57+
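/// This initializer exists only for backward compatibility: it accepts an array of `Character` and stores its Unicode scalars, so the count argument is interpreted as a number of Unicode scalars, not of `Character`s.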
58+
/// Use `init(_ data: [UnicodeScalar], _ numberOfActualUnicodeScalarsInArray: Int)` instead.
59+
///
60+
public init(_ data: [Character], _ numberOfActualUnicodeScalarsInArray: Int) {
61+
let string = String(data)
62+
self.data = Array(string.unicodeScalars)
63+
self.n = numberOfActualUnicodeScalarsInArray
5464
}
5565

5666
public func reset() {
@@ -90,7 +100,7 @@ public class ANTLRInputStream: CharStream {
90100
}
91101
//print("char LA("+i+")="+(char)data[p+i-1]+"; p="+p);
92102
//print("LA("+i+"); p="+p+" n="+n+" data.length="+data.length);
93-
return data[p + i - 1].unicodeValue
103+
return Int(data[p + i - 1].value)
94104
}
95105

96106
public func LT(_ i: Int) -> Int {
@@ -145,14 +155,19 @@ public class ANTLRInputStream: CharStream {
145155
return ""
146156
}
147157
let stop = min(n, interval.b + 1)
148-
return String(data[start ..< stop])
158+
159+
var unicodeScalarView = String.UnicodeScalarView()
160+
unicodeScalarView.append(contentsOf: data[start ..< stop])
161+
return String(unicodeScalarView)
149162
}
150163

151164
public func getSourceName() -> String {
152165
return name ?? ANTLRInputStream.UNKNOWN_SOURCE_NAME
153166
}
154167

155168
public func toString() -> String {
156-
return String(data)
169+
var unicodeScalarView = String.UnicodeScalarView()
170+
unicodeScalarView.append(contentsOf: data)
171+
return String(unicodeScalarView)
157172
}
158173
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/// Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2+
/// Use of this file is governed by the BSD 3-clause license that
3+
/// can be found in the LICENSE.txt file in the project root.
4+
5+
import Foundation
6+
7+
import XCTest
8+
import Antlr4
9+
10+
class ANTLRInputStreamTests: XCTestCase {
11+
func testASCIICharactersString() {
12+
let inputStream = ANTLRInputStream("Cat")
13+
XCTAssertEqual(inputStream.LA(1), 0x0043)
14+
XCTAssertEqual(inputStream.LA(2), 0x0061)
15+
XCTAssertEqual(inputStream.LA(3), 0x0074)
16+
}
17+
18+
func testBasicMultilingualPlaneCharactersString() {
19+
// Three Japanese hiragana characters.
20+
let inputStream = ANTLRInputStream("\u{3053}\u{306D}\u{3053}")
21+
XCTAssertEqual(inputStream.LA(1), 0x3053)
22+
XCTAssertEqual(inputStream.LA(2), 0x306D)
23+
XCTAssertEqual(inputStream.LA(3), 0x3053)
24+
}
25+
26+
func testSupplementaryMultilingualPlaneCharactersString() {
27+
// Three emojis: "Cat", "Cat Face", and "Grinning Cat with Smiling Eyes".
28+
let inputStream = ANTLRInputStream("\u{1F408}\u{1F431}\u{1F638}")
29+
XCTAssertEqual(inputStream.LA(1), 0x1F408)
30+
XCTAssertEqual(inputStream.LA(2), 0x1F431)
31+
XCTAssertEqual(inputStream.LA(3), 0x1F638)
32+
}
33+
34+
func testGraphemeCharactersString() {
35+
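// One "Family (Man, Woman, Girl, Boy)" emoji: a single grapheme made of seven Unicode scalars (four emojis joined by three U+200D zero-width joiners).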
36+
let inputStream = ANTLRInputStream("\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}")
37+
XCTAssertEqual(inputStream.LA(1), 0x1F468)
38+
XCTAssertEqual(inputStream.LA(2), 0x200D)
39+
XCTAssertEqual(inputStream.LA(3), 0x1F469)
40+
XCTAssertEqual(inputStream.LA(4), 0x200D)
41+
XCTAssertEqual(inputStream.LA(5), 0x1F467)
42+
XCTAssertEqual(inputStream.LA(6), 0x200D)
43+
XCTAssertEqual(inputStream.LA(7), 0x1F466)
44+
}
45+
}

0 commit comments

Comments
 (0)