Skip to content

Commit 2c4a55c

Browse files
authored
Merge pull request #1849 from shravanrn/master
Fix the utf8 convertor for VS2017
2 parents d0eaf29 + 9220df2 commit 2c4a55c

File tree

4 files changed

+31
-7
lines changed

4 files changed

+31
-7
lines changed

contributors.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,5 +144,6 @@ YYYY/MM/DD, github id, Full name, email
144144
2017/03/15, robertvanderhulst, Robert van der Hulst, [email protected]
145145
2017/03/28, cmd-johnson, Jonas Auer, [email protected]
146146
2017/04/12, lys0716, Yishuang Lu, [email protected]
147+
2017/04/30, shravanrn, Shravan Narayan, [email protected]
147148
2017/05/11, jimallman, Jim Allman, [email protected]
148149
2017/05/26, waf, Will Fuqua, [email protected]

runtime/Cpp/runtime/src/ANTLRInputStream.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ void ANTLRInputStream::load(const std::string &input) {
3535
// Remove the UTF-8 BOM if present.
3636
const char bom[4] = "\xef\xbb\xbf";
3737
if (input.compare(0, 3, bom, 3) == 0)
38-
_data = antlrcpp::utfConverter.from_bytes(input.data() + 3, input.data() + input.size());
38+
_data = antlrcpp::utf8_to_utf32(input.data() + 3, input.data() + input.size());
3939
else
40-
_data = antlrcpp::utfConverter.from_bytes(input);
40+
_data = antlrcpp::utf8_to_utf32(input.data(), input.data() + input.size());
4141
p = 0;
4242
}
4343

@@ -136,7 +136,7 @@ std::string ANTLRInputStream::getText(const Interval &interval) {
136136
return "";
137137
}
138138

139-
return antlrcpp::utfConverter.to_bytes(_data.substr(start, count));
139+
return antlrcpp::utf32_to_utf8(_data.substr(start, count));
140140
}
141141

142142
std::string ANTLRInputStream::getSourceName() const {
@@ -147,7 +147,7 @@ std::string ANTLRInputStream::getSourceName() const {
147147
}
148148

149149
std::string ANTLRInputStream::toString() const {
150-
return antlrcpp::utfConverter.to_bytes(_data);
150+
return antlrcpp::utf32_to_utf8(_data);
151151
}
152152

153153
void ANTLRInputStream::InitializeInstanceFields() {

runtime/Cpp/runtime/src/UnbufferedCharStream.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ std::string UnbufferedCharStream::getText(const misc::Interval &interval) {
195195
}
196196
// convert from absolute to local index
197197
size_t i = interval.a - bufferStartIndex;
198-
return utfConverter.to_bytes(_data.substr(i, interval.length()));
198+
return utf32_to_utf8(_data.substr(i, interval.length()));
199199
}
200200

201201
size_t UnbufferedCharStream::getBufferStartIndex() const {

runtime/Cpp/runtime/src/support/StringUtils.h

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,36 @@
99

1010
namespace antlrcpp {
1111
// For all conversions utf8 <-> utf32.
12-
// VS 2015 has a bug in std::codecvt_utf8<char32_t> (VS 2013 works fine).
13-
#if defined(_MSC_VER) && _MSC_VER == 1900
12+
// VS 2015 and VS 2017 have different bugs in std::codecvt_utf8<char32_t> (VS 2013 works fine).
// On those compilers the shared converter is therefore instantiated over __int32 instead of
// char32_t. The object is declared static in this header, so every translation unit that
// includes it gets its own instance.
#if defined(_MSC_VER) && _MSC_VER >= 1900 && _MSC_VER < 2000
static std::wstring_convert<std::codecvt_utf8<__int32>, __int32> utfConverter;
#else
static std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> utfConverter;
#endif

// Converts a UTF-32 string to its UTF-8 byte representation.
//
// T is a std::basic_string-like type exposing data() and size(); it is taken by const
// reference so that large input buffers are not copied just to be converted.
// No error string is configured on the converter, so a failed conversion is reported by
// std::wstring_convert throwing std::range_error.
template<typename T>
inline std::string utf32_to_utf8(const T &_data)
{
#if defined(_MSC_VER) && _MSC_VER > 1900 && _MSC_VER < 2000
  // VS 2017: the converter works on __int32 code units, so hand it the character data
  // reinterpreted as 32-bit integers (same size and value representation).
  auto p = reinterpret_cast<const int32_t *>(_data.data());
  return utfConverter.to_bytes(p, p + _data.size());
#else
  return utfConverter.to_bytes(_data);
#endif
}

// Converts the UTF-8 bytes in [first, last) to a UTF-32 string.
//
// The return type is deduced because the converter's wide string type differs between
// compilers (see the converter declaration). A failed conversion is reported by
// std::wstring_convert throwing std::range_error.
inline auto utf8_to_utf32(const char* first, const char* last)
{
#if defined(_MSC_VER) && _MSC_VER > 1900 && _MSC_VER < 2000
  auto r = utfConverter.from_bytes(first, last);
  // Construct from pointer + length. Constructing from the bare pointer would treat the
  // data as NUL-terminated and silently drop everything after the first U+0000.
  return std::u32string(reinterpret_cast<const char32_t *>(r.data()), r.size());
#else
  return utfConverter.from_bytes(first, last);
#endif
}
41+
1942
void replaceAll(std::string& str, const std::string& from, const std::string& to);
2043

2144
// string <-> wstring conversion (UTF-16), e.g. for use with Windows wide APIs.

0 commit comments

Comments
 (0)