Skip to content

Commit 72bedcc

Browse files
amaya382 authored and DHowett committed
Fix garbling when copying multibyte text via OSC 52 (#7870)
This commit adds the missing UTF-8 to UTF-16 conversion when decoding base64, so that multibyte text copied via OSC 52 is no longer garbled.

## Validation Steps Performed

* automatically
  * Tests w/ multibyte characters
* manually
  * case1
    * Executed `printf "\x1b]52;;%s\x1b\\" "$(printf '👍👍🏻👍🏼👍🏽👍🏾👍🏿' | base64)"`
    * Verified `👍👍🏻👍🏼👍🏽👍🏾👍🏿` in my clipboard
  * case2
    * Copied `👍👍🏻👍🏼👍🏽👍🏾👍🏿` by tmux 2.6 default copy function (OSC 52)
    * Verified `👍👍🏻👍🏼👍🏽👍🏾👍🏿` in my clipboard

Closes #7819

(cherry picked from commit 743283e)
1 parent d9c95ca commit 72bedcc

File tree

5 files changed

+45
-15
lines changed

5 files changed

+45
-15
lines changed

.github/actions/spell-check/dictionary/apis.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ serializer
4646
SIZENS
4747
spsc
4848
STDCPP
49+
strchr
4950
syscall
5051
tmp
5152
tx

.github/actions/spell-check/patterns/patterns.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,5 @@ TestUtils::VerifyExpectedString\(tb, L"[^"]+"
1818
\b([A-Za-z])\1{3,}\b
1919
Base64::s_(?:En|De)code\(L"[^"]+"
2020
VERIFY_ARE_EQUAL\(L"[^"]+"
21-
L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/"
21+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\+/"
2222
std::memory_order_[\w]+

src/terminal/parser/base64.cpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66

77
using namespace Microsoft::Console::VirtualTerminal;
88

9-
static const wchar_t base64Chars[] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
10-
static const wchar_t padChar = L'=';
9+
static const char base64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
10+
static const char padChar = '=';
1111

1212
#pragma warning(disable : 26446 26447 26482 26485 26493 26494)
1313

@@ -75,15 +75,16 @@ std::wstring Base64::s_Encode(const std::wstring_view src) noexcept
7575
// - true if decoding successfully, otherwise false.
7676
bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
7777
{
78+
std::string mbStr;
7879
int state = 0;
79-
wchar_t tmp;
80+
char tmp;
8081

8182
const auto len = src.size() / 4 * 3;
8283
if (len == 0)
8384
{
8485
return false;
8586
}
86-
dst.reserve(len);
87+
mbStr.reserve(len);
8788

8889
auto iter = src.cbegin();
8990
while (iter < src.cend())
@@ -99,7 +100,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
99100
break;
100101
}
101102

102-
auto pos = wcschr(base64Chars, *iter);
103+
auto pos = strchr(base64Chars, *iter);
103104
if (!pos) // A non-base64 character found.
104105
{
105106
return false;
@@ -108,24 +109,24 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
108109
switch (state)
109110
{
110111
case 0:
111-
tmp = (wchar_t)(pos - base64Chars) << 2;
112+
tmp = (char)(pos - base64Chars) << 2;
112113
state = 1;
113114
break;
114115
case 1:
115-
tmp |= (pos - base64Chars) >> 4;
116-
dst.push_back(tmp);
117-
tmp = (wchar_t)((pos - base64Chars) & 0x0f) << 4;
116+
tmp |= (char)(pos - base64Chars) >> 4;
117+
mbStr += tmp;
118+
tmp = (char)((pos - base64Chars) & 0x0f) << 4;
118119
state = 2;
119120
break;
120121
case 2:
121-
tmp |= (pos - base64Chars) >> 2;
122-
dst.push_back(tmp);
123-
tmp = (wchar_t)((pos - base64Chars) & 0x03) << 6;
122+
tmp |= (char)(pos - base64Chars) >> 2;
123+
mbStr += tmp;
124+
tmp = (char)((pos - base64Chars) & 0x03) << 6;
124125
state = 3;
125126
break;
126127
case 3:
127128
tmp |= pos - base64Chars;
128-
dst.push_back(tmp);
129+
mbStr += tmp;
129130
state = 0;
130131
break;
131132
default:
@@ -176,7 +177,7 @@ bool Base64::s_Decode(const std::wstring_view src, std::wstring& dst) noexcept
176177
return false;
177178
}
178179

179-
return true;
180+
return SUCCEEDED(til::u8u16(mbStr, dst));
180181
}
181182

182183
// Routine Description:

src/terminal/parser/ut_parser/Base64Test.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,5 +84,18 @@ class Microsoft::Console::VirtualTerminal::Base64Test
8484

8585
success = Base64::s_Decode(L"Zm9vYg=", result);
8686
VERIFY_ARE_EQUAL(false, success);
87+
88+
// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
89+
result = L"";
90+
success = Base64::s_Decode(L"44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt", result);
91+
VERIFY_ARE_EQUAL(true, success);
92+
VERIFY_ARE_EQUAL(L"にほんご汉语한국", result);
93+
94+
// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
95+
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
96+
result = L"";
97+
success = Base64::s_Decode(L"8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=", result);
98+
VERIFY_ARE_EQUAL(true, success);
99+
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", result);
87100
}
88101
};

src/terminal/parser/ut_parser/OutputEngineTest.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2677,6 +2677,21 @@ class StateMachineExternalTest final
26772677

26782678
pDispatch->ClearState();
26792679

2680+
// Passing an empty `Pc` param and a base64-encoded multibyte text `Pd` works.
2681+
// U+306b U+307b U+3093 U+3054 U+6c49 U+8bed U+d55c U+ad6d
2682+
mach.ProcessString(L"\x1b]52;;44Gr44G744KT44GU5rGJ6K+t7ZWc6rWt\x07");
2683+
VERIFY_ARE_EQUAL(L"にほんご汉语한국", pDispatch->_copyContent);
2684+
2685+
pDispatch->ClearState();
2686+
2687+
// Passing an empty `Pc` param and a base64-encoded multibyte text w/ emoji sequences `Pd` works.
2688+
// U+d83d U+dc4d U+d83d U+dc4d U+d83c U+dffb U+d83d U+dc4d U+d83c U+dffc U+d83d
2689+
// U+dc4d U+d83c U+dffd U+d83d U+dc4d U+d83c U+dffe U+d83d U+dc4d U+d83c U+dfff
2690+
mach.ProcessString(L"\x1b]52;;8J+RjfCfkY3wn4+78J+RjfCfj7zwn5GN8J+PvfCfkY3wn4++8J+RjfCfj78=\x07");
2691+
VERIFY_ARE_EQUAL(L"👍👍🏻👍🏼👍🏽👍🏾👍🏿", pDispatch->_copyContent);
2692+
2693+
pDispatch->ClearState();
2694+
26802695
// Passing a non-empty `Pc` param (`s0` is ignored) and a valid `Pd` param works.
26812696
mach.ProcessString(L"\x1b]52;s0;Zm9v\x07");
26822697
VERIFY_ARE_EQUAL(L"foo", pDispatch->_copyContent);

0 commit comments

Comments
 (0)