@@ -1325,6 +1325,25 @@ void WriteUTF8AsUTF16(StreamingParserHandler* writer, const std::string& utf8) {
13251325 writer->HandleString16 (SpanFrom (UTF8ToUTF16 (SpanFrom (utf8))));
13261326}
13271327
1328+ TEST (JsonEncoder, OverlongEncodings) {
1329+ std::string out;
1330+ Status status;
1331+ std::unique_ptr<StreamingParserHandler> writer =
1332+ NewJSONEncoder (&GetTestPlatform (), &out, &status);
1333+
1334+ // We encode 0x7f, which is the DEL ascii character, as a 4 byte UTF8
1335+ // sequence. This is called an overlong encoding, because only 1 byte
1336+ // is needed to represent 0x7f as UTF8.
1337+ std::vector<uint8_t > chars = {
1338+ 0xf0 , // Starts 4 byte utf8 sequence
1339+ 0x80 , // continuation byte
1340+ 0x81 , // continuation byte w/ payload bit 7 set to 1.
1341+ 0xbf , // continuation byte w/ payload bits 0-6 set to 11111.
1342+ };
1343+ writer->HandleString8 (SpanFrom (chars));
1344+ EXPECT_EQ (" \"\" " , out); // Empty string means that 0x7f was rejected (good).
1345+ }
1346+
13281347TEST (JsonStdStringWriterTest, HelloWorld) {
13291348 std::string out;
13301349 Status status;
@@ -1561,6 +1580,13 @@ TEST_F(JsonParserTest, UsAsciiDelCornerCase) {
15611580 " string16: a\x7f\n "
15621581 " map end\n " ,
15631582 log_.str ());
1583+
1584+ // We've seen an implementation of UTF16ToUTF8 which would replace the DEL
1585+ // character with ' ', so this simple roundtrip tests the routines in
1586+ // encoding_test_helper.h, to make test failures of the above easier to
1587+ // diagnose.
1588+ std::vector<uint16_t > utf16 = UTF8ToUTF16 (SpanFrom (json));
1589+ EXPECT_EQ (json, UTF16ToUTF8 (SpanFrom (utf16)));
15641590}
15651591
15661592TEST_F (JsonParserTest, Whitespace) {
0 commit comments