Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Current Trunk
- Add a new `BinaryenModuleReadWithFeatures` function to the C API that allows
configuring which features to enable in the parser.
- The build-time option to use legacy WasmGC opcodes is removed.
- The strings in `string.const` instructions must now be valid WTF-8.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this not a limitation for anyone we know?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It shouldn't be, no. Before this change, most invalid WTF-8 would end up littered with replacement characters in the output anyway, and certainly no one wants that.


v117
----
Expand Down
3 changes: 0 additions & 3 deletions scripts/fuzz_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,6 @@ def is_git_repo():
'exception-handling.wast',
'translate-to-new-eh.wast',
'rse-eh.wast',
# Non-UTF8 strings trap in V8, and have limitations in our interpreter
'string-lowering.wast',
'precompute-strings.wast',
]


Expand Down
8 changes: 7 additions & 1 deletion src/binaryen-c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "pass.h"
#include "shell-interface.h"
#include "support/colors.h"
#include "support/string.h"
#include "wasm-binary.h"
#include "wasm-builder.h"
#include "wasm-interpreter.h"
Expand Down Expand Up @@ -1895,8 +1896,13 @@ BinaryenExpressionRef BinaryenStringNew(BinaryenModuleRef module,
}
// Creates a `string.const` expression in `module` from the C string `name`.
//
// `name` is expected to be valid WTF-8. It is re-encoded to WTF-16 before
// being handed to the builder. Validity is only checked with an assertion, so
// in builds where asserts are compiled out an invalid input is not rejected
// here.
BinaryenExpressionRef BinaryenStringConst(BinaryenModuleRef module,
                                          const char* name) {
  // Re-encode from WTF-8 to WTF-16.
  std::stringstream wtf16;
  [[maybe_unused]] bool valid = String::convertWTF8ToWTF16(wtf16, name);
  assert(valid);
  // TODO: Use wtf16.view() once we have C++20.
  return static_cast<Expression*>(
    Builder(*(Module*)module).makeStringConst(wtf16.str()));
}
BinaryenExpressionRef BinaryenStringMeasure(BinaryenModuleRef module,
BinaryenOp op,
Expand Down
2 changes: 1 addition & 1 deletion src/literal.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class Literal {
assert(type.isSignature());
}
explicit Literal(std::shared_ptr<GCData> gcData, HeapType type);
explicit Literal(std::string string);
explicit Literal(std::string_view string);
Literal(const Literal& other);
Literal& operator=(const Literal& other);
~Literal();
Expand Down
9 changes: 8 additions & 1 deletion src/parser/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "lexer.h"
#include "support/name.h"
#include "support/result.h"
#include "support/string.h"
#include "wasm-builder.h"
#include "wasm-ir-builder.h"
#include "wasm.h"
Expand Down Expand Up @@ -2491,7 +2492,13 @@ struct ParseDefsCtx : TypeParserCtx<ParseDefsCtx> {
// Parser callback for a `string.const` instruction at source position `pos`.
//
// `str` holds the string contents as WTF-8. They are re-encoded to WTF-16
// before being passed to the IR builder. If the conversion reports that `str`
// is not valid, a parse error located at `pos` is returned instead.
// `annotations` is not used here.
Result<> makeStringConst(Index pos,
                         const std::vector<Annotation>& annotations,
                         std::string_view str) {
  // Re-encode from WTF-8 to WTF-16.
  std::stringstream wtf16;
  if (!String::convertWTF8ToWTF16(wtf16, str)) {
    return in.err(pos, "invalid string constant");
  }
  // TODO: Use wtf16.view() once we have C++20.
  return withLoc(pos, irBuilder.makeStringConst(wtf16.str()));
}

Result<> makeStringMeasure(Index pos,
Expand Down
21 changes: 2 additions & 19 deletions src/parser/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <variant>

#include "lexer.h"
#include "support/string.h"

using namespace std::string_view_literals;

Expand Down Expand Up @@ -308,25 +309,7 @@ struct LexStrCtx : LexCtx {
if ((0xd800 <= u && u < 0xe000) || 0x110000 <= u) {
return false;
}
if (u < 0x80) {
// 0xxxxxxx
*escapeBuilder << uint8_t(u);
} else if (u < 0x800) {
// 110xxxxx 10xxxxxx
*escapeBuilder << uint8_t(0b11000000 | ((u >> 6) & 0b00011111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 0) & 0b00111111));
} else if (u < 0x10000) {
// 1110xxxx 10xxxxxx 10xxxxxx
*escapeBuilder << uint8_t(0b11100000 | ((u >> 12) & 0b00001111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 6) & 0b00111111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 0) & 0b00111111));
} else {
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*escapeBuilder << uint8_t(0b11110000 | ((u >> 18) & 0b00000111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 12) & 0b00111111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 6) & 0b00111111));
*escapeBuilder << uint8_t(0b10000000 | ((u >> 0) & 0b00111111));
}
String::writeWTF8CodePoint(*escapeBuilder, u);
return true;
}
};
Expand Down
8 changes: 7 additions & 1 deletion src/passes/Print.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2232,7 +2232,13 @@ struct PrintExpressionContents
}
// Prints a `string.const` instruction. The string held by `curr` is WTF-16,
// so it is re-encoded to WTF-8 before being escaped and written to the
// output; the conversion is expected to succeed and is checked by assertion.
void visitStringConst(StringConst* curr) {
  printMedium(o, "string.const ");
  // Re-encode from WTF-16 to WTF-8.
  std::stringstream wtf8;
  [[maybe_unused]] bool valid =
    String::convertWTF16ToWTF8(wtf8, curr->string.str);
  assert(valid);
  // TODO: Use wtf8.view() once we have C++20.
  String::printEscaped(o, wtf8.str());
}
void visitStringMeasure(StringMeasure* curr) {
switch (curr->op) {
Expand Down
8 changes: 7 additions & 1 deletion src/passes/StringLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,14 @@ struct StringGathering : public Pass {
}

auto& string = strings[i];
// Re-encode from WTF-16 to WTF-8 to make the name easier to read.
std::stringstream wtf8;
[[maybe_unused]] bool valid =
String::convertWTF16ToWTF8(wtf8, string.str);
assert(valid);
// TODO: Use wtf8.view() once we have C++20.
auto name = Names::getValidGlobalName(
*module, std::string("string.const_") + std::string(string.str));
*module, std::string("string.const_") + std::string(wtf8.str()));
globalName = name;
newNames.insert(name);
auto* stringConst = builder.makeStringConst(string);
Expand Down
7 changes: 6 additions & 1 deletion src/support/json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ namespace json {

void Value::stringify(std::ostream& os, bool pretty) {
if (isString()) {
wasm::String::printEscapedJSON(os, getCString());
std::stringstream wtf16;
[[maybe_unused]] bool valid =
wasm::String::convertWTF8ToWTF16(wtf16, getIString().str);
assert(valid);
// TODO: Use wtf16.view() once we have C++20.
wasm::String::printEscapedJSON(os, wtf16.str());
} else if (isArray()) {
os << '[';
auto first = true;
Expand Down
Loading