Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/tools/fuzzing.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,8 @@ class TranslateToFuzzReader {
Expression* makeBasicRef(Type type);
Expression* makeCompoundRef(Type type);

// Makes an expression that produces a string: a string.new from a WTF-16
// array or from a code point when inside a function, and otherwise a
// string.const. makeBasicRef uses this for HeapType::string.
Expression* makeString();

// Similar to makeBasic/CompoundRef, but indicates that this value will be
// used in a place that will trap on null. For example, the reference of a
// struct.get or array.set would use this.
Expand Down Expand Up @@ -378,6 +380,7 @@ class TranslateToFuzzReader {
Type getLoggableType();
bool isLoggableType(Type type);
Nullability getNullability();
// Picks either Mutable or Immutable, decided by oneIn(2).
Mutability getMutability();
Nullability getSubType(Nullability nullability);
HeapType getSubType(HeapType type);
Type getSubType(Type type);
Expand Down
121 changes: 80 additions & 41 deletions src/tools/fuzzing/fuzzing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2605,47 +2605,7 @@ Expression* TranslateToFuzzReader::makeBasicRef(Type type) {
return null;
}
case HeapType::string: {
// Construct an interesting WTF-8 string from parts.
std::stringstream wtf8;
bool lastWasLeadingSurrogate = false;
for (size_t i = 0, end = upTo(4); i < end; ++i) {
switch (upTo(6)) {
case 0:
// A simple ascii string.
wtf8 << std::to_string(upTo(1024));
break;
case 1:
// '£'
wtf8 << "\xC2\xA3";
break;
case 2:
// '€'
wtf8 << "\xE2\x82\xAC";
break;
case 3:
// '𐍈'
wtf8 << "\xF0\x90\x8D\x88";
break;
case 4:
// The leading surrogate in '𐍈'
wtf8 << "\xED\xA0\x80";
lastWasLeadingSurrogate = true;
continue;
case 5:
if (lastWasLeadingSurrogate) {
// Avoid invalid WTF-8.
continue;
}
// The trailing surrogate in '𐍈'
wtf8 << "\xED\xBD\x88";
break;
}
lastWasLeadingSurrogate = false;
}
std::stringstream wtf16;
// TODO: Use wtf16.view() once we have C++20.
String::convertWTF8ToWTF16(wtf16, wtf8.str());
return builder.makeStringConst(wtf16.str());
return makeString();
}
case HeapType::stringview_wtf16:
// We fully support wtf16 strings.
Expand Down Expand Up @@ -2760,6 +2720,81 @@ Expression* TranslateToFuzzReader::makeCompoundRef(Type type) {
}
}

Expression* TranslateToFuzzReader::makeString() {
  // Strings are fuzzed in the JS style. The i16 array type is always built up
  // front, even if the chosen strategy ends up not using it, so the random
  // draws happen in a fixed order: mutability, nullability, then strategy.
  const auto fieldMutability = getMutability();
  const auto i16ArrayHeapType =
    HeapType(Array(Field(Field::PackedType::i16, fieldMutability)));
  const auto arrayNullability = getNullability();
  const auto i16ArrayType = Type(i16ArrayHeapType, arrayNullability);

  const auto strategy = upTo(3);
  if (strategy != 0 && strategy != 1 && strategy != 2) {
    WASM_UNREACHABLE("bad switch");
  }

  // The string.new forms need child expressions, and we can only make those
  // inside a function. Outside of one, every strategy degrades to the
  // string.const path at the bottom.
  if (funcContext) {
    if (strategy == 0) {
      // Make a string from a range of an array.
      auto* array = make(i16ArrayType);
      auto* start = make(Type::i32);
      auto* end = make(Type::i32);
      return builder.makeStringNew(
        StringNewWTF16Array, array, start, end, false);
    }
    if (strategy == 1) {
      // Make a string from a single code point.
      auto* codePoint = make(Type::i32);
      return builder.makeStringNew(
        StringNewFromCodePoint, codePoint, nullptr, false);
    }
  }

  // Construct an interesting WTF-8 string from parts and use string.const.
  // (This generation logic is carried over unchanged from the code that used
  // to be inline in makeBasicRef's HeapType::string case.)
  std::stringstream wtf8;
  bool prevWasLeadingSurrogate = false;
  for (size_t remaining = upTo(4); remaining > 0; --remaining) {
    const auto part = upTo(6);
    if (part == 4) {
      // The leading surrogate in '𐍈'. Remember it so that the next part is
      // not a trailing surrogate.
      wtf8 << "\xED\xA0\x80";
      prevWasLeadingSurrogate = true;
      continue;
    }
    if (part == 5 && prevWasLeadingSurrogate) {
      // A trailing surrogate right after a leading one would be invalid
      // WTF-8, so emit nothing (and keep remembering the leading surrogate).
      continue;
    }
    if (part == 0) {
      // A simple ascii string.
      wtf8 << std::to_string(upTo(1024));
    } else if (part == 1) {
      // '£'
      wtf8 << "\xC2\xA3";
    } else if (part == 2) {
      // '€'
      wtf8 << "\xE2\x82\xAC";
    } else if (part == 3) {
      // '𐍈'
      wtf8 << "\xF0\x90\x8D\x88";
    } else if (part == 5) {
      // The trailing surrogate in '𐍈'
      wtf8 << "\xED\xBD\x88";
    }
    prevWasLeadingSurrogate = false;
  }

  // string.const takes WTF-16 contents, so convert before building it.
  // TODO: Use wtf16.view() once we have C++20.
  std::stringstream wtf16;
  String::convertWTF8ToWTF16(wtf16, wtf8.str());
  return builder.makeStringConst(wtf16.str());
}

Expression* TranslateToFuzzReader::makeTrappingRefUse(HeapType type) {
auto percent = upTo(100);
// Only give a low probability to emit a nullable reference.
Expand Down Expand Up @@ -4071,6 +4106,10 @@ Nullability TranslateToFuzzReader::getNullability() {
return Nullable;
}

Mutability TranslateToFuzzReader::getMutability() {
  // Choose between the two mutabilities; oneIn(2) decides which.
  if (oneIn(2)) {
    return Mutable;
  }
  return Immutable;
}

Nullability TranslateToFuzzReader::getSubType(Nullability nullability) {
if (nullability == NonNullable) {
return NonNullable;
Expand Down
11 changes: 11 additions & 0 deletions src/wasm-interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "support/bits.h"
#include "support/safe_integer.h"
#include "support/stdckdint.h"
#include "support/string.h"
#include "wasm-builder.h"
#include "wasm-traversal.h"
#include "wasm.h"
Expand Down Expand Up @@ -1898,6 +1899,16 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
}
return makeGCData(contents, curr->type);
}
case StringNewFromCodePoint: {
// Read the operand as unsigned, so an i32 with the high bit set is treated
// as a very large code point rather than as a negative number.
uint32_t codePoint = ptr.getSingleValue().getUnsigned();
// 0x10FFFF is the largest Unicode code point; anything above it traps.
// Surrogate values are not rejected here.
if (codePoint > 0x10FFFF) {
trap("invalid code point");
}
// Encode the code point as WTF-16 and wrap the encoded data in a string
// literal.
std::stringstream wtf16;
String::writeWTF16CodePoint(wtf16, codePoint);
std::string str = wtf16.str();
return Literal(str);
}
default:
// TODO: others
return Flow(NONCONSTANT_FLOW);
Expand Down
86 changes: 86 additions & 0 deletions test/lit/exec/strings.wast
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,64 @@
;; Concatenating these surrogates creates '𐍈'.
(string.concat (string.const "\ED\A0\80") (string.const "\ED\BD\88"))
)

;; CHECK: [fuzz-exec] calling string.from_code_point
;; CHECK-NEXT: [fuzz-exec] note result: string.from_code_point => string("A")
(func $string.from_code_point (export "string.from_code_point") (result stringref)
;; Basic case: code point 65 is ASCII 'A', so the expected result is the
;; one-character string "A".
(string.from_code_point
(i32.const 65)
)
)

;; CHECK: [fuzz-exec] calling unsigned_code_point
;; CHECK-NEXT: [fuzz-exec] note result: unsigned_code_point => string("\u0093")
(func $unsigned_code_point (export "unsigned_code_point") (result stringref)
(string.from_code_point
;; This must be interpreted as unsigned, that is, in the escaped output
;; the top byte is 0. 147 is 0x93, so the expected output is "\u0093".
(i32.const 147)
)
)

;; CHECK: [fuzz-exec] calling weird_code_point
;; CHECK-NEXT: [fuzz-exec] note result: weird_code_point => string("\u03e8")
(func $weird_code_point (export "weird_code_point") (result stringref)
;; A non-ASCII code point that fits in a single 16-bit code unit; it is
;; expected to be printed escaped, as "\u03e8".
(string.from_code_point
(i32.const 0x3e8)
)
)

;; CHECK: [fuzz-exec] calling isolated_high_code_point
;; CHECK-NEXT: [fuzz-exec] note result: isolated_high_code_point => string("\ud800")
(func $isolated_high_code_point (export "isolated_high_code_point") (result stringref)
;; An isolated high surrogate. This is not an error: the result is a string
;; containing just that one code unit, "\ud800".
(string.from_code_point
(i32.const 0xD800)
)
)

;; CHECK: [fuzz-exec] calling isolated_low_code_point
;; CHECK-NEXT: [fuzz-exec] note result: isolated_low_code_point => string("\udc00")
(func $isolated_low_code_point (export "isolated_low_code_point") (result stringref)
;; An isolated low surrogate. This is not an error: the result is a string
;; containing just that one code unit, "\udc00".
(string.from_code_point
(i32.const 0xDC00)
)
)

;; CHECK: [fuzz-exec] calling surrogate_pair_code_point
;; CHECK-NEXT: [fuzz-exec] note result: surrogate_pair_code_point => string("\u286c")
(func $surrogate_pair_code_point (export "surrogate_pair_code_point") (result stringref)
(string.from_code_point
;; NOTE(review): despite the function name, this does not exercise a
;; surrogate pair. 0x286c is decimal 10348, a code point that fits in the
;; single code unit "\u286c". '𐍈' is U+10348, i.e. (i32.const 0x10348),
;; whose WTF-16 form is the pair "\ud800\udf48". Switching to that value
;; also requires regenerating the CHECK lines for this function.
(i32.const 0x286c)
)
)

;; CHECK: [fuzz-exec] calling invalid_code_point
;; CHECK-NEXT: [trap invalid code point]
(func $invalid_code_point (export "invalid_code_point") (result stringref)
;; The operand is read as unsigned, so -83 is 0xFFFFFFAD, which is far above
;; the maximum code point 0x10FFFF. This is expected to trap with
;; "invalid code point".
(string.from_code_point
(i32.const -83)
)
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Other test cases to add:

An isolated high surrogate: i32.const 0xD800
An isolated low surrogate: i32.const 0xDC00
A code point requiring a surrogate pair: U+10348 (𐍈), i.e. i32.const 0x10348

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, added. I also noticed a fuzz error on an unsigned case (which now works), so I added a test for that as well.

)
;; CHECK: [fuzz-exec] calling new_wtf16_array
;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello")
Expand Down Expand Up @@ -518,6 +576,27 @@

;; CHECK: [fuzz-exec] calling concat-surrogates
;; CHECK-NEXT: [fuzz-exec] note result: concat-surrogates => string("\ud800\udf48")

;; CHECK: [fuzz-exec] calling string.from_code_point
;; CHECK-NEXT: [fuzz-exec] note result: string.from_code_point => string("A")

;; CHECK: [fuzz-exec] calling unsigned_code_point
;; CHECK-NEXT: [fuzz-exec] note result: unsigned_code_point => string("\u0093")

;; CHECK: [fuzz-exec] calling weird_code_point
;; CHECK-NEXT: [fuzz-exec] note result: weird_code_point => string("\u03e8")

;; CHECK: [fuzz-exec] calling isolated_high_code_point
;; CHECK-NEXT: [fuzz-exec] note result: isolated_high_code_point => string("\ud800")

;; CHECK: [fuzz-exec] calling isolated_low_code_point
;; CHECK-NEXT: [fuzz-exec] note result: isolated_low_code_point => string("\udc00")

;; CHECK: [fuzz-exec] calling surrogate_pair_code_point
;; CHECK-NEXT: [fuzz-exec] note result: surrogate_pair_code_point => string("\u286c")

;; CHECK: [fuzz-exec] calling invalid_code_point
;; CHECK-NEXT: [trap invalid code point]
;; CHECK-NEXT: [fuzz-exec] comparing compare.1
;; CHECK-NEXT: [fuzz-exec] comparing compare.10
;; CHECK-NEXT: [fuzz-exec] comparing compare.2
Expand All @@ -540,6 +619,9 @@
;; CHECK-NEXT: [fuzz-exec] comparing eq.5
;; CHECK-NEXT: [fuzz-exec] comparing get_codeunit
;; CHECK-NEXT: [fuzz-exec] comparing get_length
;; CHECK-NEXT: [fuzz-exec] comparing invalid_code_point
;; CHECK-NEXT: [fuzz-exec] comparing isolated_high_code_point
;; CHECK-NEXT: [fuzz-exec] comparing isolated_low_code_point
;; CHECK-NEXT: [fuzz-exec] comparing new_2
;; CHECK-NEXT: [fuzz-exec] comparing new_4
;; CHECK-NEXT: [fuzz-exec] comparing new_empty
Expand All @@ -551,3 +633,7 @@
;; CHECK-NEXT: [fuzz-exec] comparing slice
;; CHECK-NEXT: [fuzz-exec] comparing slice-big
;; CHECK-NEXT: [fuzz-exec] comparing slice-unicode
;; CHECK-NEXT: [fuzz-exec] comparing string.from_code_point
;; CHECK-NEXT: [fuzz-exec] comparing surrogate_pair_code_point
;; CHECK-NEXT: [fuzz-exec] comparing unsigned_code_point
;; CHECK-NEXT: [fuzz-exec] comparing weird_code_point