Skip to content

Commit 4c2af25

Browse files
authored
aot compiler: Use larger alignment for load/store when possible (bytecodealliance#3552)
Consider the following wasm module: ```wast (module (func (export "foo") i32.const 0x104 i32.const 0x12345678 i32.store ) (memory 1 1) ) ``` While the address (0x104) is perfectly aligned for i32.store, as our aot compiler uses 1-byte alignment for load/store LLVM IR instructions, it often produces inefficient machine code, especially for alignment-sensitive targets. For example, the above "foo" function is compiled into the following xtensa machine code. ``` 0000002c <aot_func_internal#0>: 2c: 004136 entry a1, 32 2f: 07a182 movi a8, 0x107 32: 828a add.n a8, a2, a8 34: 291c movi.n a9, 18 36: 004892 s8i a9, a8, 0 39: 06a182 movi a8, 0x106 3c: 828a add.n a8, a2, a8 3e: ffff91 l32r a9, 3c <aot_func_internal#0+0x10> (ff91828a <aot_func_internal#0+0xff91825e>) 3e: R_XTENSA_SLOT0_OP .literal+0x8 41: 004892 s8i a9, a8, 0 44: 05a182 movi a8, 0x105 47: 828a add.n a8, a2, a8 49: ffff91 l32r a9, 48 <aot_func_internal#0+0x1c> (ffff9182 <aot_func_internal#0+0xffff9156>) 49: R_XTENSA_SLOT0_OP .literal+0xc 4c: 41a890 srli a10, a9, 8 4f: 0048a2 s8i a10, a8, 0 52: 04a182 movi a8, 0x104 55: 828a add.n a8, a2, a8 57: 004892 s8i a9, a8, 0 5a: f01d retw.n ``` Note that each of the four bytes is stored separately using the one-byte store instruction, s8i. This commit tries to use larger alignments for load/store LLVM IR instructions when possible. With this commit, the above example is compiled into the following machine code, which seems more reasonable. ``` 0000002c <aot_func_internal#0>: 2c: 004136 entry a1, 32 2f: ffff81 l32r a8, 2c <aot_func_internal#0> (81004136 <aot_func_internal#0+0x8100410a>) 2f: R_XTENSA_SLOT0_OP .literal+0x8 32: 416282 s32i a8, a2, 0x104 35: f01d retw.n ``` Note: this doesn't work well for --xip because aot_load_const_from_table() hides the constness of the value. Maybe we need our own mechanism to propagate the constness and the value.
1 parent 607ae47 commit 4c2af25

File tree

5 files changed

+65
-19
lines changed

5 files changed

+65
-19
lines changed

core/iwasm/common/wasm_memory.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,12 @@ wasm_enlarge_memory_internal(WASMModuleInstance *module, uint32 inc_page_count)
883883
}
884884
#endif /* end of WASM_MEM_ALLOC_WITH_USAGE */
885885

886+
/*
887+
* AOT compiler assumes at least 8 byte alignment.
888+
* see aot_check_memory_overflow.
889+
*/
890+
bh_assert(((uintptr_t)memory->memory_data & 0x7) == 0);
891+
886892
memory->num_bytes_per_page = num_bytes_per_page;
887893
memory->cur_page_count = total_page_count;
888894
memory->max_page_count = max_page_count;
@@ -1032,5 +1038,11 @@ wasm_allocate_linear_memory(uint8 **data, bool is_shared_memory,
10321038
#endif
10331039
}
10341040

1041+
/*
1042+
* AOT compiler assumes at least 8 byte alignment.
1043+
* see aot_check_memory_overflow.
1044+
*/
1045+
bh_assert(((uintptr_t)*data & 0x7) == 0);
1046+
10351047
return BHT_OK;
10361048
}

core/iwasm/compilation/aot_emit_memory.c

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ get_memory_curr_page_count(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);
9696

9797
LLVMValueRef
9898
aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
99-
mem_offset_t offset, uint32 bytes, bool enable_segue)
99+
mem_offset_t offset, uint32 bytes, bool enable_segue,
100+
unsigned int *alignp)
100101
{
101102
LLVMValueRef offset_const =
102103
MEMORY64_COND_VALUE(I64_CONST(offset), I32_CONST(offset));
@@ -180,6 +181,26 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
180181
comp_ctx->comp_data->memories[0].init_page_count;
181182
uint64 mem_data_size = (uint64)num_bytes_per_page * init_page_count;
182183

184+
if (alignp != NULL) {
185+
/*
186+
* A note about max_align below:
187+
* the assumption here is the base address of a linear memory
188+
* has the natural alignment. for platforms using mmap, it can
189+
* be even larger. for now, use a conservative value.
190+
*/
191+
const int max_align = 8;
192+
int shift = ffs((int)(unsigned int)mem_offset);
193+
if (shift == 0) {
194+
*alignp = max_align;
195+
}
196+
else {
197+
unsigned int align = 1 << (shift - 1);
198+
if (align > max_align) {
199+
align = max_align;
200+
}
201+
*alignp = align;
202+
}
203+
}
183204
if (mem_offset + bytes <= mem_data_size) {
184205
/* inside memory space */
185206
if (comp_ctx->pointer_size == sizeof(uint64))
@@ -205,6 +226,9 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
205226
return maddr;
206227
}
207228
}
229+
else if (alignp != NULL) {
230+
*alignp = 1;
231+
}
208232

209233
if (is_target_64bit) {
210234
if (!(offset_const = LLVMBuildZExt(comp_ctx->builder, offset_const,
@@ -324,7 +348,7 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
324348
aot_set_last_error("llvm build load failed."); \
325349
goto fail; \
326350
} \
327-
LLVMSetAlignment(value, 1); \
351+
LLVMSetAlignment(value, known_align); \
328352
} while (0)
329353

330354
#define BUILD_TRUNC(value, data_type) \
@@ -343,7 +367,7 @@ aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
343367
aot_set_last_error("llvm build store failed."); \
344368
goto fail; \
345369
} \
346-
LLVMSetAlignment(res, 1); \
370+
LLVMSetAlignment(res, known_align); \
347371
} while (0)
348372

349373
#define BUILD_SIGN_EXT(dst_type) \
@@ -445,8 +469,9 @@ aot_compile_op_i32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
445469
LLVMTypeRef data_type;
446470
bool enable_segue = comp_ctx->enable_segue_i32_load;
447471

472+
unsigned int known_align;
448473
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
449-
enable_segue)))
474+
enable_segue, &known_align)))
450475
return false;
451476

452477
switch (bytes) {
@@ -515,8 +540,9 @@ aot_compile_op_i64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
515540
LLVMTypeRef data_type;
516541
bool enable_segue = comp_ctx->enable_segue_i64_load;
517542

543+
unsigned int known_align;
518544
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
519-
enable_segue)))
545+
enable_segue, &known_align)))
520546
return false;
521547

522548
switch (bytes) {
@@ -591,8 +617,9 @@ aot_compile_op_f32_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
591617
LLVMValueRef maddr, value;
592618
bool enable_segue = comp_ctx->enable_segue_f32_load;
593619

620+
unsigned int known_align;
594621
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4,
595-
enable_segue)))
622+
enable_segue, &known_align)))
596623
return false;
597624

598625
if (!enable_segue)
@@ -614,8 +641,9 @@ aot_compile_op_f64_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
614641
LLVMValueRef maddr, value;
615642
bool enable_segue = comp_ctx->enable_segue_f64_load;
616643

644+
unsigned int known_align;
617645
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8,
618-
enable_segue)))
646+
enable_segue, &known_align)))
619647
return false;
620648

621649
if (!enable_segue)
@@ -640,8 +668,9 @@ aot_compile_op_i32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
640668

641669
POP_I32(value);
642670

671+
unsigned int known_align;
643672
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
644-
enable_segue)))
673+
enable_segue, &known_align)))
645674
return false;
646675

647676
switch (bytes) {
@@ -691,8 +720,9 @@ aot_compile_op_i64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
691720

692721
POP_I64(value);
693722

723+
unsigned int known_align;
694724
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
695-
enable_segue)))
725+
enable_segue, &known_align)))
696726
return false;
697727

698728
switch (bytes) {
@@ -748,8 +778,9 @@ aot_compile_op_f32_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
748778

749779
POP_F32(value);
750780

781+
unsigned int known_align;
751782
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 4,
752-
enable_segue)))
783+
enable_segue, &known_align)))
753784
return false;
754785

755786
if (!enable_segue)
@@ -771,8 +802,9 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
771802

772803
POP_F64(value);
773804

805+
unsigned int known_align;
774806
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, 8,
775-
enable_segue)))
807+
enable_segue, &known_align)))
776808
return false;
777809

778810
if (!enable_segue)
@@ -1302,7 +1334,7 @@ aot_compile_op_atomic_rmw(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
13021334
POP_I64(value);
13031335

13041336
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
1305-
enable_segue)))
1337+
enable_segue, NULL)))
13061338
return false;
13071339

13081340
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@@ -1392,7 +1424,7 @@ aot_compile_op_atomic_cmpxchg(AOTCompContext *comp_ctx,
13921424
}
13931425

13941426
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
1395-
enable_segue)))
1427+
enable_segue, NULL)))
13961428
return false;
13971429

13981430
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@@ -1505,7 +1537,7 @@ aot_compile_op_atomic_wait(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
15051537
CHECK_LLVM_CONST(is_wait64);
15061538

15071539
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
1508-
false)))
1540+
false, NULL)))
15091541
return false;
15101542

15111543
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))
@@ -1579,7 +1611,7 @@ aot_compiler_op_atomic_notify(AOTCompContext *comp_ctx,
15791611
POP_I32(count);
15801612

15811613
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes,
1582-
false)))
1614+
false, NULL)))
15831615
return false;
15841616

15851617
if (!check_memory_alignment(comp_ctx, func_ctx, maddr, align))

core/iwasm/compilation/aot_emit_memory.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ aot_compile_op_f64_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
5353

5454
LLVMValueRef
5555
aot_check_memory_overflow(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx,
56-
mem_offset_t offset, uint32 bytes, bool enable_segue);
56+
mem_offset_t offset, uint32 bytes, bool enable_segue,
57+
unsigned int *alignp);
5758

5859
bool
5960
aot_compile_op_memory_size(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx);

core/iwasm/compilation/simd/simd_load_store.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ simd_load(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align,
1919
LLVMValueRef maddr, data;
2020

2121
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
22-
data_length, enable_segue))) {
22+
data_length, enable_segue, NULL))) {
2323
HANDLE_FAILURE("aot_check_memory_overflow");
2424
return NULL;
2525
}
@@ -287,7 +287,7 @@ simd_store(AOTCompContext *comp_ctx, AOTFuncContext *func_ctx, uint32 align,
287287
LLVMValueRef maddr, result;
288288

289289
if (!(maddr = aot_check_memory_overflow(comp_ctx, func_ctx, offset,
290-
data_length, enable_segue)))
290+
data_length, enable_segue, NULL)))
291291
return false;
292292

293293
if (!(maddr = LLVMBuildBitCast(comp_ctx->builder, maddr, value_ptr_type,

tests/unit/compilation/aot_emit_memory_test.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ TEST_F(compilation_aot_emit_memory_test, aot_check_memory_overflow)
100100

101101
for (uint32 i = 0; i < DEFAULT_CYCLE_TIMES; i++) {
102102
offset = (1 + (rand() % (DEFAULT_MAX_RAND_NUM - 1 + 1)));
103-
aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, false);
103+
aot_check_memory_overflow(comp_ctx, func_ctx, offset, bytes, false,
104+
NULL);
104105
}
105106
}
106107

0 commit comments

Comments
 (0)