Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions build-scripts/config_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,15 @@ if (NOT DEFINED WAMR_BUILD_EXCE_HANDLING)
set (WAMR_BUILD_EXCE_HANDLING 0)
endif ()

# Default WAMR_BUILD_RELAXED_SIMD to 0 (off) when the caller did not set it.
if (NOT DEFINED WAMR_BUILD_RELAXED_SIMD)
# Relaxed-SIMD is the WebAssembly relaxed-simd proposal (standardized
# in WebAssembly 3.0; it extends the fixed-width SIMD introduced in
# 2.0). Off by default, mirroring the dormant
# `WASM_FEATURE_RELAXED_SIMD` bit in `aot_runtime.h`.
# Enable via `-DWAMR_BUILD_RELAXED_SIMD=1` at cmake time; the
# `if (WAMR_BUILD_RELAXED_SIMD EQUAL 1)` block in this file then
# validates the feature combination and defines
# `WASM_ENABLE_RELAXED_SIMD=1` for the C compiler.
set (WAMR_BUILD_RELAXED_SIMD 0)
endif ()

if (NOT DEFINED WAMR_BUILD_GC)
set (WAMR_BUILD_GC 0)
endif ()
Expand Down Expand Up @@ -470,6 +479,49 @@ if (WAMR_BUILD_SIMD EQUAL 1)
endif ()
add_definitions(-DWASM_ENABLE_SIMD=${SIMD_ENABLED})
endif ()
# Configure-time gate for relaxed-SIMD. Validates that the requested
# feature combination is one the runtime can actually build, then defines
# WASM_ENABLE_RELAXED_SIMD=1 for the C compiler. Every failed check aborts
# configuration with FATAL_ERROR instead of producing a broken build.
if (WAMR_BUILD_RELAXED_SIMD EQUAL 1)
  # Relaxed-SIMD is a strict superset of SIMD — fail fast if the caller
  # forgot to also turn on the base feature; otherwise the interpreter
  # sees a relaxed sub-opcode it can dispatch while the surrounding SIMD
  # machinery (frame_lp v128 cells, simde intrinsics) is compiled out
  # and we'd link against undefined symbols.
  if (NOT WAMR_BUILD_SIMD EQUAL 1)
    message (FATAL_ERROR
             "WAMR_BUILD_RELAXED_SIMD=1 requires WAMR_BUILD_SIMD=1")
  endif ()

  # WASM_ENABLE_SIMD is defined from SIMD_ENABLED (see the SIMD block
  # just above), not from WAMR_BUILD_SIMD directly, so the base feature
  # can still resolve to 0 even though the previous check passed. That
  # is the same "SIMD compiled out" failure, so reject it as well. The
  # DEFINED guard keeps this a no-op if SIMD_ENABLED was never set.
  if (DEFINED SIMD_ENABLED AND NOT SIMD_ENABLED EQUAL 1)
    message (FATAL_ERROR
             "WAMR_BUILD_RELAXED_SIMD=1 requires SIMD to be effectively "
             "enabled, but WASM_ENABLE_SIMD resolved to ${SIMD_ENABLED}")
  endif ()

  # Scope is fast-interp only for now: the relaxed-SIMD dispatch and
  # the SIMDe glue exist only in the fast-interp path.
  if (NOT WAMR_BUILD_FAST_INTERP EQUAL 1)
    message (FATAL_ERROR
             "WAMR_BUILD_RELAXED_SIMD=1 requires WAMR_BUILD_FAST_INTERP=1 "
             "(the relaxed-SIMD dispatch + SIMDe glue lives only in the "
             "fast-interp path; classic-interp doesn't ship a SIMD switch)")
  endif ()

  # The shared loader `prepare_bytecode` accepts the new opcodes when
  # this flag is set, but the AOT / JIT / wamrc compilation paths in
  # `core/iwasm/compilation/aot_compiler.c` all truncate the SIMD
  # sub-opcode to `uint8` (`opcode = (uint8)opcode1`). Sub-opcodes
  # 0x100..0x113 would silently alias onto the legacy opcodes
  # 0x00..0x13 (`SIMD_v128_load`, `SIMD_v128_load8x8_s`, ...), causing
  # garbage memarg reads at codegen time. Reject the combination at
  # configure time rather than silently mis-compile.
  if (WAMR_BUILD_AOT EQUAL 1
      OR WAMR_BUILD_JIT EQUAL 1
      OR WAMR_BUILD_WAMR_COMPILER EQUAL 1
      OR WAMR_BUILD_FAST_JIT EQUAL 1)
    message (FATAL_ERROR
             "WAMR_BUILD_RELAXED_SIMD=1 cannot be combined with "
             "WAMR_BUILD_AOT / WAMR_BUILD_JIT / WAMR_BUILD_FAST_JIT / "
             "WAMR_BUILD_WAMR_COMPILER today — those pipelines truncate "
             "the SIMD sub-opcode to uint8 (see aot_compiler.c) and "
             "would silently mis-compile relaxed-SIMD opcodes "
             "0x100..0x113 as legacy v128_load/store variants. Build "
             "fast-interp-only to use relaxed-SIMD until the AOT/JIT "
             "pipelines learn the wider sub-opcode range.")
  endif ()

  add_definitions (-DWASM_ENABLE_RELAXED_SIMD=1)
  message (" Relaxed SIMD enabled")
endif ()
if (WAMR_BUILD_AOT_STACK_FRAME EQUAL 1)
add_definitions (-DWASM_ENABLE_AOT_STACK_FRAME=1)
message (" AOT stack frame enabled")
Expand Down Expand Up @@ -809,6 +861,7 @@ message (
" \"Multiple Memories\" via WAMR_BUILD_MULTI_MEMORY: ${WAMR_BUILD_MULTI_MEMORY}\n"
" \"Reference Types\" via WAMR_BUILD_REF_TYPES: ${WAMR_BUILD_REF_TYPES}\n"
" \"Reference-Typed Strings\" via WAMR_BUILD_STRINGREF: ${WAMR_BUILD_STRINGREF}\n"
" \"Relaxed SIMD\" via WAMR_BUILD_RELAXED_SIMD: ${WAMR_BUILD_RELAXED_SIMD}\n"
" \"Tail Call\" via WAMR_BUILD_TAIL_CALL: ${WAMR_BUILD_TAIL_CALL}\n"
" \"Threads\" via WAMR_BUILD_SHARED_MEMORY: ${WAMR_BUILD_SHARED_MEMORY}\n"
" \"Typed Function References\" via WAMR_BUILD_GC: ${WAMR_BUILD_GC}\n"
Expand Down
11 changes: 11 additions & 0 deletions core/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,17 @@ unless used elsewhere */
#define WASM_ENABLE_SIMDE 0
#endif

/* Relaxed-SIMD support (WebAssembly relaxed-simd proposal, standardized
 * in WebAssembly 3.0: 20 new opcodes at 0x100..0x113 under the existing
 * 0xfd prefix). Disabled unless manually enabled. The fast-interp path
 * under `WAMR_BUILD_RELAXED_SIMD=1` widens the SIMD sub-opcode IR
 * encoding from 1 byte to 2 bytes and wires SIMDe relaxed intrinsics
 * into the SIMD-prefix switch; AOT/JIT codegen does NOT yet recognize
 * the wider range, so the cmake gate forbids enabling this flag
 * together with AOT / JIT / FAST_JIT / WAMR_COMPILER. */
#ifndef WASM_ENABLE_RELAXED_SIMD
#define WASM_ENABLE_RELAXED_SIMD 0
#endif

/* GC performance profiling */
#ifndef WASM_ENABLE_GC_PERF_PROFILING
#define WASM_ENABLE_GC_PERF_PROFILING 0
Expand Down
314 changes: 303 additions & 11 deletions core/iwasm/interpreter/wasm_interp_fast.c

Large diffs are not rendered by default.

119 changes: 115 additions & 4 deletions core/iwasm/interpreter/wasm_loader.c
Original file line number Diff line number Diff line change
Expand Up @@ -8275,13 +8275,15 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache,
uint32 opcode1;

read_leb_uint32(p, p_end, opcode1);
/* opcode1 was checked in wasm_loader_prepare_bytecode and
is no larger than UINT8_MAX */
opcode = (uint8)opcode1;
/* opcode1 was checked in wasm_loader_prepare_bytecode.
* Legacy SIMD opcodes fit in a uint8 (0x00..0xff);
* relaxed-SIMD opcodes (gated below) span 0x100..0x113.
* Switch on the uint32 directly so both ranges are
* reachable by their enum names. */

/* follow the order of enum WASMSimdEXTOpcode in wasm_opcode.h
*/
switch (opcode) {
switch (opcode1) {
case SIMD_v128_load:
case SIMD_v128_load8x8_s:
case SIMD_v128_load8x8_u:
Expand Down Expand Up @@ -8351,6 +8353,40 @@ wasm_loader_find_block_addr(WASMExecEnv *exec_env, BlockAddr *block_addr_cache,
skip_leb_mem_offset(p, p_end);
break;

#if WASM_ENABLE_RELAXED_SIMD != 0
/* Relaxed-SIMD opcodes carry no immediates beyond the
 * LEB-encoded sub-opcode already consumed above: every
 * operand is a v128 taken from the stack (the madd /
 * nmadd / laneselect / dot_add forms take three v128s,
 * all implicitly via the stack). There is nothing to
 * skip, so these cases `break` on their own and never
 * reach the `default` branch below. They are listed
 * explicitly so the 0x100..0x113 range is handled by
 * name rather than silently routed through `default`. */
case SIMD_i8x16_relaxed_swizzle:
case SIMD_i32x4_relaxed_trunc_f32x4_s:
case SIMD_i32x4_relaxed_trunc_f32x4_u:
case SIMD_i32x4_relaxed_trunc_f64x2_s_zero:
case SIMD_i32x4_relaxed_trunc_f64x2_u_zero:
case SIMD_f32x4_relaxed_madd:
case SIMD_f32x4_relaxed_nmadd:
case SIMD_f64x2_relaxed_madd:
case SIMD_f64x2_relaxed_nmadd:
case SIMD_i8x16_relaxed_laneselect:
case SIMD_i16x8_relaxed_laneselect:
case SIMD_i32x4_relaxed_laneselect:
case SIMD_i64x2_relaxed_laneselect:
case SIMD_f32x4_relaxed_min:
case SIMD_f32x4_relaxed_max:
case SIMD_f64x2_relaxed_min:
case SIMD_f64x2_relaxed_max:
case SIMD_i16x8_relaxed_q15mulr_s:
case SIMD_i16x8_relaxed_dot_i8x16_i7x16_s:
case SIMD_i32x4_relaxed_dot_i8x16_i7x16_add_s:
break;
#endif /* WASM_ENABLE_RELAXED_SIMD */

default:
/*
* since latest SIMD specific used almost every value
Expand Down Expand Up @@ -16178,7 +16214,26 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
pb_read_leb_uint32(p, p_end, opcode1);

#if WASM_ENABLE_FAST_INTERP != 0
#if WASM_ENABLE_RELAXED_SIMD != 0
/* Relaxed-SIMD sub-opcodes span 0x100..0x113, past
* the byte that the legacy emit uses. Widen the
* IR sub-opcode to a 2-byte little-endian uint16
* for every SIMD op so dispatch can read a single
* stride and switch over the full 0x000..0x113
* range. `wasm_loader_emit_int16` writes two
* consecutive bytes via STORE_U16 (no per-byte
* padding even on non-unaligned-access platforms),
* matching the `frame_ip[0] | (frame_ip[1] << 8)`
* decode in `HANDLE_OP(WASM_OP_SIMD_PREFIX)`. IR
* cost vs the legacy 1-byte emit: +1 byte per SIMD
* op on platforms with unaligned access, identical
* on platforms without (the legacy emit already
* burned a padding byte per opcode). */
wasm_loader_emit_int16(loader_ctx, (int16)opcode1);
LOG_OP("%d\t", opcode1);
#else
emit_byte(loader_ctx, opcode1);
#endif
#endif

/* follow the order of enum WASMSimdEXTOpcode in wasm_opcode.h
Expand Down Expand Up @@ -16853,6 +16908,62 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func,
break;
}

#if WASM_ENABLE_RELAXED_SIMD != 0
/* Relaxed-SIMD — type signatures from
* https://github.com/WebAssembly/relaxed-simd/blob/
* main/proposals/relaxed-simd/Overview.md.
*
* unary (1 v128 -> 1 v128): all four trunc variants.
* binary (2 v128 -> 1 v128): swizzle, min/max,
* q15mulr, dot_i8x16_i7x16_s.
* ternary (3 v128 -> 1 v128): madd, nmadd,
* laneselect, dot_i8x16_i7x16_add_s.
*
* The 3-input shape is encoded as POP_V128 (one
* extra v128) + POP2_AND_PUSH (the standard
* 2-pop-1-push) — same pattern bitselect uses
* above so the loader's stack tracker doesn't
* need a new macro. */
case SIMD_i32x4_relaxed_trunc_f32x4_s:
case SIMD_i32x4_relaxed_trunc_f32x4_u:
case SIMD_i32x4_relaxed_trunc_f64x2_s_zero:
case SIMD_i32x4_relaxed_trunc_f64x2_u_zero:
{
POP_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}

case SIMD_i8x16_relaxed_swizzle:
case SIMD_f32x4_relaxed_min:
case SIMD_f32x4_relaxed_max:
case SIMD_f64x2_relaxed_min:
case SIMD_f64x2_relaxed_max:
case SIMD_i16x8_relaxed_q15mulr_s:
case SIMD_i16x8_relaxed_dot_i8x16_i7x16_s:
{
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}

case SIMD_f32x4_relaxed_madd:
case SIMD_f32x4_relaxed_nmadd:
case SIMD_f64x2_relaxed_madd:
case SIMD_f64x2_relaxed_nmadd:
case SIMD_i8x16_relaxed_laneselect:
case SIMD_i16x8_relaxed_laneselect:
case SIMD_i32x4_relaxed_laneselect:
case SIMD_i64x2_relaxed_laneselect:
case SIMD_i32x4_relaxed_dot_i8x16_i7x16_add_s:
{
/* Three v128 inputs: extra POP_V128 first,
* then standard 2-pop-1-push. Same shape as
* SIMD_v128_bitselect above. */
POP_V128();
POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128);
break;
}
#endif /* WASM_ENABLE_RELAXED_SIMD */

default:
{
if (error_buf != NULL) {
Expand Down
32 changes: 32 additions & 0 deletions core/iwasm/interpreter/wasm_opcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,38 @@ typedef enum WASMSimdEXTOpcode {
SIMD_i32x4_trunc_sat_f64x2_u_zero = 0xfd,
SIMD_f64x2_convert_low_i32x4_s = 0xfe,
SIMD_f64x2_convert_low_i32x4_u = 0xff,

#if WASM_ENABLE_RELAXED_SIMD != 0
/* Relaxed-SIMD proposal (standardized in WebAssembly 3.0; it
 * extends the fixed-width SIMD introduced in 2.0). The proposal
 * uses the same `0xfd` SIMD prefix and assigns sub-opcodes
 * 0x100..0x113. Listing the constants here lets the loader
 * case-label them directly; the IR encoder/decoder widens the
 * SIMD sub-opcode from 1 byte to 2 bytes when this macro is set
 * (see the SIMD-prefix emit in wasm_loader.c and the matching
 * decode in wasm_interp_fast.c). When WAMR_BUILD_RELAXED_SIMD=0
 * these constants disappear and the SIMD IR / dispatch is
 * byte-identical to the legacy-SIMD-only build. */
SIMD_i8x16_relaxed_swizzle = 0x100,
SIMD_i32x4_relaxed_trunc_f32x4_s = 0x101,
SIMD_i32x4_relaxed_trunc_f32x4_u = 0x102,
SIMD_i32x4_relaxed_trunc_f64x2_s_zero = 0x103,
SIMD_i32x4_relaxed_trunc_f64x2_u_zero = 0x104,
SIMD_f32x4_relaxed_madd = 0x105,
SIMD_f32x4_relaxed_nmadd = 0x106,
SIMD_f64x2_relaxed_madd = 0x107,
SIMD_f64x2_relaxed_nmadd = 0x108,
SIMD_i8x16_relaxed_laneselect = 0x109,
SIMD_i16x8_relaxed_laneselect = 0x10a,
SIMD_i32x4_relaxed_laneselect = 0x10b,
SIMD_i64x2_relaxed_laneselect = 0x10c,
SIMD_f32x4_relaxed_min = 0x10d,
SIMD_f32x4_relaxed_max = 0x10e,
SIMD_f64x2_relaxed_min = 0x10f,
SIMD_f64x2_relaxed_max = 0x110,
SIMD_i16x8_relaxed_q15mulr_s = 0x111,
SIMD_i16x8_relaxed_dot_i8x16_i7x16_s = 0x112,
SIMD_i32x4_relaxed_dot_i8x16_i7x16_add_s = 0x113,
#endif /* WASM_ENABLE_RELAXED_SIMD */
} WASMSimdEXTOpcode;

typedef enum WASMAtomicEXTOpcode {
Expand Down
1 change: 1 addition & 0 deletions tests/unit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ add_subdirectory(linux-perf)
add_subdirectory(gc)
add_subdirectory(unsupported-features)
add_subdirectory(exception-handling)
add_subdirectory(relaxed-simd)
add_subdirectory(running-modes)
add_subdirectory(mem-alloc)

Expand Down
42 changes: 42 additions & 0 deletions tests/unit/relaxed-simd/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright (C) 2026 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# Unit-test build for the relaxed-SIMD feature. Builds the WAMR runtime
# sources together with every *.cc test file in this directory into one
# gtest executable, `relaxed_simd_test`.
cmake_minimum_required(VERSION 3.14)

project (test-relaxed-simd)

add_definitions (-DRUN_ON_LINUX)

add_definitions (-Dattr_container_malloc=malloc)
add_definitions (-Dattr_container_free=free)

# Runtime feature selection. WAMR_BUILD_RELAXED_SIMD=1 is only accepted
# by the build scripts together with SIMD + fast-interp, and is rejected
# when any of AOT / JIT / FAST_JIT / WAMR_COMPILER is 1. Pin all four to
# 0 here so a value inherited from the parent scope or the cache cannot
# abort configuration of this test.
set (WAMR_BUILD_AOT 0)
set (WAMR_BUILD_INTERP 1)
set (WAMR_BUILD_FAST_INTERP 1)
set (WAMR_BUILD_JIT 0)
set (WAMR_BUILD_FAST_JIT 0)
set (WAMR_BUILD_WAMR_COMPILER 0)
set (WAMR_BUILD_LIBC_WASI 0)
set (WAMR_BUILD_APP_FRAMEWORK 0)
set (WAMR_BUILD_SIMD 1)
set (WAMR_BUILD_RELAXED_SIMD 1)
set (WAMR_BUILD_BULK_MEMORY 1)
set (WAMR_BUILD_REF_TYPES 1)

include (../unit_common.cmake)

# gtest_discover_tests() comes from CMake's GoogleTest module. Including
# it here is harmless if it was already included, and keeps this file
# from depending on an include made elsewhere.
include (GoogleTest)

include_directories (${CMAKE_CURRENT_SOURCE_DIR})
include_directories (${IWASM_DIR}/interpreter)

# CONFIGURE_DEPENDS re-runs the glob at build time so newly added test
# sources are picked up without a manual re-configure.
file (GLOB_RECURSE source_all CONFIGURE_DEPENDS
      ${CMAKE_CURRENT_SOURCE_DIR}/*.cc)

set (UNIT_SOURCE ${source_all})

set (unit_test_sources
  ${UNIT_SOURCE}
  ${WAMR_RUNTIME_LIB_SOURCE}
)

add_executable (relaxed_simd_test ${unit_test_sources})

# The executable has no consumers, so gtest_main is a PRIVATE dependency.
target_link_libraries (relaxed_simd_test PRIVATE gtest_main)

gtest_discover_tests(relaxed_simd_test)
Loading
Loading