SIMD math library for game developers
Tested on x86_64 and AArch64.
Provides ~140 optimized routines and ~70 extensive tests.
Can be used with any graphics API.
Documentation can be found here.
Benchmarks can be found here.
An intro article can be found here.
How to get dependencies
- specific version:
zig fetch --save <URL of the release archive for version <REPLACE ME>>.tar.gz
- main branch version:
zig fetch --save git+<URL of the zmath git repository>
Example build.zig
pub fn build(b: *std.Build) void {
const exe = b.addExecutable(.{ ... });
const zmath = b.dependency("zmath", .{});
exe.root_module.addImport("zmath", zmath.module("root"));
Now in your code you may import and use zmath:
const zm = @import("zmath");
pub fn main() !void {
// OpenGL/Vulkan example
const object_to_world = zm.rotationY(..);
const world_to_view = zm.lookAtRh(
zm.f32x4(3.0, 3.0, 3.0, 1.0), // eye position
zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
// `perspectiveFovRhGl` produces Z values in [-1.0, 1.0] range (Vulkan app should use `perspectiveFovRh`)
const view_to_clip = zm.perspectiveFovRhGl(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
const object_to_view = zm.mul(object_to_world, world_to_view);
const object_to_clip = zm.mul(object_to_view, view_to_clip);
// Transposition is needed because GLSL uses column-major matrices by default
gl.uniformMatrix4fv(0, 1, gl.TRUE, zm.arrNPtr(&object_to_clip));
// In GLSL: gl_Position = vec4(in_position, 1.0) * object_to_clip;
// DirectX example
const object_to_world = zm.rotationY(..);
const world_to_view = zm.lookAtLh(
zm.f32x4(3.0, 3.0, -3.0, 1.0), // eye position
zm.f32x4(0.0, 0.0, 0.0, 1.0), // focus point
zm.f32x4(0.0, 1.0, 0.0, 0.0), // up direction ('w' coord is zero because this is a vector not a point)
const view_to_clip = zm.perspectiveFovLh(0.25 * math.pi, aspect_ratio, 0.1, 20.0);
const object_to_view = zm.mul(object_to_world, world_to_view);
const object_to_clip = zm.mul(object_to_view, view_to_clip);
// Transposition is needed because HLSL uses column-major matrices by default
const mem = allocateUploadMemory(...);
zm.storeMat(mem, zm.transpose(object_to_clip));
// In HLSL: out_position_sv = mul(float4(in_position, 1.0), object_to_clip);
// 'WASD' camera movement example
const speed = zm.f32x4s(10.0);
const delta_time = zm.f32x4s(demo.frame_stats.delta_time);
const transform = zm.mul(zm.rotationX(demo.camera.pitch), zm.rotationY(demo.camera.yaw));
var forward = zm.normalize3(zm.mul(zm.f32x4(0.0, 0.0, 1.0, 0.0), transform));
zm.storeArr3(&demo.camera.forward, forward);
const right = speed * delta_time * zm.normalize3(zm.cross3(zm.f32x4(0.0, 1.0, 0.0, 0.0), forward));
forward = speed * delta_time * forward;
var cam_pos = zm.loadArr3(demo.camera.position);
if (keyDown('W')) {
cam_pos += forward;
} else if (keyDown('S')) {
cam_pos -= forward;
if (keyDown('D')) {
cam_pos += right;
} else if (keyDown('A')) {
cam_pos -= right;
zm.storeArr3(&demo.camera.position, cam_pos);
// SIMD wave equation solver example (works with vector width 4, 8 and 16)
// 'T' can be F32x4, F32x8 or F32x16
var z_index: i32 = 0;
while (z_index < grid_size) : (z_index += 1) {
const z = scale * @intToFloat(f32, z_index - grid_size / 2);
const vz = zm.splat(T, z);
var x_index: i32 = 0;
while (x_index < grid_size) : (x_index += zm.veclen(T)) {
const x = scale * @intToFloat(f32, x_index - grid_size / 2);
const vx = zm.splat(T, x) + voffset * zm.splat(T, scale);
const d = zm.sqrt(vx * vx + vz * vz);
const vy = zm.sin(d - vtime);
const index = @intCast(usize, x_index + z_index * grid_size);
zm.store(xslice[index..], vx, 0);
zm.store(yslice[index..], vy, 0);
zm.store(zslice[index..], vz, 0);