Open
Description
I tried this code:
#[inline(never)]
pub fn step_sum(arr: &[u32]) -> u32
{
const STEP_SIZE: usize = 8;
let mut result = 0;
for step in 0..(arr.len() / STEP_SIZE) {
result += arr[step * STEP_SIZE];
}
result
}
I expected to see this happen: the index is in the bounds, so the check is elided.
Instead, this happened:
.LBB0_4:
cmp rcx, rsi
jae .LBB0_6
add eax, dword ptr [rdi + 4*rcx]
add rcx, 8
dec rdx
jne .LBB0_4
ret
.LBB0_6:
push rax
lea rdx, [rip + .L__unnamed_1]
mov rdi, rcx
call qword ptr [rip + core::panicking::panic_bounds_check::h300eea3d2ac1c8da@GOTPCREL]
Also, that isn't even the strictest scenario where the compiler should optimize this.
Stricter example:
#[inline(never)]
pub fn step_sum(arr: &[u32]) -> u32
{
const STEP_SIZE: usize = 8;
let mut result = 0;
for step in 0..(arr.len().div_ceil(STEP_SIZE)) {
let offset = (arr.len() - 1) % STEP_SIZE;
result += arr[step * STEP_SIZE + offset];
}
result
}