Skip to content

Commit

Permalink
Avoid mixing SSE and AVX in XTS-mode AVX512 implementation (#2140)
Browse files Browse the repository at this point in the history
Bimodal performance was observed in the XTS-encrypt AVX512 implementation, with a performance drop of more than 80% in the slow mode. The cause is mixing SSE and AVX instructions within the AVX512 implementation: for a subset of input lengths, the code path contained a single legacy SSE move instruction, movdqa. Use the AVX form, vmovdqa, instead.
  • Loading branch information
torben-hansen authored Jan 27, 2025
1 parent 81f138a commit 37c2b5e
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion crypto/fipsmodule/aes/asm/aesni-xts-avx512.pl
Original file line number Diff line number Diff line change
Expand Up @@ -1598,7 +1598,7 @@
vmovdqu8 %zmm1,($output)
vmovdqu %xmm2,0x40($output)
add \$0x50,$output
movdqa %xmm2,%xmm8
vmovdqa %xmm2,%xmm8
vextracti32x4 \$0x1,%zmm10,%xmm0
and \$0xf,$length
je .L_ret_${rndsuffix}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ aes_hw_xts_encrypt_avx512:
vmovdqu8 %zmm1,(%rsi)
vmovdqu %xmm2,64(%rsi)
addq $0x50,%rsi
movdqa %xmm2,%xmm8
vmovdqa %xmm2,%xmm8
vextracti32x4 $0x1,%zmm10,%xmm0
andq $0xf,%rdx
je .L_ret_hEgxyDlCngwrfFe
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ L$_remaining_num_blocks_is_5_hEgxyDlCngwrfFe:
vmovdqu8 %zmm1,(%rsi)
vmovdqu %xmm2,64(%rsi)
addq $0x50,%rsi
movdqa %xmm2,%xmm8
vmovdqa %xmm2,%xmm8
vextracti32x4 $0x1,%zmm10,%xmm0
andq $0xf,%rdx
je L$_ret_hEgxyDlCngwrfFe
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ $L$_remaining_num_blocks_is_5_hEgxyDlCngwrfFe:
vmovdqu8 ZMMWORD[rdx],zmm1
vmovdqu XMMWORD[64+rdx],xmm2
add rdx,0x50
movdqa xmm8,xmm2
vmovdqa xmm8,xmm2
vextracti32x4 xmm0,zmm10,0x1
and r8,0xf
je NEAR $L$_ret_hEgxyDlCngwrfFe
Expand Down

0 comments on commit 37c2b5e

Please sign in to comment.