Skip to content

Commit

Permalink
Use SIMD instructions to update pointers.
Browse files Browse the repository at this point in the history
Signed-off-by: Nicola Torracca <[email protected]>
  • Loading branch information
Shark64 authored and pablodelara committed Nov 6, 2024
1 parent d3a248d commit b724ea1
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 70 deletions.
15 changes: 5 additions & 10 deletions sha1_mb/sha1_mb_x16_avx512.asm
Original file line number Diff line number Diff line change
Expand Up @@ -490,16 +490,11 @@ lastLoop:
vpaddd E,E,EE

;; update input data pointers
%assign I 0
%rep 8
mov inp0, [IN + (2*I)*8]
mov inp1, [IN + (2*I +1)*8]
add inp0, IDX
add inp1, IDX
mov [IN + (2*I)*8], inp0
mov [IN + (2*I+1)*8], inp1
%assign I (I+1)
%endrep
vpbroadcastq TMP1, IDX
vpaddq TMP0, TMP1, [IN]
vpaddq TMP1, TMP1, [IN+64]
vmovdqu64 [IN], TMP0
vmovdqu64 [IN+64], TMP1

; Write out digest
; Do we need to untranspose digests???
Expand Down
23 changes: 7 additions & 16 deletions sha1_mb/sha1_mb_x8_avx2.asm
Original file line number Diff line number Diff line change
Expand Up @@ -475,22 +475,13 @@ lloop:
vmovdqu [arg1 + 4*32], E

;; update input pointers
add inp0, IDX
add inp1, IDX
add inp2, IDX
add inp3, IDX
add inp4, IDX
add inp5, IDX
add inp6, IDX
add inp7, IDX
mov [arg1+_data_ptr+0*8], inp0
mov [arg1+_data_ptr+1*8], inp1
mov [arg1+_data_ptr+2*8], inp2
mov [arg1+_data_ptr+3*8], inp3
mov [arg1+_data_ptr+4*8], inp4
mov [arg1+_data_ptr+5*8], inp5
mov [arg1+_data_ptr+6*8], inp6
mov [arg1+_data_ptr+7*8], inp7
vmovq xmm1, IDX
vpbroadcastq ymm1, xmm1
lea IDX, [arg1+_data_ptr]
vpaddq ymm0, ymm1, [IDX]
vpaddq ymm1, ymm1, [IDX+32]
vmovdqu [IDX], ymm0
vmovdqu [IDX+32], ymm1

;;;;;;;;;;;;;;;;
;; Postamble
Expand Down
15 changes: 5 additions & 10 deletions sha256_mb/sha256_mb_x16_avx512.asm
Original file line number Diff line number Diff line change
Expand Up @@ -607,16 +607,11 @@ lastLoop:
vpaddd H, H, [rsp + _DIGEST_SAVE + 64*7]

;; update input data pointers
%assign I 0
%rep 8
mov inp0, [IN + (2*I)*8]
mov inp1, [IN + (2*I +1)*8]
add inp0, IDX
add inp1, IDX
mov [IN + (2*I)*8], inp0
mov [IN + (2*I+1)*8], inp1
%assign I (I+1)
%endrep
vpbroadcastq TMP1, IDX
vpaddq TMP0, TMP1, [IN]
vpaddq TMP1, TMP1, [IN+64]
vmovdqu64 [IN], TMP0
vmovdqu64 [IN+64], TMP1

; Write out digest
; Do we need to untranspose digests???
Expand Down
23 changes: 7 additions & 16 deletions sha256_mb/sha256_mb_x8_avx2.asm
Original file line number Diff line number Diff line change
Expand Up @@ -463,22 +463,13 @@ Lrounds_16_xx:
vmovdqu [STATE + 7*SHA256_DIGEST_ROW_SIZE],h

; update input pointers
add inp0, IDX
mov [STATE + _args_data_ptr + 0*8], inp0
add inp1, IDX
mov [STATE + _args_data_ptr + 1*8], inp1
add inp2, IDX
mov [STATE + _args_data_ptr + 2*8], inp2
add inp3, IDX
mov [STATE + _args_data_ptr + 3*8], inp3
add inp4, IDX
mov [STATE + _args_data_ptr + 4*8], inp4
add inp5, IDX
mov [STATE + _args_data_ptr + 5*8], inp5
add inp6, IDX
mov [STATE + _args_data_ptr + 6*8], inp6
add inp7, IDX
mov [STATE + _args_data_ptr + 7*8], inp7
vmovq XWORD(TMP0), IDX
vpbroadcastq TMP1, XWORD(TMP0)
lea IDX, [STATE + _args_data_ptr]
vpaddq TMP0, TMP1, [IDX]
vpaddq TMP1, TMP1, [IDX + 32]
vmovdqu [IDX], TMP0
vmovdqu [IDX+32], TMP1

;;;;;;;;;;;;;;;;
;; Postamble
Expand Down
13 changes: 5 additions & 8 deletions sha512_mb/sha512_mb_x4_avx2.asm
Original file line number Diff line number Diff line change
Expand Up @@ -379,14 +379,11 @@ Lrounds_16_xx:
vmovdqu [STATE+ 7*SHA512_DIGEST_ROW_SIZE ],h

;; update input data pointers
add inp0, IDX
mov [STATE + _data_ptr_sha512 + 0*PTR_SZ], inp0
add inp1, IDX
mov [STATE + _data_ptr_sha512 + 1*PTR_SZ], inp1
add inp2, IDX
mov [STATE + _data_ptr_sha512 + 2*PTR_SZ], inp2
add inp3, IDX
mov [STATE + _data_ptr_sha512 + 3*PTR_SZ], inp3
vmovq xmm0, IDX
lea IDX, [STATE + _data_ptr_sha512]
vpbroadcastq ymm0, xmm0
vpaddq ymm0, ymm0, [IDX]
vmovdqu [IDX], ymm0

;;;;;;;;;;;;;;;;
;; Postamble
Expand Down
13 changes: 3 additions & 10 deletions sha512_mb/sha512_mb_x8_avx512.asm
Original file line number Diff line number Diff line change
Expand Up @@ -494,16 +494,9 @@ lastLoop:
vpaddq H, H, [rsp + _DIGEST_SAVE + 64*7]

;; update input data pointers
%assign I 0
%rep 4
mov inp0, [IN + (2*I)*8]
mov inp1, [IN + (2*I +1)*8]
add inp0, IDX
add inp1, IDX
mov [IN + (2*I)*8], inp0
mov [IN + (2*I+1)*8], inp1
%assign I (I+1)
%endrep
vpbroadcastq TMP0, IDX
vpaddq TMP0, TMP0, [IN]
vmovdqu64 [IN], TMP0

VMOVDQ32 [DIGEST + 0*8*8], A
VMOVDQ32 [DIGEST + 1*8*8], B
Expand Down

0 comments on commit b724ea1

Please sign in to comment.