-
Notifications
You must be signed in to change notification settings - Fork 91
/
Copy pathmd5x16.go
151 lines (128 loc) · 3.54 KB
/
md5x16.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// Package md5x16 implements 16-lane parallel MD5 with AVX-512 instructions.
package md5x16
import (
"encoding/binary"
"errors"
"math"
"reflect"
"unsafe"
)
//go:generate go run asm.go -out md5x16.s -stubs stub.go
const (
	// Size is the size of an MD5 checksum in bytes.
	Size = 16

	// BlockSize is the block size of MD5 in bytes.
	BlockSize = 64

	// Lanes is the maximum number of parallel MD5 computations.
	Lanes = 16
)
// Validate reports whether data satisfies the preconditions of Sum. It runs
// the same lane configuration step that Sum performs and returns its error,
// or nil when the input is acceptable.
func Validate(data [Lanes][]byte) error {
	if _, err := config(data); err != nil {
		return err
	}
	return nil
}
// Sum computes the MD5 checksums of up to Lanes inputs in one pass.
//
// A nil entry marks an unused lane; its digest in the result is left as all
// zero bytes. Non-nil inputs must all have the same length, and their data
// pointers must lie within a 32-bit offset of the lowest one. Sum panics with
// the configuration error when these preconditions do not hold; call Validate
// beforehand to check them without panicking.
func Sum(data [Lanes][]byte) [Lanes][Size]byte {
	cfg, err := config(data)
	if err != nil {
		panic(err)
	}

	// Seed the state of every active lane with the MD5 initial values.
	// Inactive lanes keep a zero state.
	iv := [4]uint32{0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476}
	var state [4][Lanes]uint32
	for _, lane := range cfg.active {
		for w, v := range iv {
			state[w][lane] = v
		}
	}

	// Hash all complete blocks straight out of the caller's memory, moving
	// the shared base pointer forward one block at a time.
	ptr := cfg.base
	for i, full := 0, cfg.n/BlockSize; i < full; i++ {
		block(&state, ptr, &cfg.offsets, cfg.mask)
		ptr += BlockSize
	}
	tail := cfg.n % BlockSize // bytes left over after the full blocks
	consumed := cfg.n - tail  // bytes already hashed

	// Stage the padded final block(s) in a scratch buffer holding one
	// block-sized slot per lane, addressed relative to the buffer start.
	var scratch [Lanes * BlockSize]byte
	padBase := dataptr(scratch[:])
	var pad [Lanes][]byte
	var padOffsets [Lanes]uint32
	for _, lane := range cfg.active {
		start := lane * BlockSize
		pad[lane] = scratch[start : start+BlockSize]
		padOffsets[lane] = uint32(start)
		copy(pad[lane], data[lane][consumed:])
		pad[lane][tail] = 0x80 // padding marker directly after the message
	}

	// With fewer than 8 bytes left after the marker there is no room for the
	// length field: hash this block now and continue with an all-zero one.
	if tail >= 56 {
		block(&state, padBase, &padOffsets, cfg.mask)
		scratch = [Lanes * BlockSize]byte{}
	}

	// The last 8 bytes of the final block carry the message length in bits,
	// little endian.
	bits := uint64(8 * cfg.n)
	for _, lane := range cfg.active {
		binary.LittleEndian.PutUint64(pad[lane][BlockSize-8:], bits)
	}
	block(&state, padBase, &padOffsets, cfg.mask)

	// Serialize the four state words of each active lane, little endian.
	var digest [Lanes][Size]byte
	for _, lane := range cfg.active {
		out := digest[lane][:]
		for w := range state {
			binary.LittleEndian.PutUint32(out[4*w:], state[w][lane])
		}
	}
	return digest
}
// lanes describes how the inputs of a single MD5 computation are laid out
// across the data lanes.
type lanes struct {
	// n is the common length in bytes of all active (non-nil) lanes.
	n int

	// active holds the indexes of the active lanes.
	active []int

	// mask has bit l set when lane l is active.
	mask uint16

	// base is the lowest data address among the active lanes.
	base uintptr

	// offsets gives, per active lane, the distance in bytes from base to
	// that lane's data.
	offsets [Lanes]uint32
}
// config builds the lane configuration for data. A lane is active when its
// slice is non-nil. It returns an error when no lane is active, when active
// lanes differ in length, or when some lane's data starts more than
// math.MaxUint32 bytes above the lowest lane address.
//
// NOTE(review): the assembly consuming offsets is not visible here; if it
// treats them as signed 32-bit indices the limit would need to be
// math.MaxInt32 instead. Confirm against asm.go.
func config(data [Lanes][]byte) (*lanes, error) {
	// Collect the indexes of the non-nil lanes.
	var active []int
	for i := range data {
		if data[i] != nil {
			active = append(active, i)
		}
	}
	if len(active) == 0 {
		return nil, errors.New("no active lanes")
	}

	// One pass over the active lanes: enforce the common length, build the
	// lane mask and find the lowest data address to serve as base.
	var (
		length = len(data[active[0]])
		mask   uint16
		lowest = ^uintptr(0)
	)
	for _, i := range active {
		if len(data[i]) != length {
			return nil, errors.New("length mismatch")
		}
		mask |= 1 << i
		if p := dataptr(data[i]); p < lowest {
			lowest = p
		}
	}

	// Express each lane as a 32-bit offset from the base address.
	cfg := &lanes{n: length, active: active, mask: mask, base: lowest}
	for _, i := range active {
		delta := dataptr(data[i]) - lowest
		if delta > math.MaxUint32 {
			return nil, errors.New("input data exceed 32-bit memory region")
		}
		cfg.offsets[i] = uint32(delta)
	}
	return cfg, nil
}
// dataptr returns the address stored in the slice header of data, i.e. the
// location of its first element, as an integer. A nil slice yields 0. The
// result is a plain uintptr and therefore does not keep the backing array
// reachable on its own.
func dataptr(data []byte) uintptr {
	return (*reflect.SliceHeader)(unsafe.Pointer(&data)).Data
}