Skip to content

Commit f16232f

Browse files
authored
fix: sparse index must be less than uint32 max (milvus-io#38250)
relate: milvus-io#35853 Signed-off-by: aoiasd <[email protected]>
1 parent e1aebe8 commit f16232f

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

internal/util/function/bm25_function.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ func (v *BM25FunctionRunner) run(data []string, dst []map[uint32]float32) error
9797
for tokenStream.Advance() {
9898
token := tokenStream.Token()
9999
// TODO More Hash Option
100-
hash := typeutil.HashString2Uint32(token)
100+
hash := typeutil.HashString2LessUint32(token)
101101
embeddingMap[hash] += 1
102102
}
103103
dst[i] = embeddingMap

pkg/util/typeutil/hash.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package typeutil
1919
import (
2020
"fmt"
2121
"hash/crc32"
22+
"math"
2223
"strconv"
2324
"strings"
2425
"unsafe"
@@ -75,6 +76,16 @@ func HashString2Uint32(v string) uint32 {
7576
return crc32.ChecksumIEEE([]byte(subString))
7677
}
7778

79+
// HashString2LessUint32 hashes a string to a uint32 that is strictly less than
// math.MaxUint32. At most the first substringLengthForCRC bytes of v are hashed
// with CRC-32 (IEEE polynomial); the checksum is then reduced modulo
// math.MaxUint32, so a checksum equal to math.MaxUint32 maps to 0.
80+
func HashString2LessUint32(v string) uint32 {
81+
subString := v
82+
if len(v) > substringLengthForCRC {
83+
subString = v[:substringLengthForCRC]
84+
}
85+
86+
return crc32.ChecksumIEEE([]byte(subString)) % math.MaxUint32
87+
}
88+
7889
// HashPK2Channels hash primary keys to channels
7990
func HashPK2Channels(primaryKeys *schemapb.IDs, shardNames []string) []uint32 {
8091
numShard := uint32(len(shardNames))

0 commit comments

Comments
 (0)