Skip to content

Commit

Permalink
🔨 version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
sunhailin-Leo committed Aug 2, 2021
1 parent 6786fe5 commit 4b69c91
Show file tree
Hide file tree
Showing 17 changed files with 2,430 additions and 1 deletion.
48 changes: 48 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# Dependency directories (remove the comment below to include it)
# vendor/

# Prerequisites
*.d

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Fortran module files
*.mod
*.smod

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "hnswlib"]
path = hnswlib
url = https://github.com/nmslib/hnswlib.git
31 changes: 31 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
CXX = c++
INCLUDES = -I.
# One language standard only: the former "-std=c++0x" was overridden by the
# later "-std=c++11" on the same command line, so it had no effect.
CXXFLAGS = -pthread -march=native -std=c++11 $(INCLUDES)
OBJS = hnsw_wrapper.o

# These targets are commands, not files; keep make from being confused by a
# stray file named e.g. "build" or "test".
.PHONY: opt coverage build test clean

# Default target: optimized build of the static library plus the Go package.
opt: CXXFLAGS += -O3 -funroll-loops
opt: build

# NOTE(review): this only sets flags and has no prerequisite or recipe, so
# "make coverage" builds nothing by itself -- confirm whether it should
# depend on "build".
coverage: CXXFLAGS += -O0 -fno-inline -fprofile-arcs --coverage

# Compile the C++ wrapper around the header-only hnswlib sources.
hnsw_wrapper.o: hnsw_wrapper.h hnsw_wrapper.cc hnswlib/*.h
	$(CXX) $(CXXFLAGS) -c hnsw_wrapper.cc

# Archive the wrapper so cgo can link it with -lhnsw.
libhnsw.a: $(OBJS)
	$(AR) rcs libhnsw.a $(OBJS)

clean:
	rm -rf *.o libhnsw.a *.gcno *.gcda hnsw

build: libhnsw.a
	env CGO_CXXFLAGS="$(INCLUDES) -std=c++11" go build

test: build
	go test
36 changes: 35 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,36 @@
# hnswlib-to-go
Hnswlib to go. Golang interface to hnswlib(https://github.com/nmslib/hnswlib)
Hnswlib to go. Golang interface to hnswlib(https://github.com/nmslib/hnswlib). This is a golang interface of [hnswlib](https://github.com/nmslib/hnswlib). For more information, please follow [hnswlib](https://github.com/nmslib/hnswlib) and [Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs.](https://arxiv.org/abs/1603.09320).
**This project is built against, and compatible with, hnswlib 0.5.2.**


### Version

* version 1.0.0
* hnswlib compatible version 0.5.2.


### Build

* Linux/MacOS
* Build Golang Env
* `go mod init`
* `make`

### Usage

* When building a Go program that uses this package, run `export CGO_CXXFLAGS=-std=c++11` before `go build`, `go run`, or `go test`.

| argument | type | description |
| -------------- | ---- | ----- |
| dim | int | vector dimension |
| M | int | see [ALGO_PARAMS.md](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) |
| efConstruction | int | see [ALGO_PARAMS.md](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) |
| randomSeed | int | random seed for hnsw |
| maxElements | uint32 | max records in data |
| spaceType | string | distance metric: `ip`, `cosine` or `l2` (see the table below) |

| spaceType | distance |
| --------- |:-----------------:|
| ip | inner product |
| cosine | cosine similarity |
| l2 | l2 |
44 changes: 44 additions & 0 deletions example/demo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package main

import (
"fmt"
"math/rand"
"time"

hnswgo "github.com/sunhailin-Leo/hnswlib-to-go"
)

// randVector returns a slice of dim pseudo-random float32 values, each drawn
// from rand.Float32.
func randVector(dim int) []float32 {
	out := make([]float32, dim)
	for idx := range out {
		out[idx] = rand.Float32()
	}
	return out
}

// main is a small end-to-end demo: build a cosine index from random vectors,
// save it, load it back, run one k-NN query and print the elapsed time (in
// nanoseconds) followed by the returned labels and distances.
func main() {
	// Index parameters.
	dim, M, ef := 128, 32, 300
	randomSeed := 100
	var maxElements uint32 = 1000
	spaceType := "cosine"
	indexLocation := "hnsw_demo_index.bin"

	// Create the index and fill it; labels are the uint32 insertion counter.
	h := hnswgo.New(dim, M, ef, randomSeed, maxElements, spaceType)
	for label := uint32(0); label < maxElements; label++ {
		h.AddPoint(randVector(dim), label)
	}

	// Round-trip the index through the file system.
	h.Save(indexLocation)
	h = hnswgo.Load(indexLocation, dim, spaceType)

	// Query for at most 5 nearest neighbours.
	h.SetEf(15)
	query := randVector(dim)

	begin := time.Now().UnixNano()
	labels, vectors := h.SearchKNN(query, 5)
	finish := time.Now().UnixNano()

	fmt.Println(finish - begin)
	fmt.Println(labels, vectors)
}
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module github.com/sunhailin-Leo/hnswlib-to-go

go 1.15
100 changes: 100 additions & 0 deletions hnsw.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package hnswgo

// #cgo LDFLAGS: -L${SRCDIR} -lhnsw -lm
// #include <stdlib.h>
// #include "hnsw_wrapper.h"
// HNSW initHNSW(int dim, unsigned long int max_elements, int M, int ef_construction, int rand_seed, char stype);
// HNSW loadHNSW(char *location, int dim, char stype);
// void addPoint(HNSW index, float *vec, unsigned long int label);
// int searchKnn(HNSW index, float *vec, int N, unsigned long int *label, float *dist);
// void setEf(HNSW index, int ef);
import "C"
import (
"math"
"unsafe"
)

// HNSW pairs an opaque native index handle with the settings the Go side
// needs when passing vectors to it.
type HNSW struct {
// index is the handle returned by the C wrapper (initHNSW or loadHNSW).
index C.HNSW
// spaceType is the metric name passed to New or Load. "ip" and "cosine" are
// recognised; any other value selects the l2 space.
spaceType string
// dim is the vector dimension passed to New or Load.
dim int
// normalize is set for the "cosine" space; vectors are then normalized
// before being handed to the native index.
normalize bool
}

// New creates an empty native index and returns its Go handle.
//
// spaceType selects the metric: "ip" uses the inner-product space, "cosine"
// uses the same inner-product space with vector normalization switched on,
// and any other value uses the l2 space. The remaining arguments are passed
// straight through to initHNSW.
func New(dim, M, efConstruction, randSeed int, maxElements uint32, spaceType string) *HNSW {
	h := &HNSW{dim: dim, spaceType: spaceType}

	// Single-character metric code understood by the C wrapper.
	metric := C.char('l')
	switch spaceType {
	case "ip":
		metric = C.char('i')
	case "cosine":
		metric = C.char('i')
		h.normalize = true
	}

	h.index = C.initHNSW(C.int(dim), C.ulong(maxElements), C.int(M), C.int(efConstruction), C.int(randSeed), metric)
	return h
}

// Load opens an index previously written with Save and returns its Go handle.
//
// dim and spaceType are not read from the file; they are taken from the
// arguments and interpreted exactly as in New ("ip", "cosine" with
// normalization, anything else l2).
func Load(location string, dim int, spaceType string) *HNSW {
	h := &HNSW{dim: dim, spaceType: spaceType}

	// The C string is a malloc'ed copy and must be released by us.
	cPath := C.CString(location)
	defer C.free(unsafe.Pointer(cPath))

	// Single-character metric code understood by the C wrapper.
	metric := C.char('l')
	switch spaceType {
	case "ip":
		metric = C.char('i')
	case "cosine":
		metric = C.char('i')
		h.normalize = true
	}

	h.index = C.loadHNSW(cPath, C.int(dim), metric)
	return h
}

// Save writes the index to the file at location via the native saveHNSW
// wrapper.
func (h *HNSW) Save(location string) {
	// The C string is a malloc'ed copy; release it once the call returns.
	cPath := C.CString(location)
	defer C.free(unsafe.Pointer(cPath))

	C.saveHNSW(h.index, cPath)
}

func normalizeVector(vector []float32) []float32 {
var norm float32
for i := 0; i < len(vector); i++ {
norm += vector[i] * vector[i]
}
norm = 1.0 / (float32(math.Sqrt(float64(norm))) + 1e-15)
for i := 0; i < len(vector); i++ {
vector[i] = vector[i] * norm
}
return vector
}

// AddPoint inserts vector into the index under label.
//
// When the index was created with the "cosine" space the vector is
// normalized first. The native side receives only a raw pointer with no
// length, so a vector that is empty or shorter than the index dimension is
// rejected here with a panic instead of being passed on.
func (h *HNSW) AddPoint(vector []float32, label uint32) {
	if len(vector) == 0 || len(vector) < h.dim {
		panic("hnswgo: AddPoint: vector is empty or shorter than the index dimension")
	}
	if h.normalize {
		vector = normalizeVector(vector)
	}
	C.addPoint(h.index, (*C.float)(unsafe.Pointer(&vector[0])), C.ulong(label))
}

// SearchKNN looks up at most N nearest neighbours of vector and returns their
// labels and distances as two parallel slices.
//
// The slices hold as many entries as the native searchKnn reports, which may
// be fewer than N. N <= 0 yields two empty slices. When the index was created
// with the "cosine" space the query is normalized first.
//
// The native side receives only a raw pointer with no length, so a query that
// is empty or shorter than the index dimension is rejected with a panic.
func (h *HNSW) SearchKNN(vector []float32, N int) ([]uint32, []float32) {
	if N <= 0 {
		// Nothing requested; also avoids taking the address of element 0 of
		// an empty buffer below.
		return []uint32{}, []float32{}
	}
	if len(vector) == 0 || len(vector) < h.dim {
		panic("hnswgo: SearchKNN: vector is empty or shorter than the index dimension")
	}
	if h.normalize {
		vector = normalizeVector(vector)
	}

	// Output buffers filled by the C wrapper.
	cLabels := make([]C.ulong, N)
	cDists := make([]C.float, N)
	numResult := int(C.searchKnn(h.index, (*C.float)(unsafe.Pointer(&vector[0])), C.int(N), &cLabels[0], &cDists[0]))

	labels := make([]uint32, numResult)
	dists := make([]float32, numResult)
	for i := range labels {
		labels[i] = uint32(cLabels[i])
		dists[i] = float32(cDists[i])
	}
	return labels, dists
}

// SetEf forwards ef to the native setEf wrapper for this index.
// NOTE(review): ef is presumably hnswlib's query-time search parameter
// described in ALGO_PARAMS.md -- confirm against the hnswlib documentation.
func (h *HNSW) SetEf(ef int) {
C.setEf(h.index, C.int(ef))
}
59 changes: 59 additions & 0 deletions hnsw_wrapper.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
//hnsw_wrapper.cpp
#include <iostream>
#include "hnswlib/hnswlib.h"
#include "hnsw_wrapper.h"
#include <thread>
#include <atomic>

// Creates a new, empty index and returns it as an opaque handle.
//
// stype 'i' selects the inner-product space; any other value selects L2.
// The remaining arguments are forwarded to the HierarchicalNSW constructor.
//
// NOTE(review): both the space and the index are allocated with new and no
// matching delete is visible in this wrapper -- confirm how they are meant to
// be released.
HNSW initHNSW(int dim, unsigned long int max_elements, int M, int ef_construction, int rand_seed, char stype) {
    hnswlib::SpaceInterface<float> *metric = nullptr;
    switch (stype) {
        case 'i':
            metric = new hnswlib::InnerProductSpace(dim);
            break;
        default:
            metric = new hnswlib::L2Space(dim);
            break;
    }

    auto *graph = new hnswlib::HierarchicalNSW<float>(metric, max_elements, M, ef_construction, rand_seed);
    return static_cast<void*>(graph);
}

// Loads an index from the file at `location` and returns it as an opaque
// handle.
//
// stype 'i' selects the inner-product space; any other value selects L2.
//
// NOTE(review): both the space and the index are allocated with new and no
// matching delete is visible in this wrapper -- confirm how they are meant to
// be released.
HNSW loadHNSW(char *location, int dim, char stype) {
    hnswlib::SpaceInterface<float> *metric = nullptr;
    switch (stype) {
        case 'i':
            metric = new hnswlib::InnerProductSpace(dim);
            break;
        default:
            metric = new hnswlib::L2Space(dim);
            break;
    }

    const std::string path(location);
    auto *graph = new hnswlib::HierarchicalNSW<float>(metric, path, false, 0);
    return static_cast<void*>(graph);
}

// Writes the index behind `index` to the file at `location`.
//
// The declared return type is HNSW, so the handle that was passed in is
// returned unchanged; flowing off the end of a non-void function without a
// return statement is undefined behavior.
HNSW saveHNSW(HNSW index, char *location) {
    static_cast<hnswlib::HierarchicalNSW<float>*>(index)->saveIndex(location);
    return index;
}

// Inserts the vector at `vec` into the index under `label`.
//
// This function has C linkage and is called from Go, so a C++ exception must
// not leave it. As in searchKnn, std::exception is caught here; because the
// function returns void, the failure is reported on stderr and the point is
// not inserted.
void addPoint(HNSW index, float *vec, unsigned long int label) {
    try {
        static_cast<hnswlib::HierarchicalNSW<float>*>(index)->addPoint(vec, label);
    } catch (const std::exception& e) {
        std::cerr << "hnsw addPoint failed for label " << label << ": " << e.what() << '\n';
    }
}

// Runs a k-NN query for `vec` and writes the results into the caller's
// `label` and `dist` arrays, which must have room for N entries.
//
// Returns the number of results written, or 0 if the search throws a
// std::exception. Results are stored in ascending distance order: the queue's
// top is the largest distance, so the arrays are filled from the back.
int searchKnn(HNSW index, float *vec, int N, unsigned long int *label, float *dist) {
    auto *graph = static_cast<hnswlib::HierarchicalNSW<float>*>(index);

    std::priority_queue<std::pair<float, hnswlib::labeltype>> results;
    try {
        results = graph->searchKnn(vec, N);
    } catch (const std::exception&) {
        return 0;
    }

    const int count = results.size();
    int slot = count;
    while (slot > 0) {
        --slot;
        const std::pair<float, hnswlib::labeltype> best = results.top();
        dist[slot] = best.first;
        label[slot] = best.second;
        results.pop();
    }
    return count;
}

// Assigns `ef` to the ef_ member of the index behind `index`.
void setEf(HNSW index, int ef) {
    auto *graph = static_cast<hnswlib::HierarchicalNSW<float>*>(index);
    graph->ef_ = ef;
}
14 changes: 14 additions & 0 deletions hnsw_wrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// hnsw_wrapper.h
#ifdef __cplusplus
extern "C" {
#endif
typedef void* HNSW;
HNSW initHNSW(int dim, unsigned long int max_elements, int M, int ef_construction, int rand_seed, char stype);
HNSW loadHNSW(char *location, int dim, char stype);
HNSW saveHNSW(HNSW index, char *location);
void addPoint(HNSW index, float *vec, unsigned long int label);
int searchKnn(HNSW index, float *vec, int N, unsigned long int *label, float *dist);
void setEf(HNSW index, int ef);
#ifdef __cplusplus
}
#endif
Binary file added hnsw_wrapper.o
Binary file not shown.
Loading

0 comments on commit 4b69c91

Please sign in to comment.