Open
Description
I'm finding that adding to and searching an ObjectBox database is really fast. However, the remove operation is really slow (1 second per object). The database is on a local NVMe SSD. It contains about 20,000 hashes and takes up about 6 GB.
My `find_unique` lookup (a `hash_box.query` call) is fast; it is literally the call to `hash_box.remove` that takes the time.
What am I doing wrong?
@Entity()
class ImHash:
    """Stored record pairing a unique string key with an image feature vector."""

    # Object ID property.
    id = Id
    # Lookup key: hash-indexed and declared unique.
    key = String(unique=True, index=Index(IndexType.HASH))
    # Feature vector, HNSW-indexed with cosine distance over 62720 dimensions.
    cos_value = Float32Vector(
        index=HnswIndex(
            distance_type=VectorDistanceType.COSINE,
            dimensions=62720,
        ),
    )
def hash_image(im: Image.Image) -> "numpy.ndarray":
    """Compute the feature vector of an image as a flat array.

    Args:
        im: The image to embed with the module-level ``img2vec`` model.

    Returns:
        The one-dimensional array produced by ``.numpy().flatten()`` on the
        model output. The annotation used to say ``list[float]``, which did
        not match the value actually returned; it is quoted so that no
        runtime import of NumPy is required by this signature.
    """
    features = img2vec.get_vec(im, tensor=True)
    # Drop autograd tracking and move to host memory before converting,
    # then collapse to a single dimension.
    return features.detach().cpu().numpy().flatten()
def hash_and_store(name_or_fp, key: str):
    """Hash an image and store it under ``key`` unless the key is already stored.

    Args:
        name_or_fp: Anything accepted by ``Image.open`` (a path or a file
            object).
        key: Unique key to store the hash under.

    If ``find_unique(key)`` already returns an entry, nothing is done and the
    image is not opened at all, so the (comparatively expensive) hashing step
    is skipped for keys that are already present.
    """
    if find_unique(key) is not None:
        return

    # The context manager releases the file handle as soon as the hash has
    # been computed instead of leaving it open until garbage collection.
    with Image.open(name_or_fp) as im:
        vector = hash_image(im)

    entry = ImHash()
    entry.key = key
    entry.cos_value = vector
    with store_lock:
        hash_box.put(entry)
def init(db_dir: pathlib.Path):
    """Open the store under ``db_dir`` and set up the module-level globals.

    Assigns ``store``, ``hash_box`` (the ``ImHash`` box of that store) and
    ``img2vec`` (a CPU ``efficientnet_b0`` embedder).

    Args:
        db_dir: Directory that holds both the database directory and the
            JSON model file.
    """
    global store, hash_box, img2vec

    db_path = db_dir / directory_name
    model_path = db_dir / json_model_name
    store = Store(
        directory=str(db_path),
        model_json_file=str(model_path),
        max_db_size_in_kb=10 * 1024 * 1024,  # size cap, expressed in KB
    )
    hash_box = store.box(ImHash)
    img2vec = Img2Vec(model='efficientnet_b0', cuda=False)
def close():
    """Close the module-level ``store`` opened by ``init``."""
    store.close()
def find_unique(key: str):
    """Return the single ``ImHash`` whose key equals ``key``, or ``None``.

    ``None`` is returned both when nothing matches and when more than one
    entry matches; the latter case also prints a notice.
    """
    with store_lock:
        matches = hash_box.query(ImHash.key.equals(key)).build().find()

    if len(matches) == 1:
        return matches[0]
    if matches:
        # More than one hit: report it and treat the key as unresolved.
        print('Multiple matches found')
    return None
def find_similar(key: str) -> list[tuple[ImHash, float]]:
    """Return up to 8 nearest neighbours of the entry stored under ``key``.

    Args:
        key: Key of the entry whose vector is used as the search target.

    Returns:
        ``(entry, score)`` pairs sorted by ascending score. An empty list is
        returned when ``find_unique(key)`` yields no entry; previously that
        case raised ``AttributeError`` on ``None.cos_value``.
    """
    target = find_unique(key)
    if target is None:
        return []

    with store_lock:
        query = hash_box.query(
            ImHash.cos_value.nearest_neighbor(target.cos_value, 8)
        ).build()
        scored = query.find_with_scores()

    # Each result is an (object, score) pair; order by the score.
    scored.sort(key=lambda pair: pair[1])
    return scored
def remove(key: str):
    """Remove the entry stored under ``key``; do nothing if there is none."""
    target = find_unique(key)
    if target is None:
        return
    with store_lock:
        hash_box.remove(target)
def remove_many(keys: list[str]):
    """Remove the entries stored under ``keys`` inside one write transaction.

    Args:
        keys: Keys to remove. A key with no single matching entry is reported
            on stdout and skipped.

    Lock ordering: ``store_lock`` is taken first and the write transaction is
    opened second, the same order in which the other writers in this module
    (``put``/``remove`` under ``store_lock``) operate. The previous version
    opened the transaction first and acquired ``store_lock`` afterwards, so a
    thread holding ``store_lock`` while waiting to write and this function
    holding the transaction while waiting for ``store_lock`` could block each
    other forever. The lookup is done inline rather than through
    ``find_unique`` so that ``store_lock`` is never re-acquired while held.
    """
    with store_lock, store.write_tx():
        for key in keys:
            matches = hash_box.query(ImHash.key.equals(key)).build().find()
            if len(matches) > 1:
                # Same notice find_unique emits for an ambiguous key.
                print('Multiple matches found')
            if len(matches) != 1:
                print(f'Hash key "{key}" was already gone')
                continue
            hash_box.remove(matches[0].id)
Metadata
Metadata
Assignees
Labels
No labels