Skip to content

Seeking advice: remove is painfully slow #25

Open
@patknight

Description

@patknight

I'm finding that adding to and searching an ObjectBox database is really fast. However, the remove operation is really slow (1 second per object). The database is on a local NVMe SSD. It contains about 20,000 hashes and takes up about 6 GB.

My find_unique lookup (a hash_box.query call) is fast - it's literally the call to hash_box.remove that takes the time.

What am I doing wrong?

# Database entity pairing a unique string key with a float vector.
@Entity()
class ImHash:
    # Object ID property of the entity.
    id = Id
    # Lookup key; declared with a hash index and a uniqueness constraint.
    key = String(index=Index(IndexType.HASH), unique=True)
    # Float vector with an HNSW index configured for cosine distance;
    # the index is declared with 62720 dimensions.
    cos_value = Float32Vector(index=HnswIndex(
        dimensions=62720,
        distance_type=VectorDistanceType.COSINE,
    ))


def hash_image(im: Image.Image) -> list[float]:
    """Compute a flat feature vector for an image.

    Args:
        im: The image to run through the module-level ``img2vec`` model.

    Returns:
        The model output for ``im``, detached, moved to the CPU, converted
        to NumPy and flattened to one dimension.

    NOTE(review): the value returned is the result of ``.numpy().flatten()``,
    which does not look like a plain ``list[float]`` as annotated - confirm
    what callers expect.
    """
    features = img2vec.get_vec(im, tensor=True)
    as_array = features.detach().cpu().numpy()
    return as_array.flatten()


def hash_and_store(name_or_fp, key: str):
    """Hash an image and store the vector under ``key``.

    If an ``ImHash`` with this key already exists, its vector is
    overwritten; otherwise a new ``ImHash`` is created.

    Args:
        name_or_fp: Whatever ``Image.open`` accepts (passed through as is).
        key: Unique key identifying the stored hash.
    """
    # Open the image as a context manager so the image is released as soon
    # as the vector has been computed, rather than whenever the object
    # happens to be garbage collected.
    with Image.open(name_or_fp) as im:
        h = hash_image(im)
    ih = find_unique(key)
    if ih is None:
        # No existing entry for this key: create one.
        ih = ImHash()
        ih.key = key
    ih.cos_value = h
    with store_lock:
        hash_box.put(ih)


def init(db_dir: pathlib.Path):
    """Open the store under ``db_dir`` and set up the module-level handles.

    Assigns the globals ``store``, ``hash_box`` and ``img2vec``.

    Args:
        db_dir: Directory that contains both the database directory and the
            model JSON file (named by the module-level ``directory_name``
            and ``json_model_name``).
    """
    global store, hash_box, img2vec
    store_path = str(db_dir / directory_name)
    model_path = str(db_dir / json_model_name)
    # Size limit is passed in KB: 10 * 1024 * 1024 KB.
    size_limit_kb = 10 * 1024 * 1024
    store = Store(
        directory=store_path,
        model_json_file=model_path,
        max_db_size_in_kb=size_limit_kb,
    )
    hash_box = store.box(ImHash)
    img2vec = Img2Vec(cuda=False, model='efficientnet_b0')


def close():
    """Close the module-level ``store``."""
    store.close()


def find_unique(key: str):
    """Look up the single ``ImHash`` whose ``key`` equals ``key``.

    Args:
        key: The key to search for.

    Returns:
        The matching object when exactly one is found. ``None`` when there
        is no match, or when more than one match is found (in which case a
        message is also printed).
    """
    with store_lock:
        matches = hash_box.query(ImHash.key.equals(key)).build().find()
    match_count = len(matches)
    if match_count == 1:
        return matches[0]
    if match_count > 1:
        print('Multiple matches found')
    return None


def find_similar(key: str, max_results: int = 8) -> list[tuple[ImHash, float]]:
    """Find the stored hashes nearest to the one stored under ``key``.

    Args:
        key: Key of the stored hash to use as the search target.
        max_results: Number of nearest neighbours requested from the
            vector query. Defaults to 8.

    Returns:
        ``(object, score)`` pairs sorted by ascending score. An empty list
        when ``find_unique`` yields no target for ``key``.
    """
    target = find_unique(key)
    if target is None:
        # Without a target there is no vector to search with; previously
        # this fell through and failed on ``target.cos_value``.
        return []
    with store_lock:
        condition = ImHash.cos_value.nearest_neighbor(target.cos_value, max_results)
        query = hash_box.query(condition).build()
        results = query.find_with_scores()
    results.sort(key=lambda pair: pair[1])
    return results


def remove(key: str):
    """Remove the hash stored under ``key``, if there is one.

    Does nothing when ``find_unique`` returns ``None`` for ``key``.

    Args:
        key: Key of the stored hash to remove.
    """
    target = find_unique(key)
    if target is None:
        return
    with store_lock:
        hash_box.remove(target)


def remove_many(keys: list[str]):
    """Remove the hashes stored under each of ``keys`` in one transaction.

    Keys for which ``find_unique`` returns ``None`` are reported with a
    printed message and skipped, as is a key that resolves to an object
    already scheduled for removal by an earlier key in the same call.

    Args:
        keys: Keys of the stored hashes to remove.
    """
    # Resolve keys to IDs before opening the write transaction.
    # find_unique takes store_lock itself, so calling it inside the
    # transaction would acquire the lock *after* the transaction, the
    # opposite order to the other writers in this module (lock first,
    # then write), which risks a deadlock between threads.
    ids_to_remove = []
    seen_ids = set()
    for k in keys:
        found = find_unique(k)
        if found is None or found.id in seen_ids:
            print(f'Hash key "{k}" was already gone')
            continue
        seen_ids.add(found.id)
        ids_to_remove.append(found.id)

    if not ids_to_remove:
        return

    # Lock first, then a single write transaction for all removals.
    with store_lock, store.write_tx():
        for object_id in ids_to_remove:
            hash_box.remove(object_id)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions