Skip to content

Commit

Permalink
Ensure all array metrics return an ndarray (#71)
Browse files: browse the repository at this point in the history
* Add missing import

* Unify all array metric tests

* Add breaking test

* Make soergel always return array

* Test docstrings also on CI

* Increment patch number

* Format added lines
  • Loading branch information
sethaxen authored Dec 21, 2022
1 parent ce3d8c5 commit fdf1417
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 116 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
shell: bash -l {0}
run: |
conda install -c conda-forge pytest pytest-cov mock
pytest --cov=e3fp --cov-report=xml e3fp/test
pytest --cov=e3fp --cov-report=xml e3fp
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
with:
Expand Down
2 changes: 1 addition & 1 deletion e3fp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .util import E3FPWarning, E3FPDeprecationWarning

# Single source of truth for the package version: (major, minor, patch).
version_info = (1, 2, 5)
# Dotted string form built from the tuple above, e.g. "1.2.5".
version = ".".join(map(str, version_info))
__version__ = version
8 changes: 5 additions & 3 deletions e3fp/fingerprint/metrics/array_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import numpy as np
import scipy
from scipy.sparse import csr_matrix, issparse, vstack
import scipy.sparse.linalg
import scipy.spatial
from e3fp.util import maybe_jit


Expand Down Expand Up @@ -211,14 +213,14 @@ def _get_bitcount_arrays(X, Y, return_XYbits=False):


def _sparse_cosine(X, Y):
    """Return the pairwise cosine similarity between the rows of `X` and `Y`.

    Assumes `X` and `Y` are scipy sparse matrices for which ``*`` is the
    matrix product (e.g. ``csr_matrix``) and that they have the same number
    of columns.

    Parameters
    ----------
    X, Y : sparse matrix
        Row-wise vectors to compare. Passing the same object for both skips
        recomputing the row norms.

    Returns
    -------
    ndarray of shape (X.shape[0], Y.shape[0])
        Cosine similarities. Entries whose row in `X` or `Y` has zero norm
        are 0.
    """
    # Row-wise L2 norms come back as 1-D dense arrays.
    Xnorm = scipy.sparse.linalg.norm(X, axis=1)
    Ynorm = Xnorm if Y is X else scipy.sparse.linalg.norm(Y, axis=1)
    XY = (X * Y.T).toarray()
    # A zero-norm row gives 0/0 in the quotient below; numpy reports that as
    # "invalid" rather than "divide", so silence both before nan_to_num maps
    # the resulting NaNs to 0.
    with np.errstate(divide="ignore", invalid="ignore"):
        # np.outer pairs every X row norm with every Y row norm, so the
        # denominator has the same (n_x, n_y) shape as XY.
        return np.nan_to_num(XY / np.outer(Xnorm, Ynorm))

@maybe_jit(nopython=True, nogil=True, cache=True)
def _dense_soergel(X, Y, S):
Expand Down
144 changes: 33 additions & 111 deletions e3fp/test/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ def _create_random_sparse(nrows, nbits=1024, perc_pos=0.1, counts=False):
)
return arr

def soergeldist(x, y):
    """Return the Soergel distance ``sum(|x - y|) / sum(max(x, y))``.

    Usable as a callable ``metric`` for ``scipy.spatial.distance.cdist``,
    which is how the tests in this module call it.

    Parameters
    ----------
    x, y : array-like
        Vectors of equal length.

    Returns
    -------
    float
        The distance. It is NaN when the denominator is zero (e.g. both
        vectors are all zeros).
    """
    # Compute in float64: subtracting unsigned-integer count vectors would
    # wrap around instead of going negative, and boolean arrays do not
    # support ``-`` at all.
    x = np.asarray(x, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64)
    return np.abs(x - y).sum() / np.maximum(x, y).sum()


class TestArrayMetrics:

Expand All @@ -36,120 +39,39 @@ def _eval(func, X, Y=None, dense=False, **kwargs):
Y = Y.toarray()
return func(X, Y, **kwargs)

def test_tanimoto(self):
    """Tanimoto on binary fingerprints: unit diagonal, sparse equals dense."""
    fps = _create_random_sparse(10, counts=False)
    metric = array_metrics.tanimoto
    unit_diag = np.ones(fps.shape[0])
    # First compare against an explicit copy, then run the self-comparison
    # path where Y is omitted.
    for operands in ((fps, fps.copy()), (fps,)):
        from_sparse = self._eval(metric, *operands)
        from_dense = self._eval(metric, *operands, dense=True)
        np.testing.assert_allclose(np.diag(from_sparse), unit_diag)
        np.testing.assert_allclose(from_sparse, from_dense)

def test_soergel(self):
    """Soergel on count fingerprints: unit diagonal, sparse equals dense."""
    fps = _create_random_sparse(10, counts=True)
    metric = array_metrics.soergel
    unit_diag = np.ones(fps.shape[0])
    # First compare against an explicit copy, then run the self-comparison
    # path where Y is omitted.
    for operands in ((fps, fps.copy()), (fps,)):
        from_sparse = self._eval(metric, *operands)
        from_dense = self._eval(metric, *operands, dense=True)
        np.testing.assert_allclose(np.diag(from_sparse), unit_diag)
        np.testing.assert_allclose(from_sparse, from_dense)

def test_tanimoto_soergel_equal_for_binary(self):
    """Tanimoto and Soergel must agree on binary fingerprints."""
    # NOTE(review): a later method in this listing reuses this name; if both
    # end up in the same class body the later definition replaces this one.
    X = _create_random_sparse(10, counts=False)
    Y = X.copy()
    # Sparse inputs first, then the densified inputs.
    for use_dense in (False, True):
        tanimoto_scores = self._eval(
            array_metrics.tanimoto, X, Y, dense=use_dense
        )
        soergel_scores = self._eval(
            array_metrics.soergel, X, Y, dense=use_dense
        )
        np.testing.assert_allclose(tanimoto_scores, soergel_scores)

def test_dice(self):
    """Dice similarity must equal ``1 - cdist(..., metric="dice")``."""
    X = _create_random_sparse(10, counts=False)
    Y = X.copy()
    metric = array_metrics.dice
    expected = 1.0 - self._eval(cdist, X, Y, dense=True, metric="dice")
    # Y is a copy of X, so the same expectation applies both to the explicit
    # pair and to the self-comparison call where Y is omitted.
    for operands in ((X, Y), (X,)):
        from_sparse = self._eval(metric, *operands)
        from_dense = self._eval(metric, *operands, dense=True)
        np.testing.assert_allclose(from_sparse, expected)
        np.testing.assert_allclose(from_dense, expected)

def test_cosine(self):
    """Cosine similarity must equal ``1 - cdist(..., metric="cosine")``.

    Checked for count and binary fingerprints, for explicit and
    self-comparison calls, and with ``assume_binary=True`` on binary input.
    """
    metric = array_metrics.cosine

    def check(expected, *operands, **options):
        # Evaluate on sparse and on densified inputs; both must match.
        from_sparse = self._eval(metric, *operands, **options)
        from_dense = self._eval(metric, *operands, dense=True, **options)
        np.testing.assert_allclose(from_sparse, expected)
        np.testing.assert_allclose(from_dense, expected)

    # Count fingerprints first, then binary fingerprints.
    for use_counts in (True, False):
        X = _create_random_sparse(10, counts=use_counts)
        Y = X.copy()
        expected = 1.0 - self._eval(
            cdist, X, Y, dense=True, metric="cosine"
        )
        check(expected, X, Y)
        # test self-comparison
        check(expected, X)

    # X, Y and expected still hold the binary case from the last iteration.
    check(expected, X, Y, assume_binary=True)
@pytest.mark.parametrize("dense", [True, False])
@pytest.mark.parametrize(
    "func,cdist_metric,counts",
    [
        (array_metrics.tanimoto, "jaccard", False),
        (array_metrics.dice, "dice", False),
        (array_metrics.cosine, "cosine", False),
        (array_metrics.cosine, "cosine", True),
        (array_metrics.pearson, "correlation", False),
        (array_metrics.pearson, "correlation", True),
        (array_metrics.soergel, soergeldist, False),
        (array_metrics.soergel, soergeldist, True),
    ],
)
def test_metrics_vs_cdist(self, func, cdist_metric, counts, dense):
    """Each array metric must equal ``1 - cdist`` for the matching distance.

    Runs on sparse and dense inputs, for binary and (where listed) count
    fingerprints. `cdist_metric` is either a scipy metric name or a callable.
    """
    # Different row counts (10 vs 8) so a transposed or mis-broadcast result
    # cannot pass by accident.
    X = _create_random_sparse(10, counts=counts)
    Y = _create_random_sparse(8, counts=counts)
    expect_score = 1.0 - cdist(X.toarray(), Y.toarray(), metric=cdist_metric)
    score = self._eval(func, X, Y, dense=dense)
    # Exact type check on purpose: subclasses such as np.matrix would pass
    # an isinstance check.
    assert type(score) is np.ndarray
    np.testing.assert_allclose(score, expect_score)
    # test self-comparison
    expect_score = 1.0 - cdist(X.toarray(), X.toarray(), metric=cdist_metric)
    score = self._eval(func, X, dense=dense)
    np.testing.assert_allclose(score, expect_score)

@pytest.mark.parametrize("dense", [True, False])
def test_tanimoto_soergel_equal_for_binary(self, dense):
    """Tanimoto and Soergel must agree on binary fingerprints.

    Runs once on dense and once on sparse inputs.
    """
    # Different row counts (10 vs 8) exercise the non-square case.
    X = _create_random_sparse(10, counts=False)
    Y = _create_random_sparse(8, counts=False)
    tscore = self._eval(array_metrics.tanimoto, X, Y, dense=dense)
    sscore = self._eval(array_metrics.soergel, X, Y, dense=dense)
    np.testing.assert_allclose(tscore, sscore)


class TestFlexibleMetrics:
Expand Down

0 comments on commit fdf1417

Please sign in to comment.