Skip to content

Commit

Permalink
Fix DataFrames in obsm (#95)
Browse files Browse the repository at this point in the history
flying-sheep authored Jul 13, 2023
1 parent 165fe0c commit 637e30f
Showing 13 changed files with 115 additions and 45 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
/docs/_build/
_version.py
__pycache__/
.pytest_cache/

# Jupyter
/.ipynb_checkpoints/
9 changes: 5 additions & 4 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
version: 2
build:
image: latest
sphinx:
configuration: docs/conf.py
os: ubuntu-22.04
tools:
python: "3.11"
python:
version: 3.7
install:
- method: pip
path: .
extra_requirements:
- doc
sphinx:
configuration: docs/conf.py
13 changes: 13 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"[python]": {
"editor.formatOnSave": true,
"editor.defaultFormatter": "ms-python.black-formatter",
//"editor.codeActionsOnSave": {
// "source.fixAll.ruff": true,
//},
},
"python.testing.pytestArgs": [],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.terminal.activateEnvironment": false,
}
39 changes: 35 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -17,16 +17,15 @@ urls.'Documentation' = 'https://icb-anndata2ri.readthedocs-hosted.com/'
urls.'Source Code' = 'https://github.com/theislab/anndata2ri'
urls.'Issue Tracker' = 'https://github.com/theislab/anndata2ri/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc'
dynamic = ['version']
requires-python = '>= 3.7'
requires-python = '>= 3.8'
dependencies = [
'get_version',
'rpy2 >= 3.4.3',
'tzlocal', # for pandas2ri
'anndata',
]

[project.optional-dependencies]
dev = ['pre-commit']
dev = ['pre-commit', 'setuptools-scm']
test = [
'pytest',
'pytest-faulthandler',
@@ -37,7 +36,7 @@ doc = [
'sphinx>=3.0',
'sphinx-autodoc-typehints',
'scanpydoc',
'sphinx-rtd-theme>=0.5', # Already a dep but 0.5 is prettier
'sphinx-rtd-theme>=0.5',
'lxml', # For scraping the R link info
'importlib_metadata; python_version < "3.8"',
]
@@ -49,6 +48,38 @@ raw-options = { local_scheme = 'no-local-version' } # be able to publish dev ve
[tool.hatch.build.hooks.vcs]
version-file = 'src/anndata2ri/_version.py'

[tool.hatch.envs.docs]
features = ['doc']
[tool.hatch.envs.docs.scripts]
build = 'sphinx-build -M html docs docs/_build'

[[tool.hatch.envs.test.matrix]]
python = ['3.8', '3.9', '3.10', '3.11']
[tool.hatch.envs.test]
features = ['test']
[tool.hatch.envs.test.scripts]
run = 'pytest -vv {args}'

[tool.pytest.ini_options]
addopts = [
'--import-mode=importlib',
'-panndata2ri.test_utils',
# TODO '-Werror',
]
filterwarnings = [
# eventlet 0.24.1 imports dns.hash: https://github.com/eventlet/eventlet/pull/563
'ignore::DeprecationWarning:dns.hash',
# igraph 0.7.1post6 imports SafeConfigParser: https://github.com/igraph/python-igraph/pull/203
'ignore::DeprecationWarning:igraph.configuration',
# ipywidgets 7.4.2 imports ABCs from collections: https://github.com/jupyter-widgets/ipywidgets/pull/2395
'ignore::DeprecationWarning:ipywidgets.widgets.widget_selection',
# jinja2 2.10.1 imports ABCs from collections: https://github.com/pallets/jinja/pull/867
'ignore::DeprecationWarning:jinja2.utils',
'ignore::DeprecationWarning:jinja2.runtime',
# rpy2 3.0.2 imports ABCs from collections: https://bitbucket.org/rpy2/rpy2/pull-requests/74/fix-deprecationwarning/diff
'ignore::DeprecationWarning:rpy2.rinterface_lib.sexp',
]

[tool.black]
line-length = 120
skip-string-normalization = true
13 changes: 0 additions & 13 deletions pytest.ini

This file was deleted.

22 changes: 19 additions & 3 deletions src/anndata2ri/conv.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,31 @@
from typing import Optional
from __future__ import annotations

import numpy as np
import pandas as pd
from rpy2.robjects import conversion, numpy2ri, pandas2ri
from rpy2.robjects.conversion import overlay_converter

from . import scipy2ri


original_converter: Optional[conversion.Converter] = None
original_converter: conversion.Converter | None = None
converter = conversion.Converter('original anndata conversion')

mat_converter = numpy2ri.converter + scipy2ri.converter
_mat_converter = numpy2ri.converter + scipy2ri.converter


def mat_py2rpy(obj: np.ndarray) -> np.ndarray:
    """Convert a matrix-like Python object to R via the numpy/scipy matrix converter.

    A ``pandas.DataFrame`` is accepted as well: it must contain only numeric
    columns and is turned into a plain array with ``to_numpy()`` before being
    converted. Any other object is passed to ``_mat_converter.py2rpy`` as is.

    NOTE(review): the annotations only mention ``np.ndarray``, but DataFrames
    are handled too and the return value is whatever ``_mat_converter.py2rpy``
    yields (presumably an rpy2 object) -- confirm before tightening them.

    Raises:
        ValueError: If ``obj`` is a DataFrame with at least one non-numeric
            column; the message lists the offending column names.
    """
    if isinstance(obj, pd.DataFrame):
        # Use pandas' own dtype predicate per column instead of ordering dtypes
        # against the abstract ``np.number`` class, which makes NumPy coerce an
        # abstract scalar type into a concrete dtype.
        non_numeric = [
            column
            for column, dtype in obj.dtypes.items()
            if not pd.api.types.is_numeric_dtype(dtype)
        ]
        if non_numeric:
            msg = f'DataFrame contains non-numeric columns {non_numeric}'
            raise ValueError(msg)
        obj = obj.to_numpy()
    return _mat_converter.py2rpy(obj)


# R -> Python matrix conversion: a direct alias of the ``rpy2py`` method of the
# combined numpy2ri + scipy2ri converter defined above as ``_mat_converter``.
mat_rpy2py = _mat_converter.rpy2py


def full_converter() -> conversion.Converter:
8 changes: 4 additions & 4 deletions src/anndata2ri/py2r.py
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@
from rpy2.robjects.vectors import ListVector

from . import conv_name
from .conv import converter, full_converter, mat_converter
from .conv import converter, full_converter, mat_py2rpy
from .rpy2_ext import importr


@@ -52,8 +52,8 @@ def py2rpy_anndata(obj: AnnData) -> RS4:
s4v = importr('S4Vectors')
sce = importr('SingleCellExperiment')
# TODO: sparse
x = {} if obj.X is None else dict(X=mat_converter.py2rpy(obj.X.T))
layers = {k: mat_converter.py2rpy(v.T) for k, v in obj.layers.items()}
x = {} if obj.X is None else dict(X=mat_py2rpy(obj.X.T))
layers = {k: mat_py2rpy(v.T) for k, v in obj.layers.items()}
assays = ListVector({**x, **layers})

row_args = {k: pandas2ri.py2rpy(v) for k, v in obj.var.items()}
@@ -70,7 +70,7 @@ def py2rpy_anndata(obj: AnnData) -> RS4:
with localconverter(full_converter() + dict_converter):
metadata = ListVector(obj.uns.items())

rd_args = {conv_name.scanpy2sce(k): mat_converter.py2rpy(obj.obsm[k]) for k in obj.obsm.keys()}
rd_args = {conv_name.scanpy2sce(k): mat_py2rpy(obj.obsm[k]) for k in obj.obsm.keys()}
reduced_dims = s4v.SimpleList(**rd_args)

return sce.SingleCellExperiment(
8 changes: 4 additions & 4 deletions src/anndata2ri/r2py.py
Original file line number Diff line number Diff line change
@@ -11,7 +11,7 @@
from rpy2.robjects.robject import RSlots

from . import conv_name
from .conv import converter, full_converter, mat_converter
from .conv import converter, full_converter, mat_rpy2py
from .rpy2_ext import importr
from .scipy2ri import supported_r_matrix_classes
from .scipy2ri.r2py import rmat_to_spmat
@@ -78,9 +78,9 @@ def rpy2py_single_cell_experiment(obj: SexpS4) -> AnnData:
def convert_mats(attr: str, mats: Mapping[str, Sexp], *, transpose: bool = False):
rv = []
for n, mat in mats.items():
conv = mat_converter.rpy2py(mat)
conv = mat_rpy2py(mat)
if isinstance(conv, RS4):
cls_names = mat_converter.rpy2py(conv.slots['class']).tolist()
cls_names = mat_rpy2py(conv.slots['class']).tolist()
raise TypeError(f'Cannot convert {attr}{n}” of type(s) {cls_names} to Python')
rv.append(conv.T if transpose else conv)
return rv
@@ -89,7 +89,7 @@ def convert_mats(attr: str, mats: Mapping[str, Sexp], *, transpose: bool = False
if not isinstance(assay_names, NULLType):
assay_names = [str(a) for a in se.assayNames(obj)]
# The assays can be stored in an env or elsewise so we don’t use obj.slots['assays']
assays = convert_mats(f'assay', {n: se.assay(obj, n) for n in assay_names}, transpose=True)
assays = convert_mats('assay', {n: se.assay(obj, n) for n in assay_names}, transpose=True)
# There’s SingleCellExperiment with no assays
exprs, layers = assays[0], dict(zip(assay_names[1:], assays[1:]))
assert len(exprs.shape) == 2, exprs.shape
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# the following line is only necessary for IDEs
from anndata2ri.test_utils import py2r, r2py # noqa: F401
32 changes: 28 additions & 4 deletions tests/test_py2rpy.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from typing import List
from warnings import WarningMessage, catch_warnings, simplefilter
from warnings import catch_warnings, simplefilter

import numpy as np
import pytest
import scanpy as sc
from anndata import AnnData
from pandas import DataFrame
from rpy2.robjects import baseenv, globalenv
from rpy2.robjects.conversion import localconverter

import anndata2ri
from anndata2ri.rpy2_ext import importr
from anndata2ri.test_utils import py2r # noqa


def mk_ad_simple():
@@ -56,9 +56,33 @@ def test_py2rpy2_numpy_pbmc68k():

try:
anndata2ri.activate()
with catch_warnings(record=True) as logs: # type: List[WarningMessage]
with catch_warnings(record=True) as logs:
simplefilter('ignore', DeprecationWarning)
globalenv['adata'] = pbmc68k_reduced()
assert len(logs) == 0, [m.message for m in logs]
finally:
anndata2ri.deactivate()


@pytest.mark.parametrize('attr', ['X', 'layers', 'obsm'])
def test_dfs(attr):
    """A DataFrame stored in X, in a layer, or in obsm must not break the conversion.

    The selected attribute of a simple AnnData is replaced by a DataFrame indexed
    by the observation names; the object is then assigned into R's global
    environment under the anndata2ri converter. The test passes if nothing raises.
    """
    adata = mk_ad_simple()
    obs_names = adata.obs_names

    def as_frame(values):
        # Wrap the raw matrix in a DataFrame aligned to the observations.
        return DataFrame(values, index=obs_names)

    if attr == 'obsm':
        adata.obsm['X_pca'] = as_frame(adata.obsm['X_pca'])
    elif attr == 'layers':
        adata.layers['X2'] = as_frame(adata.X)
    elif attr == 'X':
        adata.X = as_frame(adata.X)
    else:
        assert False, attr

    with localconverter(anndata2ri.converter):
        globalenv['adata_obsm_pd'] = adata


def test_df_error():
    """Converting an AnnData whose obsm holds a DataFrame with a string column raises ValueError."""
    adata = mk_ad_simple()
    # Columns 'a' and 'c' are numeric; only 'b' holds strings and should be reported.
    mixed_frame = DataFrame(dict(a=[1, 2], b=list('ab'), c=[1.0, 2.0]), index=adata.obs_names)
    adata.obsm['stuff'] = mixed_frame
    expected_message = r"DataFrame contains non-numeric columns \['b'\]"
    with pytest.raises(ValueError, match=expected_message):
        anndata2ri.converter.py2rpy(adata)
1 change: 0 additions & 1 deletion tests/test_rpy2py.py
Original file line number Diff line number Diff line change
@@ -7,7 +7,6 @@

import anndata2ri
from anndata2ri.rpy2_ext import importr
from anndata2ri.test_utils import r2py # noqa


as_ = getattr(importr('methods'), 'as')
6 changes: 2 additions & 4 deletions tests/test_scipy_py2rpy.py
Original file line number Diff line number Diff line change
@@ -4,7 +4,6 @@
from scipy import sparse

from anndata2ri import scipy2ri
from anndata2ri.test_utils import conversions_py2rpy


mats = [
@@ -19,12 +18,11 @@


@pytest.mark.parametrize('typ', ['l', 'd'])
@pytest.mark.parametrize('conversion', conversions_py2rpy)
@pytest.mark.parametrize('shape,dataset,cls', mats)
def test_py2rpy(typ, conversion, shape, dataset, cls):
def test_py2rpy(py2r, typ, shape, dataset, cls):
if typ == 'l':
dataset = dataset.astype(bool)
sm = conversion(scipy2ri, dataset)
sm = py2r(scipy2ri, dataset)
assert f'{typ}{cls}Matrix' in set(sm.rclass)
assert tuple(baseenv['dim'](sm)) == shape

6 changes: 2 additions & 4 deletions tests/test_scipy_rpy2py.py
Original file line number Diff line number Diff line change
@@ -9,7 +9,6 @@

from anndata2ri import scipy2ri
from anndata2ri.rpy2_ext import importr
from anndata2ri.test_utils import ConversionModule, conversions_rpy2py


matrix = importr('Matrix')
@@ -54,17 +53,16 @@
]


@pytest.mark.parametrize('conversion', conversions_rpy2py)
@pytest.mark.parametrize('shape,cls,dtype,arr,dataset', mats)
def test_py2rpy(
conversion: Callable[[ConversionModule, Callable[[], Sexp]], sparse.spmatrix],
r2py,
shape: Tuple[int, int],
cls: Type[sparse.spmatrix],
dtype: np.dtype,
arr: np.ndarray,
dataset: Callable[[], Sexp],
):
sm = conversion(scipy2ri, dataset)
sm = r2py(scipy2ri, dataset)
assert isinstance(sm, cls)
assert sm.shape == shape
assert np.allclose(sm.toarray(), np.array(arr))

0 comments on commit 637e30f

Please sign in to comment.