From 894ed692ce5b9fe6d4eb5061bdbb1cc128aca412 Mon Sep 17 00:00:00 2001 From: Adam Lugowski Date: Wed, 30 Aug 2023 17:50:43 -0700 Subject: [PATCH] Add numpy support --- README.md | 6 +- demo-numpy.ipynb | 181 ++++++++++++++++++++++++++++++++ matspy/__init__.py | 13 ++- matspy/adapters/numpy_driver.py | 18 ++++ matspy/adapters/numpy_impl.py | 39 +++++++ matspy/spy_renderer.py | 10 +- tests/test_numpy.py | 51 +++++++++ 7 files changed, 314 insertions(+), 4 deletions(-) create mode 100644 demo-numpy.ipynb create mode 100644 matspy/adapters/numpy_driver.py create mode 100644 matspy/adapters/numpy_impl.py create mode 100644 tests/test_numpy.py diff --git a/README.md b/README.md index b7021af..cadb40a 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,9 @@ # MatSpy Sparse matrix spy plot and sparkline renderer. Supports: -* **SciPy** - sparse matrices and arrays like `csr_matrix` and `coo_array` -* **[Python-graphblas](https://github.com/python-graphblas/python-graphblas)** - `gb.Matrix`. [See demo.](demo-python-graphblas.ipynb) +* **SciPy** - sparse matrices and arrays like `csr_matrix` and `coo_array` [(demo)](demo.ipynb) +* **NumPy** - `ndarray` [(demo)](demo-numpy.ipynb) +* **[Python-graphblas](https://github.com/python-graphblas/python-graphblas)** - `gb.Matrix` [(demo)](demo-python-graphblas.ipynb) Features: * Simple `spy()` method, similar to MatLAB's spy. @@ -53,6 +54,7 @@ All methods take the same arguments. Apart from the matrix itself: * `shading`: `binary`, `relative`, `absolute`. * `buckets`: spy plot pixels (longest side). * `dpi`: determine `buckets` relative to figure size. +* `precision`: For NumPy arrays, values whose magnitude is less than or equal to this are treated as zero. Like [matplotlib.pyplot.spy()](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.spy.html)'s `precision`. ### Overriding defaults `matspy.params` contains the default values for all arguments. 
diff --git a/demo-numpy.ipynb b/demo-numpy.ipynb new file mode 100644 index 0000000..d91c762 --- /dev/null +++ b/demo-numpy.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-31T00:41:41.137570Z", + "start_time": "2023-08-31T00:41:41.017838Z" + }, + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-31T00:41:41.322466Z", + "start_time": "2023-08-31T00:41:41.135934Z" + }, + "jupyter": { + "source_hidden": true + } + }, + "outputs": [], + "source": [ + "import scipy\n", + "A = scipy.io.mmread(\"doc/matrices/email-Eu-core.mtx.gz\").todense()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T23:04:28.653403Z", + "start_time": "2023-08-22T23:04:28.580379Z" + } + }, + "source": [ + "\n", + "Now view the entire matrix as a spy plot:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-31T00:41:41.608106Z", + "start_time": "2023-08-31T00:41:41.519592Z" + } + }, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from matspy import spy\n", + "\n", + "spy(A)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-22T23:04:45.970063Z", + "start_time": "2023-08-22T23:04:45.607749Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "# Precision\n", + "\n", + "Sometimes we may wish to set near-zero values to zero. The `precision` argument does that. Any value with `abs(value) <= precision` is treated as zero.\n", + "\n", + "This argument is compatible with [matplotlib.pyplot.spy()](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.spy.html)'s `precision` parameter.\n", + "\n", + "As a simple demonstration, use `precision` to filter random values:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-31T00:41:41.616444Z", + "start_time": "2023-08-31T00:41:41.604573Z" + }, + "jupyter": { + "source_hidden": true + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
precision = 0precision = 0.2precision = 0.8
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "arr = np.random.random((100, 100))\n", + "\n", + "from IPython.display import display, HTML\n", + "from matspy import to_sparkline\n", + "\n", + "precisions = [0, 0.2, 0.8]\n", + "display(HTML(f''\n", + " f''\n", + " f''\n", + " f''\n", + " f\"\"\n", + " f\"\"\n", + " f\"\"\n", + " f\"
precision = {precisions[0]}precision = {precisions[1]}precision = {precisions[2]}
{to_sparkline(arr, sparkline_size=1.5, precision=precisions[0])}{to_sparkline(arr, sparkline_size=1.5, precision=precisions[1])}{to_sparkline(arr, sparkline_size=1.5, precision=precisions[2])}
\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2023-08-31T00:41:41.617408Z", + "start_time": "2023-08-31T00:41:41.615424Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/matspy/__init__.py b/matspy/__init__.py index b59644f..8ef96d7 100644 --- a/matspy/__init__.py +++ b/matspy/__init__.py @@ -54,6 +54,14 @@ class MatSpyParams: buckets: int = None """Pixel count of longest side of spy image. If None then computed from size and DPI.""" + precision: float = None + """ + Applies to dense matrices like numpy arrays. If None or 0, nonzero values are plotted. Else only values with + absolute value > `precision` are plotted. + + Behaves like `matplotlib.pyplot.spy`'s `precision` argument, but for dense arrays only. + """ + spy_aa_tweaks_enabled: bool = None """ Whether to_sparkline() may tweak parameters like bucket count to prevent visible aliasing artifacts. 
@@ -117,6 +125,9 @@ def _register_bundled(): from .adapters.scipy_driver import SciPyDriver register_driver(SciPyDriver) + from .adapters.numpy_driver import NumPyDriver + register_driver(NumPyDriver) + from .adapters.graphblas_driver import GraphBLASDriver register_driver(GraphBLASDriver) @@ -125,7 +136,7 @@ def _register_bundled(): def _get_driver(mat): - type_str = ".".join((mat.__module__, mat.__class__.__name__)) + type_str = ".".join((type(mat).__module__, type(mat).__name__)) for prefix, driver in _driver_prefixes.items(): if type_str.startswith(prefix): return driver diff --git a/matspy/adapters/numpy_driver.py b/matspy/adapters/numpy_driver.py new file mode 100644 index 0000000..4981796 --- /dev/null +++ b/matspy/adapters/numpy_driver.py @@ -0,0 +1,18 @@ +# Copyright (C) 2023 Adam Lugowski. +# Use of this source code is governed by the BSD 2-clause license found in the LICENSE.txt file. +# SPDX-License-Identifier: BSD-2-Clause + +from typing import Any, Iterable + +from . import Driver, MatrixSpyAdapter + + +class NumPyDriver(Driver): + @staticmethod + def get_supported_type_prefixes() -> Iterable[str]: + return ["numpy."] + + @staticmethod + def adapt_spy(mat: Any) -> MatrixSpyAdapter: + from .numpy_impl import NumPySpy + return NumPySpy(mat) diff --git a/matspy/adapters/numpy_impl.py b/matspy/adapters/numpy_impl.py new file mode 100644 index 0000000..1b7563c --- /dev/null +++ b/matspy/adapters/numpy_impl.py @@ -0,0 +1,39 @@ +# Copyright (C) 2023 Adam Lugowski. +# Use of this source code is governed by the BSD 2-clause license found in the LICENSE.txt file. +# SPDX-License-Identifier: BSD-2-Clause + +import numpy as np +from scipy.sparse import csr_matrix + +from . 
import describe, MatrixSpyAdapter +from .scipy_impl import SciPySpy + + +class NumPySpy(MatrixSpyAdapter): + def __init__(self, arr): + super().__init__() + if len(arr.shape) != 2: + raise ValueError("Only 2D arrays are supported") + self.arr = arr + + def get_shape(self) -> tuple: + return self.arr.shape + + def describe(self) -> str: + format_name = "array" + + return describe(shape=self.arr.shape, nz_type=self.arr.dtype, + notes=f"{format_name}") + + def get_spy(self, spy_shape: tuple) -> np.array: + precision = self.get_option("precision", None) + + if not precision: + mask = (self.arr != 0) + else: + mask = (self.arr > precision) | (self.arr < -precision) + + if self.arr.dtype == 'object': + mask = mask & (self.arr != np.array([None])) + + return SciPySpy(csr_matrix(mask)).get_spy(spy_shape) diff --git a/matspy/spy_renderer.py b/matspy/spy_renderer.py index 4a4204d..8a2ffe6 100644 --- a/matspy/spy_renderer.py +++ b/matspy/spy_renderer.py @@ -59,12 +59,16 @@ def _rescale(arr, from_range, to_range): # noinspection PyUnusedLocal def get_spy_heatmap(adapter: MatrixSpyAdapter, buckets, shading, shading_absolute_min, - shading_relative_min, shading_relative_max_percentile, **kwargs): + shading_relative_min, shading_relative_max_percentile, precision, **kwargs): # find spy matrix shape mat_shape = adapter.get_shape() + if mat_shape[0] == 0 or mat_shape[1] == 0: + return np.array([[]]) + ratio = buckets / max(mat_shape) spy_shape = tuple(max(1, int(ratio * x)) for x in mat_shape) + adapter.set_option("precision", precision) dense = adapter.get_spy(spy_shape=spy_shape) dense[dense < 0] = 0 @@ -236,6 +240,10 @@ def to_sparkline(mat, retscale=False, scale=None, html_border="1px solid black", repeat = int(repeat) if repeat >= 2 else 1 heatmap = to_spy_heatmap(adapter, **options.to_kwargs()) + if heatmap.size == 0: + # zero-size + return "▫" # a single character that is an empty square + if repeat > 1: heatmap = heatmap.repeat(repeat, axis=0) heatmap = 
heatmap.repeat(repeat, axis=1) diff --git a/tests/test_numpy.py b/tests/test_numpy.py new file mode 100644 index 0000000..0156199 --- /dev/null +++ b/tests/test_numpy.py @@ -0,0 +1,51 @@ +# Copyright (C) 2023 Adam Lugowski. +# Use of this source code is governed by the BSD 2-clause license found in the LICENSE.txt file. +# SPDX-License-Identifier: BSD-2-Clause + +import unittest + +import numpy as np + +from matspy import spy_to_mpl, to_sparkline, to_spy_heatmap + +np.random.seed(123) + + +class NumPyTests(unittest.TestCase): + def setUp(self): + self.mats = [ + np.array([[]]), + np.random.random((10, 10)), + ] + + def test_no_crash(self): + import matplotlib.pyplot as plt + for mat in self.mats: + fig, ax = spy_to_mpl(mat) + plt.close(fig) + + res = to_sparkline(mat) + self.assertGreater(len(res), 5) + + def test_shape(self): + arr = np.array([]) + with self.assertRaises(ValueError): + spy_to_mpl(arr) + + def test_count(self): + arrs = [ + (1, np.array([[1]])), + (1, np.array([[1, 0], [0, 0]])), + (1, np.array([[1, None], [None, None]])), + (1, np.array([[1, 0], [None, None]])), + ] + + for count, arr in arrs: + area = np.prod(arr.shape) + heatmap = to_spy_heatmap(arr, buckets=1, shading="absolute") + self.assertEqual(len(heatmap), 1) + self.assertAlmostEqual(heatmap[0][0], count / area, places=2) + + +if __name__ == '__main__': + unittest.main()