diff --git a/README.md b/README.md index b9d414e..8e9a4f1 100644 --- a/README.md +++ b/README.md @@ -26,13 +26,10 @@ A simple yet powerful tensorboard event log parser/reader. Installation: ```sh -pip install tensorflow # or tensorflow-cpu pip install -U tbparse # requires Python >= 3.7 ``` -**Note**: If you don't want to install TensorFlow, see [Installing without TensorFlow](https://tbparse.readthedocs.io/en/latest/pages/installation.html#installing-without-tensorflow). - -We suggest using an additional virtual environment for parsing and plotting the tensorboard events. So no worries if your training code uses Python 3.6 or older versions. +We suggest using an additional virtual environment for parsing and plotting the tensorboard events. So no worries if your training code uses Python 3.6 or older versions. Reading one or more event files with tbparse only requires 5 lines of code: @@ -77,11 +74,11 @@ All events above are generated and plotted in [gallery-pytorch.ipynb](https://gi ## Installation ```sh -pip install tensorflow # or tensorflow-cpu +pip install tensorflow # optional, only required if you want to parse images and audio pip install -U tbparse # requires Python >= 3.7 ``` -**Note**: If you don't want to install TensorFlow, see [Installing without TensorFlow](https://tbparse.readthedocs.io/en/latest/pages/installation.html#installing-without-tensorflow). +**Note**: For details on when TensorFlow is required, see [Installing without TensorFlow](https://tbparse.readthedocs.io/en/latest/pages/installation.html#installing-without-tensorflow). ## Testing the Source Code diff --git a/docs/index.rst b/docs/index.rst index af731f8..823aac3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -54,11 +54,8 @@ Installation: .. code-block:: bash - pip install tensorflow # or tensorflow-cpu pip install -U tbparse # requires Python >= 3.7 -**Note**: If you don't want to install TensorFlow, see :ref:`Installing without TensorFlow `. 
- We suggest using an additional virtual environment for parsing and plotting the tensorboard events. So no worries if your training code uses Python 3.6 or older versions. diff --git a/docs/pages/installation.rst b/docs/pages/installation.rst index 6c4f290..8baba9c 100644 --- a/docs/pages/installation.rst +++ b/docs/pages/installation.rst @@ -10,10 +10,10 @@ Install from PyPI: .. code-block:: bash - pip install tensorflow # or tensorflow-cpu + pip install tensorflow # optional, only required if you want to parse images and audio pip install -U tbparse # requires Python >= 3.7 -**Note**: If you don't want to install TensorFlow, see :ref:`Installing without TensorFlow `. +**Note**: For details on when TensorFlow is required, see :ref:`Installing without TensorFlow `. Install from Source: @@ -21,7 +21,7 @@ Install from Source: git clone https://github.com/j3soon/tbparse cd tbparse - pip install tensorflow # or tensorflow-cpu + pip install tensorflow # optional, only required if you want to parse images and audio pip install -e . # requires Python >= 3.7 .. _tbparse_installing-without-tensorflow: @@ -38,13 +38,13 @@ You can install tbparse with reduced feature set if you don't want to install Te Without TensorFlow, tbparse supports parsing :ref:`scalars `, -:ref:`histograms `, and -:ref:`hparams `, -but doesn't support parsing :ref:`tensors `, -:ref:`images `, -:ref:`audio `, and +:ref:`histograms `, +:ref:`hparams `, and -:ref:`text `. +:ref:`text `, +but doesn't support parsing +:ref:`images ` and +:ref:`audio `. 
tbparse will instruct you to install TensorFlow by raising an error if you try to parse the unsupported event types, such as: diff --git a/tbparse/summary_reader.py b/tbparse/summary_reader.py index 25b399a..36694f9 100644 --- a/tbparse/summary_reader.py +++ b/tbparse/summary_reader.py @@ -17,6 +17,9 @@ STORE_EVERYTHING_SIZE_GUIDANCE, TENSORS, AudioEvent, EventAccumulator, HistogramEvent, ImageEvent, ScalarEvent, TensorEvent) from tensorboard.plugins.hparams.plugin_data_pb2 import HParamsPluginData + +from .tensorflow_stub import make_ndarray + try: import tensorflow except ImportError: @@ -51,7 +54,7 @@ } ALL_EVENT_TYPES = {SCALARS, TENSORS, HISTOGRAMS, IMAGES, AUDIO, HPARAMS, TEXT} -REDUCED_EVENT_TYPES = {SCALARS, HISTOGRAMS, HPARAMS} +REDUCED_EVENT_TYPES = ALL_EVENT_TYPES.difference({IMAGES, AUDIO}) ALL_EXTRA_COLUMNS = {'dir_name', 'file_name', 'wall_time', 'min', 'max', 'num', 'sum', 'sum_squares', 'width', 'height', 'content_type', 'length_frames', 'sample_rate'} @@ -577,7 +580,6 @@ def histogram_to_cdf(counts: np.ndarray, limits: np.ndarray, i += 1 return np.array(y) / n - # pylint: disable=R0914 @staticmethod def histogram_to_bins(counts: np.ndarray, limits: np.ndarray, lower_bound: Optional[float] = None, @@ -603,8 +605,9 @@ def histogram_to_bins(counts: np.ndarray, limits: np.ndarray, each bucket. 
:rtype: Tuple[np.ndarray, np.ndarray] """ + # pylint: disable=R0914 # pylint: disable=C0301 - # Ref: https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/histogram/tf_histogram_dashboard/histogramCore.ts#L83 # noqa: E501 + # Ref: https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/histogram/tf_histogram_dashboard/histogramCore.ts#L83 # noqa: E501 assert len(counts) == len(limits) assert counts[0] == 0 if lower_bound is None or upper_bound is None: @@ -676,12 +679,10 @@ def _get_tensor_cols(self, tag_to_events: Dict[str, TensorEvent]) -> \ cols = self._get_default_cols(tag_to_events) if len(tag_to_events) == 0: return cols - # pylint: disable=C0103 - tf = SummaryReader._get_tensorflow() idx = 0 for tag, events in tag_to_events.items(): for e in events: - value = tf.make_ndarray(e.tensor_proto) + value = make_ndarray(e.tensor_proto) if value.shape == (): # Tensorflow histogram may have more than one items value = value.item() @@ -807,12 +808,10 @@ def _get_text_cols(self, tag_to_events: Dict[str, TensorEvent]) -> \ cols = self._get_default_cols(tag_to_events) if len(tag_to_events) == 0: return cols - # pylint: disable=C0103 - tf = SummaryReader._get_tensorflow() idx = 0 for tag, events in tag_to_events.items(): for e in events: - value = tf.make_ndarray(e.tensor_proto).item() + value = make_ndarray(e.tensor_proto).item() assert isinstance(value, bytes) value = value.decode('utf-8') cols['step'][idx] = e.step diff --git a/tbparse/tensorflow_stub/__init__.py b/tbparse/tensorflow_stub/__init__.py new file mode 100644 index 0000000..e56c20f --- /dev/null +++ b/tbparse/tensorflow_stub/__init__.py @@ -0,0 +1,7 @@ +""" +Provides a stub for the TensorFlow module. 
+""" + +from .tensor_util import make_ndarray + +__all__ = ['make_ndarray', ] diff --git a/tbparse/tensorflow_stub/tensor_util.py b/tbparse/tensorflow_stub/tensor_util.py new file mode 100644 index 0000000..bfb7966 --- /dev/null +++ b/tbparse/tensorflow_stub/tensor_util.py @@ -0,0 +1,109 @@ +import numpy as np +from tensorboard.compat.tensorflow_stub import dtypes + + +# flake8: noqa +# pylint: skip-file +# Ref: https://github.com/tensorflow/tensorflow/blob/ad6d8cc177d0c868982e39e0823d0efbfb95f04c/tensorflow/python/framework/tensor_util.py#L633 +def make_ndarray(tensor): + """Create a numpy ndarray from a tensor. + + Create a numpy ndarray with the same shape and data as the tensor. + + For example: + + ```python + # Tensor a has shape (2,3) + a = tf.constant([[1,2,3],[4,5,6]]) + proto_tensor = tf.make_tensor_proto(a) # convert `tensor a` to a proto tensor + tf.make_ndarray(proto_tensor) # output: array([[1, 2, 3], + # [4, 5, 6]], dtype=int32) + # output has shape (2,3) + ``` + + Args: + tensor: A TensorProto. + + Returns: + A numpy array with the tensor contents. + + Raises: + TypeError: if tensor has unsupported type. + + """ + shape = [d.size for d in tensor.tensor_shape.dim] + num_elements = np.prod(shape, dtype=np.int64) + tensor_dtype = dtypes.as_dtype(tensor.dtype) + dtype = tensor_dtype.as_numpy_dtype + + if tensor.tensor_content: + return (np.frombuffer(tensor.tensor_content, + dtype=dtype).copy().reshape(shape)) + + if tensor_dtype == dtypes.string: + # np.pad throws on these arrays of type np.object_. 
+ values = list(tensor.string_val) + padding = num_elements - len(values) + if padding > 0: + last = values[-1] if values else "" + values.extend([last] * padding) + return np.array(values, dtype=dtype).reshape(shape) + + if tensor_dtype == dtypes.float16 or tensor_dtype == dtypes.bfloat16: + # the half_val field of the TensorProto stores the binary representation + # of the fp16: we need to reinterpret this as a proper float16 + values = np.fromiter(tensor.half_val, dtype=np.uint16) + values.dtype = dtype + # TODO: The following is a temporary fix for float8_e5m2 and float8_e4m3fn + # Ref: https://github.com/tensorflow/tensorboard/issues/6899 + elif tensor_dtype in [ + dtypes.DType(dtypes.types_pb2.DT_FLOAT8_E5M2), + dtypes.DType(dtypes.types_pb2.DT_FLOAT8_E4M3FN), + ]: + values = np.fromiter(tensor.float8_val, dtype=np.uint8) + values.dtype = dtype + elif tensor_dtype == dtypes.float32: + values = np.fromiter(tensor.float_val, dtype=dtype) + elif tensor_dtype == dtypes.float64: + values = np.fromiter(tensor.double_val, dtype=dtype) + elif tensor_dtype in [ + dtypes.int32, + dtypes.uint8, + dtypes.uint16, + dtypes.int16, + dtypes.int8, + dtypes.qint32, + dtypes.quint8, + dtypes.qint8, + dtypes.qint16, + dtypes.quint16, + dtypes.int4, + dtypes.uint4, + ]: + values = np.fromiter(tensor.int_val, dtype=dtype) + elif tensor_dtype == dtypes.int64: + values = np.fromiter(tensor.int64_val, dtype=dtype) + elif tensor_dtype == dtypes.uint32: + values = np.fromiter(tensor.uint32_val, dtype=dtype) + elif tensor_dtype == dtypes.uint64: + values = np.fromiter(tensor.uint64_val, dtype=dtype) + elif tensor_dtype == dtypes.complex64: + it = iter(tensor.scomplex_val) + values = np.array([complex(x[0], x[1]) for x in zip(it, it)], dtype=dtype) + elif tensor_dtype == dtypes.complex128: + it = iter(tensor.dcomplex_val) + values = np.array([complex(x[0], x[1]) for x in zip(it, it)], dtype=dtype) + elif tensor_dtype == dtypes.bool: + values = np.fromiter(tensor.bool_val, dtype=dtype) + 
else: + raise TypeError(f"Unsupported tensor type: {tensor.dtype}. See " + "https://www.tensorflow.org/api_docs/python/tf/dtypes " + "for supported TF dtypes.") + + if values.size == 0: + return np.zeros(shape, dtype) + + if values.size != num_elements: + values = np.pad(values, (0, num_elements - values.size), "edge") + + return values.reshape(shape) diff --git a/tests/test_summary_reader/test_no_tensorflow.py b/tests/test_summary_reader/test_no_tensorflow.py index 80654f6..a30b907 100644 --- a/tests/test_summary_reader/test_no_tensorflow.py +++ b/tests/test_summary_reader/test_no_tensorflow.py @@ -15,6 +15,11 @@ def prepare(testdir): for i in x: writer.add_scalar('y=2x', i * 2, i) writer.add_text('text', 'lorem ipsum', 0) + img_batch = np.zeros((16, 3, 100, 100)) + for i in range(16): + img_batch[i, 0] = np.arange(0, 10000).reshape(100, 100) / 10000 / 16 * i + img_batch[i, 1] = (1 - np.arange(0, 10000).reshape(100, 100) / 10000) / 16 * i + writer.add_images('my_image_batch', img_batch, 0) writer.close() def test_log_dir(prepare, testdir): @@ -24,7 +29,10 @@ def test_log_dir(prepare, testdir): assert df.columns.tolist() == ['step', 'y=2x'] assert df['step'].to_list() == [i for i in range(100)] assert df['y=2x'].to_list() == [i*2 for i in range(100)] + df = reader.text + assert df['step'].to_list() == [0] + assert df['text'].to_list() == ["lorem ipsum"] with pytest.raises(ModuleNotFoundError): - df = reader.text + df = reader.images with pytest.raises(ModuleNotFoundError): - reader = SummaryReader(log_dir, pivot=True, event_types={'scalars', 'text'}) + reader = SummaryReader(log_dir, pivot=True, event_types={'images'}) diff --git a/tests/test_summary_reader/test_scalar_new_style_torch_sample.py b/tests/test_summary_reader/test_scalar_new_style_torch_sample.py new file mode 100644 index 0000000..ec70187 --- /dev/null +++ b/tests/test_summary_reader/test_scalar_new_style_torch_sample.py @@ -0,0 +1,24 @@ +import os + +import pytest +from tbparse import SummaryReader 
+from torch.utils.tensorboard import SummaryWriter + + +@pytest.fixture +def prepare(testdir): + # Ref: https://pytorch.org/docs/stable/tensorboard.html + log_dir = os.path.join(testdir.tmpdir, 'run') + writer = SummaryWriter(log_dir) + x = range(100) + for i in x: + writer.add_scalar('y=2x', i * 2, i, new_style=True) + writer.close() + +def test_log_dir(prepare, testdir): + log_dir = os.path.join(testdir.tmpdir, 'run') + reader = SummaryReader(log_dir, pivot=True) + df = reader.tensors + assert df.columns.tolist() == ['step', 'y=2x'] + assert df['step'].to_list() == [i for i in range(100)] + assert df['y=2x'].to_list() == [i*2 for i in range(100)] diff --git a/tox.ini b/tox.ini index bb1f98b..c9f3ddb 100644 --- a/tox.ini +++ b/tox.ini @@ -24,6 +24,8 @@ commands = "{toxinidir}/tests/test_summary_reader/test_histogram_torch_sample.py" \ "{toxinidir}/tests/test_summary_reader/test_hparams_torch_sample.py" \ "{toxinidir}/tests/test_summary_reader/test_scalar_torch_sample.py" \ + "{toxinidir}/tests/test_summary_reader/test_scalar_new_style_torch_sample.py" \ + "{toxinidir}/tests/test_summary_reader/test_text_torch_sample.py" \ "{toxinidir}/tests/test_summary_reader/test_no_tensorflow.py" # Test tbparse with full feature set (with TensorFlow) pip install tensorflow