From fbd1bd3be5cbc538d892556b2bee33e3ba6a79ef Mon Sep 17 00:00:00 2001 From: Eugen Wintersberger Date: Fri, 3 Nov 2017 14:20:56 +0100 Subject: [PATCH] Far to large commit - but only documentation * started a new chapter about advanced topics - currently only a stub * major update to the overview section in the UG * some updates tot he dataspace section in the UG Update #110 --- design/model.notation | 703 +++++++++++++ design/model.uml | 59 ++ doc/source/CMakeLists.txt | 1 + doc/source/advanced/CMakeLists.txt | 8 + doc/source/advanced/cpp_datastructures.rst | 8 + doc/source/advanced/hdf5_handlers.rst | 10 + doc/source/advanced/index.rst | 10 + doc/source/advanced/io.rst | 15 + doc/source/images/32bit_integer.svg | 94 ++ doc/source/images/CMakeLists.txt | 6 + doc/source/images/dataspace_high_level.svg | 448 +++++++++ doc/source/images/hdf5_selection_manager.svg | 77 ++ doc/source/images/memory_buffer_40byte.svg | 142 +++ doc/source/images/memory_storage_example.svg | 923 ++++++++++++++++++ doc/source/images/storage_models.svg | 89 ++ doc/source/index.rst.in | 1 + doc/source/users_guide/CMakeLists.txt | 1 - .../users_guide/advanced/CMakeLists.txt | 0 doc/source/users_guide/dataspace.rst | 36 + doc/source/users_guide/overview.rst | 136 +++ 20 files changed, 2766 insertions(+), 1 deletion(-) create mode 100644 doc/source/advanced/CMakeLists.txt create mode 100644 doc/source/advanced/cpp_datastructures.rst create mode 100644 doc/source/advanced/hdf5_handlers.rst create mode 100644 doc/source/advanced/index.rst create mode 100644 doc/source/advanced/io.rst create mode 100644 doc/source/images/32bit_integer.svg create mode 100644 doc/source/images/dataspace_high_level.svg create mode 100644 doc/source/images/hdf5_selection_manager.svg create mode 100644 doc/source/images/memory_buffer_40byte.svg create mode 100644 doc/source/images/memory_storage_example.svg create mode 100644 doc/source/images/storage_models.svg delete mode 100644 
doc/source/users_guide/advanced/CMakeLists.txt diff --git a/design/model.notation b/design/model.notation index 795e8025b6..5da6e25aa0 100644 --- a/design/model.notation +++ b/design/model.notation @@ -5375,4 +5375,707 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/design/model.uml b/design/model.uml index f30c1b9f13..439559fcb3 100644 --- a/design/model.uml +++ b/design/model.uml @@ -237,6 +237,10 @@ free-standing functions with the first argument being the parent or base object. Such an approach needs to be discussed to. + + + We would need something like +this for the container adapter. 
@@ -461,6 +465,15 @@ Such an approach needs to be discussed to. + + + + + + + + +
@@ -897,6 +910,52 @@ Such an approach needs to be discussed to. + + + + + + + + + + + + + + + + + + + + +
+ + + + + + +
+ + + + + + +
+ + + + + + +
+ + + +
diff --git a/doc/source/CMakeLists.txt b/doc/source/CMakeLists.txt index 56719166ef..66d13b228e 100644 --- a/doc/source/CMakeLists.txt +++ b/doc/source/CMakeLists.txt @@ -10,3 +10,4 @@ add_subdirectory(images) add_subdirectory(users_guide) add_subdirectory(design) add_subdirectory(api_reference) +add_subdirectory(advanced) diff --git a/doc/source/advanced/CMakeLists.txt b/doc/source/advanced/CMakeLists.txt new file mode 100644 index 0000000000..a8cacb8aac --- /dev/null +++ b/doc/source/advanced/CMakeLists.txt @@ -0,0 +1,8 @@ +set(SOURCES index.rst + io.rst + cpp_datastructures.rst + hdf5_handlers.rst + ) + +add_sphinx_source(${SOURCES}) +copy_to_current_build(${SOURCES}) \ No newline at end of file diff --git a/doc/source/advanced/cpp_datastructures.rst b/doc/source/advanced/cpp_datastructures.rst new file mode 100644 index 0000000000..54705b982b --- /dev/null +++ b/doc/source/advanced/cpp_datastructures.rst @@ -0,0 +1,8 @@ +============================ +C++ data structures and HDF5 +============================ + +.. todo:: + + Add here some content about C++ data structures and about the principal + difficulties with them when working with HDF5. \ No newline at end of file diff --git a/doc/source/advanced/hdf5_handlers.rst b/doc/source/advanced/hdf5_handlers.rst new file mode 100644 index 0000000000..6621f51e37 --- /dev/null +++ b/doc/source/advanced/hdf5_handlers.rst @@ -0,0 +1,10 @@ + +======================== +HDF5 handlers in *h5cpp* +======================== + + +.. todo:: + + Add here some content basically about :cpp:class:`hdf5::ObjectHandle` and + how it is related to :cpp:type:`hid_t`. \ No newline at end of file diff --git a/doc/source/advanced/index.rst b/doc/source/advanced/index.rst new file mode 100644 index 0000000000..e17f19ee00 --- /dev/null +++ b/doc/source/advanced/index.rst @@ -0,0 +1,10 @@ +=============== +Advanced topics +=============== + + +.. 
toctree:: + :maxdepth: 1 + + io + hdf5_handlers diff --git a/doc/source/advanced/io.rst b/doc/source/advanced/io.rst new file mode 100644 index 0000000000..6df0718612 --- /dev/null +++ b/doc/source/advanced/io.rst @@ -0,0 +1,15 @@ +=========== +Advanced IO +=========== + +This section deals with advanced IO topics. IO is a complex subject and it +would be hard to discuss all nuances in the users guide where we want to +point out only the major aspects. Once you have mastered the users guide +this chapter is a good starting point for further exploring the capabilities +of *h5cpp*. + + +.. toctree:: + :maxdepth: 1 + + cpp_datastructures \ No newline at end of file diff --git a/doc/source/images/32bit_integer.svg b/doc/source/images/32bit_integer.svg new file mode 100644 index 0000000000..8018061ccb --- /dev/null +++ b/doc/source/images/32bit_integer.svg @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + diff --git a/doc/source/images/CMakeLists.txt b/doc/source/images/CMakeLists.txt index d6aff3f3bb..e9b5d71a09 100644 --- a/doc/source/images/CMakeLists.txt +++ b/doc/source/images/CMakeLists.txt @@ -13,6 +13,12 @@ set(IMAGES attribute_manager_uml.png static_dataspace.svg dynamic_dataspace_bounded.svg dynamic_dataspace_unbounded.svg + hdf5_selection_manager.svg + memory_buffer_40byte.svg + 32bit_integer.svg + dataspace_high_level.svg + storage_models.svg + memory_storage_example.svg ) add_sphinx_source(${IMAGES}) diff --git a/doc/source/images/dataspace_high_level.svg b/doc/source/images/dataspace_high_level.svg new file mode 100644 index 0000000000..a776bd19a8 --- /dev/null +++ b/doc/source/images/dataspace_high_level.svg @@ -0,0 +1,448 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/images/hdf5_selection_manager.svg 
b/doc/source/images/hdf5_selection_manager.svg new file mode 100644 index 0000000000..29dbf5db4e --- /dev/null +++ b/doc/source/images/hdf5_selection_manager.svg @@ -0,0 +1,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +SelectionManager + + + + + +Dataspace + + + 1 + + 1 + + + + + 1 + + 1 + + + + + diff --git a/doc/source/images/memory_buffer_40byte.svg b/doc/source/images/memory_buffer_40byte.svg new file mode 100644 index 0000000000..dd66a3f104 --- /dev/null +++ b/doc/source/images/memory_buffer_40byte.svg @@ -0,0 +1,142 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + 0 + 8 + 16 + 32 + 24 + + diff --git a/doc/source/images/memory_storage_example.svg b/doc/source/images/memory_storage_example.svg new file mode 100644 index 0000000000..eecb7b93d5 --- /dev/null +++ b/doc/source/images/memory_storage_example.svg @@ -0,0 +1,923 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + + + + 1 + + + + 3 + + + + 4 + + + + 2 + + + + 5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 6 + + + + 7 + + + + 9 + + + + 10 + + + + 8 + + + + 11 + + + + + + + + + + + + + + + + + + + 12 + + + + 13 + + + + 14 + + 0 + 72 + 144 + 216 + 288 + + diff --git a/doc/source/images/storage_models.svg b/doc/source/images/storage_models.svg new file mode 100644 index 0000000000..20ef19ace9 --- /dev/null +++ b/doc/source/images/storage_models.svg @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Storage + + + + + +DiskStorage + + + +MemoryStorage + + + + + + + + + + + + diff --git a/doc/source/index.rst.in b/doc/source/index.rst.in index 3a00e57084..7f0cc1bd4b 100644 --- a/doc/source/index.rst.in +++ b/doc/source/index.rst.in @@ -16,6 +16,7 @@ Contents: design/index users_guide/index api_reference/index + 
advanced/index Indices and tables diff --git a/doc/source/users_guide/CMakeLists.txt b/doc/source/users_guide/CMakeLists.txt index 50f90f1607..e9862c955c 100644 --- a/doc/source/users_guide/CMakeLists.txt +++ b/doc/source/users_guide/CMakeLists.txt @@ -1,4 +1,3 @@ -add_subdirectory(advanced) set(SOURCES index.rst installing.rst using.rst diff --git a/doc/source/users_guide/advanced/CMakeLists.txt b/doc/source/users_guide/advanced/CMakeLists.txt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/doc/source/users_guide/dataspace.rst b/doc/source/users_guide/dataspace.rst index 29ebd6938d..75bd5e505d 100644 --- a/doc/source/users_guide/dataspace.rst +++ b/doc/source/users_guide/dataspace.rst @@ -193,7 +193,43 @@ STL container to obtain all simple dataspaces in a collection sdfsdfsf Selections ========== +Selections in HDF5 allow the user to read or write only specific data from or +to a file. This is particularly useful if the total size of a dataset +is too large to fit into memory or if only specific data is required +to perform a particular action. + + .. figure:: ../images/hdf5_selections.svg :align: center :width: 60% + +HDF5 provides two types of selections + +* *hyperslabs* (:cpp:class:`hdf5::dataspace::Hyperslab`) which are + multidimensional selections that can roughly be compared to the complex array + slicing and indexing features that numpy arrays allow in Python +* *point selections* (:cpp:class:`hdf5::dataspace::Points`) which allow picking + individual elements from a dataset. + +All selections derive from :cpp:class:`hdf5::dataspace::Selection`. This +class basically provides a single method to apply a selection to a dataspace. + + +.. attention:: + + Currently only hyperslabs are implemented in *h5cpp*. + + +Applying a selection +-------------------- + +To apply a selection you need to + +.. 
figure:: ../images/hdf5_selection_manager.svg + :align: center + :width: 75% + +Hyperslabs +---------- + diff --git a/doc/source/users_guide/overview.rst b/doc/source/users_guide/overview.rst index 5b607c6b03..805e2494a2 100644 --- a/doc/source/users_guide/overview.rst +++ b/doc/source/users_guide/overview.rst @@ -131,6 +131,142 @@ remains constant once a node has been created within a particular file. * *Paths* are lists of link names referencing a node within a file * every *Node* is associated with a unique ID which remains unique even over file boundaries + +Data IO +------- + +Until now we have only considered structural aspects of an HDF5 file. The +second, and probably the most important, topic is data IO. +To understand how data IO works in HDF5 we first have to introduce a bit of +terminology. + +From a rather high level point of view the smallest unit of information which +can be accessed by HDF5 is a *data element*. Such a *data element* can be +anything ranging from a single integer number up to complex types +consisting of nested C-structs or C++ classes. A *data element* is stored in +memory and/or on disk as a set of bits. In order to interpret these bits +correctly and reassemble the stored *data element* we need some information +about it. This information is provided in HDF5 by a *datatype*. + +*Data elements* have some logical organization. For instance we can +store these elements in a 2 dimensional array. + +.. figure:: ../images/dataspace_high_level.svg + :align: center + :width: 75% + + *data elements* of 3 numbers (could be a 3D vector) are stored in a + 2 dimensional array with 5 elements along the first and 3 elements + along the second dimension. + +The logical layout of *data elements* is described by a *dataspace*. +Currently there are only two types of *dataspaces* available in HDF5 + +* a *scalar* space which can store only a single element +* and a *simple* space which is a regular n-dimensional array + (like the one above). 
+ +*Dataspaces* and *datatypes* are the fundamental building blocks of all +objects that can store data within an HDF5 file + +* *attributes* +* and *datasets* + +For the construction of either of them you have to provide a *datatype* and +a *dataspace*. As a matter of fact, *attributes* and *datasets* are quite +similar, though a *dataset* is a *node* type and can be accessed via a +*path*. In addition, *datasets* are far more flexible than *attributes* as +we will see soon. + +We need to introduce the term *storage* as a rather abstract region of space +where we can store data and which is contiguous and linearly addressable. +Technically such a *storage* can be implemented either + +* in memory (as a contiguous region of memory) +* or on disk + +where in the latter case it is not important for us how exactly the data +is stored on disk (as a single block of data within a file or scattered +over several blocks within a file). + +.. figure:: ../images/storage_models.svg + :align: center + :width: 60% + +We never have to care about the :cpp:class:`DiskStorage`; this is handled by the +HDF5 library. However, in some cases we need to care about the +:cpp:class:`MemoryStorage`. As far as this chapter is concerned we can consider +both to satisfy the above constraints. + +We will now have a rough look at how data transfer works by using the above +example. For the dataset under consideration we have + +* a datatype comprising 3 double values (3x8Bytes) and thus a total size of + 24 Bytes +* and a dataspace of shape (3,5) where the last index varies fastest. + +The :cpp:class:`MemoryStorage` of such a dataset would look like this + +.. figure:: ../images/memory_storage_example.svg + :align: center + :width: 65% + +Every data element occupies 24 bytes. The numbers on the very left denote the +memory offset in bytes of the leftmost byte in the block. 
In the above figure +the elements are represented in a 3x5 matrix to save space but in memory +they would simply be arranged one after the other. +It is the dataspace which associates the linear region of memory with a +particular shape. By default C-style ordering (the last index varies fastest) +is used. As a matter of fact it is the job of the dataspace to map the +multidimensional index of a particular element onto a linear address in the +storage area. + +When data is written to disk, the content of the :cpp:class:`MemoryStorage` +is transferred to the :cpp:class:`DiskStorage`. It is important to note that +the dataspace of the latter need not be equal to that of the memory storage. +They only need to have the same size (number of data elements). In addition, the +data elements in memory must be convertible to those associated with the +file storage. The same is true for the other direction when reading data +from the disk. + +Selections and partial IO +~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the nice features of HDF5 is that we do not have to read or write the +entire data from or to the disk. This is particularly useful if the total +amount of data available in a dataset would not fit in the memory of the +computer which wants to access the data. +We can distinguish between + +* *point selections* where individual data elements can be picked in an + arbitrary pattern +* and *hyperslab selections* which are regular multidimensional *slices*. + *Hyperslabs* roughly compare to what you can do with array indices and + slices on numpy arrays in Python. + +With a point selection we could for instance read the elements +(0,2), (1,3) and (2,0) and store them either in a new memory storage of +size 3 (which would be 72 bytes in total) or, in a more sophisticated setup, +we could map them onto points (0), (5) and (11) in a 1D array in memory. + + + + + + + +.. 
important:: + + The following concepts are important and thus should be kept in mind + for further reading + + * a *Datatype* describes a single data element (no matter how complex it + might be) + * a *Dataspace* describes how data elements are laid out in memory + * all data is stored in *Datasets* and *Attributes* (the interfaces are + quite similar but attributes have some limitations) + * *Selections* make it possible to read only a particular part of a + *Dataset* *h5cpp* a C++ wrapper for *HDF5* ================================