Initial commit

aradi · aradi · commit cf02c3997324 · 2020-01-31T18:51:01.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+*~
+.idea
+*.pyc
+dist
+build
+docs/_build
+*.egg-info
+.tox
+_gitmsg.saved.txt
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2020 Bálint Aradi, Universität Bremen
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation and/or
+other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.rst b/README.rst
@@ -0,0 +1,60 @@
+************************************
+HSD — Human-friendly Structured Data
+************************************
+
+This Python package contains utilities to write (and soon also to read) files in
+the Human-friendly Structured Data (HSD) format.
+
+It is licensed under the *BSD 2-clause license*.
+
+
+The HSD format
+==============
+
+The HSD-format is very similar to both JSON and XML, but tries to minimize the
+effort for humans to read and write it. It omits special characters as much as
+possible but (in contrast to YAML for example) is not indentation dependent.
+
+It was originally developed as the input format for a scientific
+simulation tool (DFTB+), but is absolutely general. A typical input written in
+HSD would look like ::
+
+  driver {
+    conjugate_gradients {
+      moved_atoms = 1 2 "7:19"
+      max_steps = 100
+    }
+  }
+
+  hamiltonian {
+    dftb {
+      scc = yes
+      scc_tolerance = 1e-10
+      mixer {
+        broyden {}
+      }
+      filling {
+        fermi {
+          temperature [kelvin] = 1e-8
+        }
+      }
+      k_points_and_weights {
+        supercell_folding = {
+          2   0   0
+          0   2   0
+          0   0   2
+          0.5 0.5 0.5
+        }
+      }
+    }
+  }
+
+Content in HSD format can be represented as JSON. Content in JSON format can be
+represented as HSD, provided it satisfies a restriction for arrays: Either all
+elements of an array must be objects or none of them. (This allows for a clear
+separation of structure and data and allows for the very simple input format.)
+
+Content in HSD format can be represented as XML (DOM-tree). Content in XML can
+be converted to HSD, provided it satisfies the restriction that every child has
+either data (text) or further children, but never both of them. (Again, this
+ensures the simplicity of the input format.)
diff --git a/src/hsd.py b/src/hsd.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+#------------------------------------------------------------------------------#
+#  hsd: package for manipulating HSD-formatted data                            #
+#  Copyright (C) 2020  Bálint Aradi, Universität Bremen                        #
+#                                                                              #
+#  See the LICENSE file for terms of usage and distribution.                   #
+#------------------------------------------------------------------------------#
+#
+"""
+Provides functionality to convert Python structures to HSD
+"""
+import io
+import numpy as np
+
+__all__ = ['dump', 'dumps']
+
+
+# Indentation added for each nesting level of the HSD output
+_INDENT_STR = "  "
+
+# String quoting delimiters (must be at least two); the first one is preferred,
+# the second one is used when the string itself contains the first one
+_QUOTING_CHARS = "\"'"
+
+# Dictionary keys ending with this suffix carry the attribute of the key they
+# are derived from (e.g. "Temperature.attribute" belongs to "Temperature")
+_ATTRIBUTE_SUFFIX = ".attribute"
+
+
+def dump(obj, fobj):
+    """Writes the HSD representation of a dictionary into a file.
+
+    Args:
+        obj: Dictionary to be serialized in HSD format.
+        fobj: File like object the serialized content is written to.
+
+    Raises:
+        TypeError: If obj is not a dictionary.
+    """
+    if not isinstance(obj, dict):
+        raise TypeError("Invalid object type")
+    _dump_dict(obj, fobj, "")
+
+
+def dumps(obj):
+    """Returns the HSD representation of an object as a string.
+
+    Args:
+        obj: Object to serialize (passed on to dump()).
+
+    Returns:
+        HSD formatted string.
+    """
+    with io.StringIO() as buffer:
+        dump(obj, buffer)
+        return buffer.getvalue()
+
+
+def _dump_dict(obj, fobj, indentstr):
+    """Writes the content of a dictionary in HSD format.
+
+    Keys ending with the attribute suffix are not written as separate entries,
+    their value is attached in square brackets to the tag they belong to.
+
+    Args:
+        obj: Dictionary to serialize.
+        fobj: File like object to write the result to.
+        indentstr: Indentation string to put in front of every written line.
+
+    Raises:
+        ValueError: If an attribute has no corresponding tag or is not a
+            string.
+        TypeError: If a list starting with a dictionary contains items which
+            are not dictionaries.
+    """
+    for key, value in obj.items():
+        if key.endswith(_ATTRIBUTE_SUFFIX):
+            tagname = key[:-len(_ATTRIBUTE_SUFFIX)]
+            if tagname in obj:
+                # Attributes are written together with their tag
+                continue
+            msg = "Attribute '{}' without corresponding tag '{}'"\
+                  .format(key, tagname)
+            raise ValueError(msg)
+        attribkey = key + _ATTRIBUTE_SUFFIX
+        attrib = obj.get(attribkey)
+        if attrib is None:
+            attribstr = ""
+        elif not isinstance(attrib, str):
+            msg = "Invalid data type ({}) for '{}'"\
+                  .format(str(type(attrib)), attribkey)
+            raise ValueError(msg)
+        else:
+            attribstr = " [" + attrib + "]"
+        if isinstance(value, dict):
+            if value:
+                fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr))
+                _dump_dict(value, fobj, indentstr + _INDENT_STR)
+                fobj.write("{}}}\n".format(indentstr))
+            else:
+                fobj.write("{}{}{} {{}}\n".format(indentstr, key, attribstr))
+        elif isinstance(value, list) and value and isinstance(value[0], dict):
+            # Structure and data must not be mixed within a list: check all
+            # items before writing anything for this key
+            if not all(isinstance(item, dict) for item in value):
+                msg = "List '{}' mixes dictionaries with other data types"\
+                      .format(key)
+                raise TypeError(msg)
+            # Every dictionary in the list becomes a separate block with the
+            # same tag name (and attribute)
+            for item in value:
+                fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr))
+                _dump_dict(item, fobj, indentstr + _INDENT_STR)
+                fobj.write("{}}}\n".format(indentstr))
+        else:
+            valstr = _get_hsd_rhs(value, indentstr)
+            fobj.write("{}{}{} {}\n"\
+                     .format(indentstr, key, attribstr, valstr))
+
+
+def _get_hsd_rhs(obj, indentstr):
+    """Returns the right hand side of an HSD assignment for a data object.
+
+    Args:
+        obj: Data to convert (list, numpy array or a single item).
+        indentstr: Indentation of the line containing the assignment.
+
+    Returns:
+        String starting with the equal sign. Multi-line data is wrapped into
+        curly braces with every data line indented one level deeper.
+    """
+    if isinstance(obj, np.ndarray):
+        content = _list_to_hsd(obj.tolist())
+    elif isinstance(obj, list):
+        content = _list_to_hsd(obj)
+    else:
+        content = _item_to_hsd(obj)
+
+    if "\n" not in content:
+        return "= " + content
+
+    # Multi-line data: open a block and indent every data line
+    line_sep = "\n" + indentstr + _INDENT_STR
+    datalines = content.split("\n")
+    return "= {" + line_sep + line_sep.join(datalines) + "\n" + indentstr + "}"
+
+
+def _list_to_hsd(lst):
+    """Converts a flat or a nested list to its HSD string representation.
+
+    Items of a row are separated by spaces. If the first element of the list
+    is a list itself, every element is treated as a row and the rows are
+    separated by newlines.
+    """
+    def _row_to_hsd(row):
+        return " ".join(_item_to_hsd(entry) for entry in row)
+
+    if not lst or not isinstance(lst[0], list):
+        return _row_to_hsd(lst)
+    return "\n".join(_row_to_hsd(row) for row in lst)
+
+
+def _item_to_hsd(item):
+    """Converts a single data item to its HSD string representation.
+
+    Args:
+        item: Boolean, integer, float or string to convert.
+
+    Returns:
+        "Yes" / "No" for booleans, the str() representation for numbers and
+        the quoted string for strings.
+
+    Raises:
+        TypeError: If the item has an unsupported data type.
+    """
+    # bool is a subclass of int, so it must be tested before the numeric types,
+    # otherwise True / False would be written as "True" / "False"
+    if isinstance(item, bool):
+        return "Yes" if item else "No"
+    if isinstance(item, (int, float)):
+        return str(item)
+    if isinstance(item, str):
+        return _str_to_hsd(item)
+    msg = "Data type {} can not be converted to HSD string"\
+          .format(type(item))
+    raise TypeError(msg)
+
+
+def _str_to_hsd(string):
+    """Returns the string enclosed in quoting delimiters.
+
+    The first quoting character is used unless it occurs in the string, in
+    which case the second one is taken.
+
+    Raises:
+        ValueError: If the string contains more than one of the quoting
+            characters.
+    """
+    nr_present = 0
+    for quote in _QUOTING_CHARS:
+        if quote in string:
+            nr_present += 1
+    if nr_present > 1:
+        raise ValueError(
+            "String '{}' can not be quoted correctly".format(string))
+
+    if _QUOTING_CHARS[0] in string:
+        delimiter = _QUOTING_CHARS[1]
+    else:
+        delimiter = _QUOTING_CHARS[0]
+    return "".join([delimiter, string, delimiter])
+
+
+
+# Demo: serializes a sample input (nested dictionaries, an attribute, lists,
+# nested lists and numpy arrays) and prints the resulting HSD text
+if __name__ == "__main__":
+    INPUT = {
+        "Driver": {},
+        "Hamiltonian": {
+            "DFTB": {
+                "Scc": True,
+                "SccTolerance": 1e-10,
+                "MaxSccIterations": 1000,
+                "Mixer": {
+                    "Broyden": {}
+                },
+                "MaxAngularMomentum": {
+                    "O": "p",
+                    "H": "s"
+                },
+                "Filling": {
+                    "Fermi": {
+                        "Temperature": 1e-8,
+                        # Attribute of the "Temperature" tag, it is written in
+                        # square brackets after the tag name
+                        "Temperature.attribute": "Kelvin"
+                    }
+                },
+                "KPointsAndWeights": {
+                    # Nested list: every inner list is written on its own line
+                    "SupercellFolding": [[2, 0, 0], [0, 2, 0], [0, 0, 2],
+                                         [0.5, 0.5, 0.5]]
+                },
+                "ElectricField": {
+                    "PointCharges": {
+                        # Numpy arrays are converted to (nested) lists first
+                        "CoordsAndCharges": np.array(
+                            [[-0.94, -9.44, 1.2, 1.0],
+                             [-0.94, -9.44, 1.2, -1.0]])
+                    }
+                },
+                "SelectSomeAtoms": [1, 2, "3:-3"]
+            }
+        },
+        "Analysis": {
+            "ProjectStates": {
+                # List of dictionaries: one "Region { ... }" block per item
+                "Region": [
+                    {
+                        "Atoms": [1, 2, 3],
+                        "Label": "region1",
+                    },
+                    {
+                        "Atoms": np.array([1, 2, 3]),
+                        "Label": "region2",
+                    }
+                ]
+            }
+        }
+    }
+    print(dumps(INPUT))