Skip to content

Commit cf02c39

Browse files
committed
Initial commit
0 parents  commit cf02c39

File tree

4 files changed

+285
-0
lines changed

4 files changed

+285
-0
lines changed

.gitignore

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
*~
2+
.idea
3+
*.pyc
4+
dist
5+
build
6+
docs/_build
7+
*.egg-info
8+
.tox
9+
_gitmsg.saved.txt

LICENSE

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
Copyright (c) 2020 Bálint Aradi, Universität Bremen
2+
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without modification,
6+
are permitted provided that the following conditions are met:
7+
8+
1. Redistributions of source code must retain the above copyright notice, this
9+
list of conditions and the following disclaimer.
10+
11+
2. Redistributions in binary form must reproduce the above copyright notice,
12+
this list of conditions and the following disclaimer in the documentation and/or
13+
other materials provided with the distribution.
14+
15+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

README.rst

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
************************************
2+
HSD — Human-friendly Structured Data
3+
************************************
4+
5+
This Python package contains utilities to write (and soon also to read) files in
6+
the Human-friendly Structured Data (HSD) format.
7+
8+
It is licensed under the *BSD 2-clause license*.
9+
10+
11+
The HSD format
12+
==============
13+
14+
The HSD-format is very similar to both JSON and XML, but tries to minimize the
15+
effort for humans to read and write it. It omits special characters as much as
16+
possible but (in contrast to YAML for example) is not indentation dependent.
17+
18+
It was originally developed as the input format for a scientific
19+
simulation tool (DFTB+), but is absolutely general. A typical input written in
20+
HSD would look like ::
21+
22+
driver {
23+
conjugate_gradients {
24+
moved_atoms = 1 2 "7:19"
25+
max_steps = 100
26+
}
27+
}
28+
29+
hamiltonian {
30+
dftb {
31+
scc = yes
32+
scc_tolerance = 1e-10
33+
mixer {
34+
broyden {}
35+
}
36+
filling {
37+
fermi {
38+
temperature [kelvin] = 1e-8
39+
}
40+
}
41+
k_points_and_weights {
42+
supercell_folding = {
43+
2 0 0
44+
0 2 0
45+
0 0 2
46+
0.5 0.5 0.5
47+
}
48+
}
49+
}
50+
}
51+
52+
Content in HSD format can be represented as JSON. Content in JSON format can be
53+
represented as HSD, provided it satisfies a restriction for arrays: Either all
54+
elements of an array must be objects or none of them. (This allows for a clear
55+
separation of structure and data and allows for the very simple input format.)
56+
57+
Content in HSD format can be represented as XML (DOM-tree). Content in XML can
58+
be converted to HSD, provided it satisfies the restriction that every child has
59+
either data (text) or further children, but never both of them. (Again, this
60+
ensures the simplicity of the input format.)

src/hsd.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
#!/usr/bin/env python3
2+
#------------------------------------------------------------------------------#
3+
# hsd: package for manipulating HSD-formatted data #
4+
# Copyright (C) 2020 Bálint Aradi, Universität Bremen #
5+
# #
6+
# See the LICENSE file for terms of usage and distribution. #
7+
#------------------------------------------------------------------------------#
8+
#
9+
"""
10+
Provides functionality to convert Python structures to HSD
11+
"""
12+
import io
13+
import numpy as np
14+
15+
__all__ = ['dump', 'dumps']
16+
17+
18+
# Indentation added for every nesting level of the written output
_INDENT_STR = " "

# Characters which may serve as string delimiters (at least two are needed,
# so that a string containing one of them can be quoted with the other)
_QUOTING_CHARS = '"\''

# Appended to a tag name to form the key which holds the attribute of the tag
_ATTRIBUTE_SUFFIX = ".attribute"
25+
26+
27+
def dump(obj, fobj):
    """Writes an object in HSD format into a file like object.

    Args:
        obj: Object to be serialized in HSD format. Only dictionaries are
            accepted.
        fobj: File like object receiving the serialized content.

    Raises:
        TypeError: If obj is not a dictionary.
    """
    # Guard first: anything but a dictionary can not be a top level HSD node
    if not isinstance(obj, dict):
        raise TypeError("Invalid object type")
    _dump_dict(obj, fobj, "")
40+
41+
42+
def dumps(obj):
    """Returns the HSD representation of an object as a string.

    Args:
        obj: Object to serialize.

    Returns:
        HSD formatted string.
    """
    # Serialize into an in-memory text buffer and hand back its content
    with io.StringIO() as buffer:
        dump(obj, buffer)
        return buffer.getvalue()
54+
55+
56+
def _dump_dict(obj, fobj, indentstr):
    """Writes the entries of a dictionary as HSD nodes into a file object.

    Every key is written as a tag. A key ending with the attribute suffix is
    not written as a tag on its own: its value is appended in square brackets
    to the tag it belongs to.

    Args:
        obj: Dictionary to serialize. Its keys must be strings.
        fobj: File like object to write the result to.
        indentstr: Indentation put in front of every line written at this
            nesting level.

    Raises:
        ValueError: If an attribute key has no corresponding tag or if the
            value of an attribute is not a string.
        TypeError: If a list starting with a dictionary also contains items
            which are not dictionaries.
    """
    for key, value in obj.items():
        if key.endswith(_ATTRIBUTE_SUFFIX):
            tagname = key[:-len(_ATTRIBUTE_SUFFIX)]
            if tagname not in obj:
                msg = "Attribute '{}' without corresponding tag '{}'"\
                    .format(key, tagname)
                raise ValueError(msg)
            # Attributes are emitted together with their tag, not separately
            continue

        attribkey = key + _ATTRIBUTE_SUFFIX
        attrib = obj.get(attribkey)
        if attrib is None:
            attribstr = ""
        elif not isinstance(attrib, str):
            msg = "Invalid data type ({}) for '{}'"\
                .format(str(type(attrib)), attribkey)
            raise ValueError(msg)
        else:
            attribstr = " [" + attrib + "]"

        if isinstance(value, dict):
            if value:
                fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr))
                _dump_dict(value, fobj, indentstr + _INDENT_STR)
                fobj.write("{}}}\n".format(indentstr))
            else:
                fobj.write("{}{}{} {{}}\n".format(indentstr, key, attribstr))
        elif isinstance(value, list) and value and isinstance(value[0], dict):
            # Check the whole list before writing anything, so that a mixed
            # list fails with a clear message instead of an AttributeError
            # raised after part of the list has already been written.
            if not all(isinstance(item, dict) for item in value):
                msg = "List '{}' mixes dictionaries with other items"\
                    .format(key)
                raise TypeError(msg)
            # A list of dictionaries becomes a sequence of equally named tags
            for item in value:
                fobj.write("{}{}{} {{\n".format(indentstr, key, attribstr))
                _dump_dict(item, fobj, indentstr + _INDENT_STR)
                fobj.write("{}}}\n".format(indentstr))
        else:
            valstr = _get_hsd_rhs(value, indentstr)
            fobj.write("{}{}{} {}\n"\
                       .format(indentstr, key, attribstr, valstr))
90+
91+
92+
def _get_hsd_rhs(obj, indentstr):
    """Returns the right hand side ("= ...") of an HSD assignment.

    Args:
        obj: Value to be written: a list, a NumPy array or a single item.
        indentstr: Indentation of the line the assignment is written on.

    Returns:
        "= value" if the value fits on a single line, otherwise the value
        wrapped in curly braces with every line of it indented one level
        deeper than indentstr.
    """
    if isinstance(obj, list):
        text = _list_to_hsd(obj)
    elif isinstance(obj, np.ndarray):
        text = _list_to_hsd(obj.tolist())
    else:
        text = _item_to_hsd(obj)

    # Single line values need no block
    if "\n" not in text:
        return "= " + text

    inner_indent = indentstr + _INDENT_STR
    body = "\n".join([inner_indent + line for line in text.split("\n")])
    return "= {\n" + body + "\n" + indentstr + "}"
107+
108+
109+
def _list_to_hsd(lst):
    """Converts a flat or a nested list into its HSD string form.

    Args:
        lst: List of items or list of lists of items. Whether it is treated
            as nested is decided by looking at its first element only.

    Returns:
        The items separated by spaces. For a nested list every inner list
        yields one line, the lines being separated by newline characters.
    """
    def _join_row(row):
        return " ".join([_item_to_hsd(entry) for entry in row])

    is_nested = bool(lst) and isinstance(lst[0], list)
    if not is_nested:
        return _join_row(lst)
    return "\n".join([_join_row(row) for row in lst])
116+
117+
118+
def _item_to_hsd(item):
    """Converts a single item into its HSD string form.

    Args:
        item: Boolean, integer, float or string to convert.

    Returns:
        "Yes" / "No" for booleans, the str() form for integers and floats
        and the quoted string for strings.

    Raises:
        TypeError: If the item has a type which can not be converted.
    """
    # bool has to be tested before int: bool is a subclass of int, so testing
    # (int, float) first would turn True / False into "True" / "False".
    if isinstance(item, bool):
        return "Yes" if item else "No"
    if isinstance(item, (int, float)):
        return str(item)
    if isinstance(item, str):
        return _str_to_hsd(item)
    msg = "Data type {} can not be converted to HSD string"\
        .format(type(item))
    raise TypeError(msg)
130+
131+
132+
def _str_to_hsd(string):
    """Returns a string enclosed in quoting characters.

    The first quoting character is used as delimiter unless it occurs in the
    string, in which case the second one is taken.

    Args:
        string: String to quote.

    Returns:
        The string surrounded by the chosen delimiter.

    Raises:
        ValueError: If more than one of the quoting characters occur in the
            string.
    """
    nr_present = sum(1 for qc in _QUOTING_CHARS if qc in string)
    if nr_present > 1:
        raise ValueError(
            "String '{}' can not be quoted correctly".format(string))
    if _QUOTING_CHARS[0] in string:
        delimiter = _QUOTING_CHARS[1]
    else:
        delimiter = _QUOTING_CHARS[0]
    return "".join((delimiter, string, delimiter))
139+
140+
141+
142+
if __name__ == "__main__":
    # Example input tree, assembled from its parts and printed in HSD format
    FERMI_FILLING = {
        "Temperature": 1e-8,
        "Temperature.attribute": "Kelvin",
    }
    POINT_CHARGES = {
        "CoordsAndCharges": np.array([
            [-0.94, -9.44, 1.2, 1.0],
            [-0.94, -9.44, 1.2, -1.0],
        ]),
    }
    DFTB = {
        "Scc": True,
        "SccTolerance": 1e-10,
        "MaxSccIterations": 1000,
        "Mixer": {"Broyden": {}},
        "MaxAngularMomentum": {"O": "p", "H": "s"},
        "Filling": {"Fermi": FERMI_FILLING},
        "KPointsAndWeights": {
            "SupercellFolding": [
                [2, 0, 0],
                [0, 2, 0],
                [0, 0, 2],
                [0.5, 0.5, 0.5],
            ],
        },
        "ElectricField": {"PointCharges": POINT_CHARGES},
        "SelectSomeAtoms": [1, 2, "3:-3"],
    }
    REGIONS = [
        {"Atoms": [1, 2, 3], "Label": "region1"},
        {"Atoms": np.array([1, 2, 3]), "Label": "region2"},
    ]
    INPUT = {
        "Driver": {},
        "Hamiltonian": {"DFTB": DFTB},
        "Analysis": {"ProjectStates": {"Region": REGIONS}},
    }
    print(dumps(INPUT))

0 commit comments

Comments
 (0)