-
Notifications
You must be signed in to change notification settings - Fork 15
/
split.py
executable file
·104 lines (72 loc) · 2.36 KB
/
split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python
"""
Split an HDF5 file into several smaller files of a requested size.
"""
import os
import sys
from collections import OrderedDict
from parser import get_args_split as parser
import msg
import hdf5
import check
def generate_filelist(prefix, old_size, new_size):
    """Generate filenames for output files
    and return as a dict (file: [begin, end]).

    The range [0, old_size) is cut into consecutive chunks of new_size
    entries; when old_size is not a multiple of new_size, one final,
    shorter chunk holds the remainder. Files are named
    "<prefix>_NNN.hdf5", where NNN is the zero-based chunk index padded
    to three digits. The returned OrderedDict keeps the chunks in order.

    Calls msg.error and exits with status 1 when new_size is not positive
    or is not smaller than old_size.

    Keyword arguments:
    prefix -- common path/to/basename
    old_size -- size of input hdf5 files
    new_size -- requested size for output hdf5 files
    """
    # A zero size would divide by zero and a negative one would silently
    # produce an empty file list, so reject both together with the
    # "nothing to split" case.
    if new_size <= 0 or new_size >= old_size:
        msg.error("Use splitter wisely...")
        sys.exit(1)

    nof_files, leftover = divmod(old_size, new_size)
    if leftover:
        nof_files += 1  # one extra, shorter file for the remainder

    files = OrderedDict()
    for i in range(nof_files):
        filename = "%(prefix)s_%(id)03d.hdf5" % {"prefix": prefix, "id": i}
        begin = i * new_size
        # The last chunk is clipped to the end of the input.
        files[filename] = [begin, min(begin + new_size, old_size)]
    return files
def generate_uneven_filelist(path, new_sizelist):
    """
    Map every output filename to its [begin, end] range.

    Entries keep the order of new_sizelist; each filename is path
    concatenated directly with the entry's name (no separator is added).

    Keyword arguments:
    path -- common path
    new_sizelist -- list of tuples for the files (name, (start, stop))
    """
    return OrderedDict(
        (path + entry[0], [entry[1][0], entry[1][1]])
        for entry in new_sizelist
    )
def save_filelist(filename, filelist):
    """Save the list of created files.

    Writes one absolute path per line to filename, overwriting any
    existing content.

    Keyword arguments:
    filename -- the path to txt file
    filelist -- the list of files
    """
    # The context manager closes the file even if writing fails part-way.
    with open(filename, 'w') as f:
        for fn in filelist:
            print(os.path.abspath(fn), file=f)
if __name__ == '__main__':
    # Command-line entry point: load the input, split it into chunks and
    # report each step through the msg helpers.
    msg.box("HDF5 MANIPULATOR: SPLIT")

    args = parser()
    data = hdf5.load(args.input)

    print("The following datasets were found in %s:\n" % args.input)
    msg.list_dataset(data)

    # Without an explicit prefix, name the outputs after the input file
    # (its path minus the extension).
    out_prefix = args.prefix or os.path.splitext(args.input)[0]
    total_size = check.get_size(data)
    chunk_size = int(args.size)
    filelist = generate_filelist(out_prefix, total_size, chunk_size)

    print("\nSaving output files:\n")
    for out_name, bounds in filelist.items():
        msg.list_fileinfo(out_name, bounds)
        begin, end = bounds
        hdf5.save_subset(out_name, data, begin, end)

    # Optionally record the names of the files just written.
    if args.filelist:
        save_filelist(args.filelist, filelist.keys())

    msg.info("Done")