
Commit c986747

liambeguin authored and Teque5 committed

sigmffile: separate data_offset and data_size

These elements are usually checked independently; separate them into two arguments to facilitate checks.

Signed-off-by: Liam Beguin <[email protected]>
1 parent 4dee2de · commit c986747

3 files changed: +28 −15 lines changed
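The heart of the change is splitting one combined parameter into two; the net effect on the hashing helper's signature, reconstructed from the diff below:

# before: one combined argument holding both values
def calculate_sha512(filename=None, fileobj=None, offset_and_size=None): ...

# after: two independent arguments that can be checked separately
def calculate_sha512(filename=None, fileobj=None, offset=None, size=None): ...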

sigmf/archivereader.py
Lines changed: 14 additions & 6 deletions

@@ -42,7 +42,8 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu
             raise ValueError('In sigmf.archivereader.__init__(), either `name` or `archive_buffer` must be not None')
 
         json_contents = None
-        data_offset_size = None
+        data_offset = None
+        data_size_bytes = None
 
         for memb in tar_obj.getmembers():
             if memb.isdir():  # memb.type == tarfile.DIRTYPE:
@@ -52,7 +53,7 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu
             elif memb.isfile():  # memb.type == tarfile.REGTYPE:
                 if memb.name.endswith(SIGMF_METADATA_EXT):
                     json_contents = memb.name
-                    if data_offset_size is None:
+                    if data_offset is None:
                         # consider a warnings.warn() here; the datafile should be earlier in the
                         # archive than the metadata, so that updating it (like, adding an annotation)
                         # is fast.
@@ -61,21 +62,28 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu
                         json_contents = memb_fid.read()
 
                 elif memb.name.endswith(SIGMF_DATASET_EXT):
-                    data_offset_size = memb.offset_data, memb.size
+                    data_offset = memb.offset_data
+                    data_size_bytes = memb.size
 
                 else:
                     print('A regular file', memb.name, 'was found but ignored in the archive')
             else:
                 print('A member of type', memb.type, 'and name', memb.name, 'was found but not handled, just FYI.')
 
-        if data_offset_size is None:
+        if data_offset is None:
             raise SigMFFileError('No .sigmf-data file found in archive!')
 
         self.sigmffile = SigMFFile(metadata=json_contents)
         valid_md = self.sigmffile.validate()
 
-        self.sigmffile.set_data_file(self.name, data_buffer=archive_buffer, skip_checksum=skip_checksum, offset=data_offset_size[0],
-                                     size_bytes=data_offset_size[1], map_readonly=map_readonly)
+        self.sigmffile.set_data_file(
+            self.name,
+            data_buffer=archive_buffer,
+            skip_checksum=skip_checksum,
+            offset=data_offset,
+            size_bytes=data_size_bytes,
+            map_readonly=map_readonly,
+        )
 
         self.ndim = self.sigmffile.ndim
         self.shape = self.sigmffile.shape
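For context, a minimal usage sketch of the reader path this hunk touches (the archive name is hypothetical; SigMFArchiveReader, ndim, and shape come from this file):

from sigmf.archivereader import SigMFArchiveReader

# Opening a .sigmf tar archive walks its members, records the dataset's
# byte offset and size (data_offset / data_size_bytes above), and passes
# both to SigMFFile.set_data_file() without extracting the archive.
reader = SigMFArchiveReader('capture.sigmf')
print(reader.ndim, reader.shape)  # set at the end of __init__, as shown above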

sigmf/sigmf_hash.py
Lines changed: 9 additions & 5 deletions

@@ -10,23 +10,27 @@
 import os
 
 
-def calculate_sha512(filename=None, fileobj=None, offset_and_size=None):
+def calculate_sha512(filename=None, fileobj=None, offset=None, size=None):
     """
     Return sha512 of file or fileobj.
     """
     the_hash = hashlib.sha512()
+    bytes_to_hash = size
+    bytes_read = 0
+
     if filename is not None:
         fileobj = open(filename, "rb")
-    if offset_and_size is None:
+    if size is None:
         bytes_to_hash = os.path.getsize(filename)
     else:
-        fileobj.seek(offset_and_size[0])
-        bytes_to_hash = offset_and_size[1]
-        bytes_read = 0
+        fileobj.seek(offset)
+
     while bytes_read < bytes_to_hash:
         buff = fileobj.read(min(4096, (bytes_to_hash - bytes_read)))
         the_hash.update(buff)
         bytes_read += len(buff)
+
     if filename is not None:
         fileobj.close()
+
     return the_hash.hexdigest()
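A sketch of calling the updated helper with the separated arguments — hashing only a dataset member embedded in a tar (the archive name is hypothetical; offset_data and size are standard tarfile.TarInfo attributes):

import tarfile

from sigmf import sigmf_hash

# Locate the .sigmf-data member, then hash exactly `size` bytes starting
# at its byte offset within the archive, per the new offset/size signature.
with tarfile.open('capture.sigmf') as tar:
    memb = next(m for m in tar.getmembers() if m.name.endswith('.sigmf-data'))
digest = sigmf_hash.calculate_sha512('capture.sigmf', offset=memb.offset_data, size=memb.size)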

sigmf/sigmffile.py
Lines changed: 5 additions & 4 deletions

@@ -442,7 +442,7 @@ def _count_samples(self):
             sample_count = self._get_sample_count_from_annotations()
         else:
             header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()])
-            file_size = path.getsize(self.data_file) if self.offset_and_size is None else self.offset_and_size[1]
+            file_size = path.getsize(self.data_file) if self.data_size_bytes is None else self.data_size_bytes
             file_data_size = file_size - self.get_global_field(self.TRAILING_BYTES_KEY, 0) - header_bytes  # bytes
             sample_size = self.get_sample_size()  # size of a sample in bytes
             num_channels = self.get_num_channels()
@@ -483,9 +483,9 @@ def calculate_hash(self):
         """
         old_hash = self.get_global_field(self.HASH_KEY)
         if self.data_file is not None:
-            new_hash = sigmf_hash.calculate_sha512(self.data_file, offset_and_size=self.offset_and_size)
+            new_hash = sigmf_hash.calculate_sha512(self.data_file, offset=self.data_offset, size=self.data_size_bytes)
         else:
-            new_hash = sigmf_hash.calculate_sha512(fileobj=self.data_buffer, offset_and_size=self.offset_and_size)
+            new_hash = sigmf_hash.calculate_sha512(fileobj=self.data_buffer, offset=self.data_offset, size=self.data_size_bytes)
         if old_hash:
             if old_hash != new_hash:
                 raise SigMFFileError('Calculated file hash does not match associated metadata.')
@@ -503,7 +503,8 @@ def set_data_file(self, data_file=None, data_buffer=None, skip_checksum=False, o
 
         self.data_file = data_file
         self.data_buffer = data_buffer
-        self.offset_and_size = None if (offset == 0 and size_bytes is None) else (offset, size_bytes)
+        self.data_offset = offset
+        self.data_size_bytes = size_bytes
        self._count_samples()
 
         dtype = dtype_info(self.get_global_field(self.DATATYPE_KEY))
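To illustrate the caller-facing effect: set_data_file() now stores offset and size as independent attributes, so a dataset sitting partway into a container file can be attached directly (the file name, offsets, and metadata dict below are illustrative only):

from sigmf.sigmffile import SigMFFile

# metadata_dict is assumed to be a valid SigMF metadata dictionary.
meta = SigMFFile(metadata=metadata_dict)
# 512 and 4096 are illustrative: the dataset starts 512 bytes into the
# container and is 4096 bytes long; both are now kept separately as
# data_offset and data_size_bytes rather than one offset_and_size tuple.
meta.set_data_file('capture.sigmf', offset=512, size_bytes=4096, skip_checksum=True, map_readonly=True)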
