
Commit c986747

liambeguin authored and Teque5 committed

sigmffile: separate data_offset and data_size

These elements are usually checked independently; separate them into two arguments to facilitate checks.

Signed-off-by: Liam Beguin <[email protected]>
1 parent 4dee2de · commit c986747

3 files changed: +28 −15 lines changed
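The heart of the change is splitting one combined parameter into two; the net effect on the hashing helper's signature, reconstructed from the diff below:

# before: one combined argument holding both values
def calculate_sha512(filename=None, fileobj=None, offset_and_size=None): ...

# after: two independent arguments that can be checked separately
def calculate_sha512(filename=None, fileobj=None, offset=None, size=None): ...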

sigmf/archivereader.py
Lines changed: 14 additions & 6 deletions

@@ -42,7 +42,8 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu
             raise ValueError('In sigmf.archivereader.__init__(), either `name` or `archive_buffer` must be not None')
 
         json_contents = None
-        data_offset_size = None
+        data_offset = None
+        data_size_bytes = None
 
         for memb in tar_obj.getmembers():
             if memb.isdir():  # memb.type == tarfile.DIRTYPE:
@@ -52,7 +53,7 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu
             elif memb.isfile():  # memb.type == tarfile.REGTYPE:
                 if memb.name.endswith(SIGMF_METADATA_EXT):
                     json_contents = memb.name
-                    if data_offset_size is None:
+                    if data_offset is None:
                         # consider a warnings.warn() here; the datafile should be earlier in the
                         # archive than the metadata, so that updating it (like, adding an annotation)
                         # is fast.
@@ -61,21 +62,28 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_bu
                         json_contents = memb_fid.read()
 
                 elif memb.name.endswith(SIGMF_DATASET_EXT):
-                    data_offset_size = memb.offset_data, memb.size
+                    data_offset = memb.offset_data
+                    data_size_bytes = memb.size
 
                 else:
                     print('A regular file', memb.name, 'was found but ignored in the archive')
             else:
                 print('A member of type', memb.type, 'and name', memb.name, 'was found but not handled, just FYI.')
 
-        if data_offset_size is None:
+        if data_offset is None:
             raise SigMFFileError('No .sigmf-data file found in archive!')
 
         self.sigmffile = SigMFFile(metadata=json_contents)
         valid_md = self.sigmffile.validate()
 
-        self.sigmffile.set_data_file(self.name, data_buffer=archive_buffer, skip_checksum=skip_checksum, offset=data_offset_size[0],
-                                     size_bytes=data_offset_size[1], map_readonly=map_readonly)
+        self.sigmffile.set_data_file(
+            self.name,
+            data_buffer=archive_buffer,
+            skip_checksum=skip_checksum,
+            offset=data_offset,
+            size_bytes=data_size_bytes,
+            map_readonly=map_readonly,
+        )
 
         self.ndim = self.sigmffile.ndim
         self.shape = self.sigmffile.shape
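For context, a minimal usage sketch of the reader path this hunk touches (the archive name is hypothetical; SigMFArchiveReader, ndim, and shape come from this file):

from sigmf.archivereader import SigMFArchiveReader

# Opening a .sigmf tar archive walks its members, records the dataset's
# byte offset and size (data_offset / data_size_bytes above), and passes
# both to SigMFFile.set_data_file() without extracting the archive.
reader = SigMFArchiveReader('capture.sigmf')
print(reader.ndim, reader.shape)  # set at the end of __init__, as shown above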

sigmf/sigmf_hash.py
Lines changed: 9 additions & 5 deletions

@@ -10,23 +10,27 @@
 import os
 
 
-def calculate_sha512(filename=None, fileobj=None, offset_and_size=None):
+def calculate_sha512(filename=None, fileobj=None, offset=None, size=None):
     """
     Return sha512 of file or fileobj.
     """
     the_hash = hashlib.sha512()
+    bytes_to_hash = size
+    bytes_read = 0
+
     if filename is not None:
         fileobj = open(filename, "rb")
-    if offset_and_size is None:
+    if size is None:
         bytes_to_hash = os.path.getsize(filename)
     else:
-        fileobj.seek(offset_and_size[0])
-        bytes_to_hash = offset_and_size[1]
-        bytes_read = 0
+        fileobj.seek(offset)
+
     while bytes_read < bytes_to_hash:
         buff = fileobj.read(min(4096, (bytes_to_hash - bytes_read)))
         the_hash.update(buff)
         bytes_read += len(buff)
+
     if filename is not None:
         fileobj.close()
+
     return the_hash.hexdigest()
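A sketch of calling the updated helper with the separated arguments — hashing only a dataset member embedded in a tar (the archive name is hypothetical; offset_data and size are standard tarfile.TarInfo attributes):

import tarfile

from sigmf import sigmf_hash

# Locate the .sigmf-data member, then hash exactly `size` bytes starting
# at its byte offset within the archive, per the new offset/size signature.
with tarfile.open('capture.sigmf') as tar:
    memb = next(m for m in tar.getmembers() if m.name.endswith('.sigmf-data'))
digest = sigmf_hash.calculate_sha512('capture.sigmf', offset=memb.offset_data, size=memb.size)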

sigmf/sigmffile.py
Lines changed: 5 additions & 4 deletions

@@ -442,7 +442,7 @@ def _count_samples(self):
             sample_count = self._get_sample_count_from_annotations()
         else:
             header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()])
-            file_size = path.getsize(self.data_file) if self.offset_and_size is None else self.offset_and_size[1]
+            file_size = path.getsize(self.data_file) if self.data_size_bytes is None else self.data_size_bytes
             file_data_size = file_size - self.get_global_field(self.TRAILING_BYTES_KEY, 0) - header_bytes  # bytes
             sample_size = self.get_sample_size()  # size of a sample in bytes
             num_channels = self.get_num_channels()
@@ -483,9 +483,9 @@ def calculate_hash(self):
         """
         old_hash = self.get_global_field(self.HASH_KEY)
         if self.data_file is not None:
-            new_hash = sigmf_hash.calculate_sha512(self.data_file, offset_and_size=self.offset_and_size)
+            new_hash = sigmf_hash.calculate_sha512(self.data_file, offset=self.data_offset, size=self.data_size_bytes)
         else:
-            new_hash = sigmf_hash.calculate_sha512(fileobj=self.data_buffer, offset_and_size=self.offset_and_size)
+            new_hash = sigmf_hash.calculate_sha512(fileobj=self.data_buffer, offset=self.data_offset, size=self.data_size_bytes)
         if old_hash:
             if old_hash != new_hash:
                 raise SigMFFileError('Calculated file hash does not match associated metadata.')
@@ -503,7 +503,8 @@ def set_data_file(self, data_file=None, data_buffer=None, skip_checksum=False, o
 
         self.data_file = data_file
         self.data_buffer = data_buffer
-        self.offset_and_size = None if (offset == 0 and size_bytes is None) else (offset, size_bytes)
+        self.data_offset = offset
+        self.data_size_bytes = size_bytes
        self._count_samples()
 
         dtype = dtype_info(self.get_global_field(self.DATATYPE_KEY))
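To illustrate the caller-facing effect: set_data_file() now stores offset and size as independent attributes, so a dataset sitting partway into a container file can be attached directly (the file name, offsets, and metadata dict below are illustrative only):

from sigmf.sigmffile import SigMFFile

# metadata_dict is assumed to be a valid SigMF metadata dictionary.
meta = SigMFFile(metadata=metadata_dict)
# 512 and 4096 are illustrative: the dataset starts 512 bytes into the
# container and is 4096 bytes long; both are now kept separately as
# data_offset and data_size_bytes rather than one offset_and_size tuple.
meta.set_data_file('capture.sigmf', offset=512, size_bytes=4096, skip_checksum=True, map_readonly=True)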
