
Commit c32f18f

liambeguin authored and Teque5 committed
archive: use BytesIO to store data file
When opening a sigmf archive with SigMFArchiveReader(), the data file is currently set to the full archive (including metadata). This causes issues when writing the archive back to disk and invalidates the metadata hash since the data_file is now a tar archive and not just the set of samples. To work around the issue, carry around a BytesIO buffer with the content of the data_file and write it to the tmpdir just before saving a new archive to disk.

Signed-off-by: Liam Beguin <[email protected]>
1 parent c986747 commit c32f18f
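
The change is easiest to see as a round trip: open an archive, then write it back out. Below is a minimal sketch of that flow, assuming the SigMFArchiveReader and SigMFFile.archive() entry points touched by this commit; the file paths are hypothetical.

# Hypothetical round trip: the dataset extracted by the reader is kept in an
# io.BytesIO buffer, so re-archiving writes the original samples (not the
# whole tar) and the stored metadata hash remains valid.
from sigmf.archivereader import SigMFArchiveReader

reader = SigMFArchiveReader('example.sigmf')
smf = reader.sigmffile                 # samples backed by an in-memory buffer
print(smf.read_samples(count=16))      # served from the BytesIO, not the tar

smf.archive('copy.sigmf')              # buffer is flushed to a temp .sigmf-data file first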

File tree
3 files changed: +28, -11 lines changed

sigmf/archive.py

Lines changed: 8 additions & 2 deletions
@@ -7,6 +7,7 @@
 """Create and extract SigMF archives."""
 
 import os
+import io
 import shutil
 import tarfile
 import tempfile
@@ -72,7 +73,12 @@ def __init__(self, sigmffile, name=None, fileobj=None):
         with open(sigmf_md_path, "w") as mdfile:
             self.sigmffile.dump(mdfile, pretty=True)
 
-        shutil.copy(self.sigmffile.data_file, sigmf_data_path)
+        if isinstance(self.sigmffile.data_buffer, io.BytesIO):
+            self.sigmffile.data_file = sigmf_data_path
+            with open(sigmf_data_path, 'wb') as f:
+                f.write(self.sigmffile.data_buffer.getbuffer())
+        else:
+            shutil.copy(self.sigmffile.data_file, sigmf_data_path)
 
         def chmod(tarinfo):
             if tarinfo.isdir():
@@ -110,7 +116,7 @@ def _ensure_name_has_correct_extension(self):
         self.name = name if has_correct_extension else name + SIGMF_ARCHIVE_EXT
 
     def _ensure_data_file_set(self):
-        if not self.sigmffile.data_file:
+        if not self.sigmffile.data_file and not isinstance(self.sigmffile.data_buffer, io.BytesIO):
             err = "no data file - use `set_data_file`"
             raise SigMFFileError(err)
 
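
On the writer side, the new branch stages the in-memory samples as a real .sigmf-data file before the tar is assembled. A standalone sketch of that sequence, outside of the SigMFArchive class, with hypothetical names and data:

import io
import os
import tempfile

data_buffer = io.BytesIO(b'\x00\x01\x02\x03')         # stand-in for the sample buffer
with tempfile.TemporaryDirectory() as tmpdir:
    sigmf_data_path = os.path.join(tmpdir, 'example.sigmf-data')
    with open(sigmf_data_path, 'wb') as f:
        f.write(data_buffer.getbuffer())               # same write the diff adds
    # SigMFArchive would now tar the tmpdir contents into the final .sigmf file
    print(os.path.getsize(sigmf_data_path), 'bytes staged')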

sigmf/archivereader.py

Lines changed: 5 additions & 3 deletions
@@ -7,9 +7,11 @@
 """Access SigMF archives without extracting them."""
 
 import os
+import io
 import shutil
 import tarfile
 import tempfile
+from pathlib import Path
 
 from . import __version__
 from .archive import SIGMF_ARCHIVE_EXT, SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SigMFArchive
@@ -64,6 +66,8 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None):
                 elif memb.name.endswith(SIGMF_DATASET_EXT):
                     data_offset = memb.offset_data
                     data_size_bytes = memb.size
+                    with tar_obj.extractfile(memb) as memb_fid:
+                        data_buffer = io.BytesIO(memb_fid.read())
 
                 else:
                     print('A regular file', memb.name, 'was found but ignored in the archive')
@@ -77,10 +81,8 @@ def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None):
         valid_md = self.sigmffile.validate()
 
         self.sigmffile.set_data_file(
-            self.name,
-            data_buffer=archive_buffer,
+            data_buffer=data_buffer,
             skip_checksum=skip_checksum,
-            offset=data_offset,
             size_bytes=data_size_bytes,
             map_readonly=map_readonly,
         )
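
The reader change boils down to copying the .sigmf-data member of the tar into memory rather than pointing data_file at the whole archive. A self-contained sketch of that extraction using only the standard library (the archive path is hypothetical):

import io
import tarfile

SIGMF_DATASET_EXT = '.sigmf-data'

with tarfile.open('example.sigmf') as tar_obj:
    for memb in tar_obj.getmembers():
        if memb.isfile() and memb.name.endswith(SIGMF_DATASET_EXT):
            # read the dataset member into an in-memory buffer
            with tar_obj.extractfile(memb) as memb_fid:
                data_buffer = io.BytesIO(memb_fid.read())
            print(memb.name, data_buffer.getbuffer().nbytes, 'bytes buffered')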

sigmf/sigmffile.py

Lines changed: 15 additions & 6 deletions
@@ -15,6 +15,7 @@
 from os import path
 
 import numpy as np
+import io
 
 from . import __specification__, __version__, schema, sigmf_hash, validate
 from .archive import SIGMF_ARCHIVE_EXT, SIGMF_COLLECTION_EXT, SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SigMFArchive
@@ -246,7 +247,7 @@ def _is_conforming_dataset(self):
             # check for any non-zero `header_bytes` fields in captures segments
             if capture.get(self.HEADER_BYTES_KEY, 0):
                 return False
-        if not path.isfile(self.data_file):
+        if self.data_file is not None and not path.isfile(self.data_file):
             return False
         # if we get here, the file exists and is conforming
         return True
@@ -631,7 +632,7 @@ def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=False):
             raise IOError('Number of samples must be greater than zero, or -1 for all samples.')
         elif start_index + count > self.sample_count:
             raise IOError("Cannot read beyond EOF.")
-        if self.data_file is None:
+        if self.data_file is None and not isinstance(self.data_buffer, io.BytesIO):
             if self.get_global_field(self.METADATA_ONLY_KEY, False):
                 # only if data_file is `None` allows access to dynamically generated datsets
                 raise SigMFFileError("Cannot read samples from a metadata only distribution.")
@@ -658,9 +659,15 @@ def _read_datafile(self, first_byte, nitems, autoscale, raw_components):
         data_type_out = np.dtype("f4") if not self.is_complex_data else np.dtype("f4, f4")
         num_channels = self.get_num_channels()
 
-        fp = open(self.data_file, "rb")
-        fp.seek(first_byte, 0)
-        data = np.fromfile(fp, dtype=data_type_in, count=nitems)
+        if self.data_file is not None:
+            fp = open(self.data_file, "rb")
+            fp.seek(first_byte, 0)
+            data = np.fromfile(fp, dtype=data_type_in, count=nitems)
+        elif self.data_buffer is not None:
+            data = np.frombuffer(self.data_buffer.getbuffer(), dtype=data_type_in, count=nitems)
+        else:
+            data = self._memmap
+
         if num_channels != 1:
             # return reshaped view for num_channels
             # first dimension will be double size if `is_complex_data`
@@ -678,7 +685,9 @@ def _read_datafile(self, first_byte, nitems, autoscale, raw_components):
         else:
             data = data.view(component_type_in)
 
-        fp.close()
+        if self.data_file is not None:
+            fp.close()
+
         return data
 
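
The buffer path reads samples with np.frombuffer instead of np.fromfile, so no file handle is involved. A minimal sketch of that path with a made-up sample format:

import io
import numpy as np

data_type_in = np.dtype('complex64')             # hypothetical on-disk sample type
samples = (np.arange(8) + 1j).astype(data_type_in)
data_buffer = io.BytesIO(samples.tobytes())      # stand-in for the extracted dataset

# equivalent to the new branch in _read_datafile()
data = np.frombuffer(data_buffer.getbuffer(), dtype=data_type_in, count=4)
print(data)                                      # first four samples, zero-copy view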
