Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
68b794d
added .idea to the gitignore for pycharm developerse
cparcerisas Oct 24, 2024
8417d19
Get metadata from wav file instead of xml
bramcuyx Nov 12, 2024
b568260
debugging
bramcuyx Nov 12, 2024
374f1b0
Fixed the start of the deployment being overridden by the start of ea…
bramcuyx Nov 13, 2024
13bf8f5
log faulty wav files and break loop
bramcuyx Nov 13, 2024
4cee670
Printing the amount of erroneous files
bramcuyx Nov 13, 2024
0e29dc9
Testing with a differen wav header reader
bramcuyx Nov 13, 2024
f4e3d3e
prior to pull request, logs faulty file but then halts execution
bramcuyx Nov 13, 2024
ab5f8d9
Raise an error in faulty files
bramcuyx Nov 13, 2024
e6038df
Merge branch 'main' of https://github.com/lifewatch/pbp
cparcerisas Jan 20, 2025
4bb0267
Merge branch 'main' of https://github.com/lifewatch/pbp
cparcerisas Apr 1, 2025
f3c380a
added an expection when parsing the xml file for defectuous files
cparcerisas Oct 24, 2024
c1ed422
Get metadata from wav file instead of xml
bramcuyx Nov 12, 2024
cd4fe8e
debugging
bramcuyx Nov 12, 2024
cb8bfd6
Fixed the start of the deployment being overridden by the start of ea…
bramcuyx Nov 13, 2024
fb28eeb
log faulty wav files and break loop
bramcuyx Nov 13, 2024
4e71eab
Printing the amount of erroneous files
bramcuyx Nov 13, 2024
897c3ee
Testing with a differen wav header reader
bramcuyx Nov 13, 2024
fdabd63
prior to pull request, logs faulty file but then halts execution
bramcuyx Nov 13, 2024
2defd19
Raise an error in faulty files
bramcuyx Nov 13, 2024
9ac60f0
Merge branch 'wav-metadata' of https://github.com/lifewatch/pbp into …
cparcerisas Apr 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.env
.idea/
pbp-doc/site/
output*/
cloud_tmp*/
Expand Down
24 changes: 8 additions & 16 deletions pbp/meta_gen/gen_soundtrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def run(self):
# Set the start and end dates to 1 day before and after the start and end dates
start_dt = self.start - timedelta(days=1)
end_dt = self.end + timedelta(days=1)

errorcount = 0
if scheme == "file":
parsed_uri = urllib.parse.urlparse(self.audio_loc)

Expand All @@ -100,36 +100,28 @@ def run(self):
sorted(wav_path.rglob("*.wav")), prefix="Searching : "
):
wav_path = filename.parent / f"{filename.stem}.wav"
xml_path = Path(self.xml_dir + "/" + f"{filename.stem}.log.xml")
#xml_path = Path(self.xml_dir + "/" + f"{filename.stem}.log.xml")
start_dt = get_datetime(wav_path, self.prefixes)

# Must have a start date that falls within the expanded search window to be valid
if (
start_dt and xml_path.exists() and start_dt <= start_dt <= end_dt
start_dt and (self.start-timedelta(days=1)) <= start_dt <= end_dt
): # TODO : Saying that a str object can not have an .exists()
wav_files.append(
SoundTrapWavFile(wav_path.as_posix(), xml_path, start_dt)
SoundTrapWavFile(wav_path.as_posix(), start_dt)
)
else:
if not xml_path.exists():
self.log.error(
"The path set by --xml-dir :"
+ str(xml_path)
+ " could not be located at the user specified directory."
)

else:
# if the audio_loc is a s3 url, then we need to list the files in buckets that cover the start and end
# dates
self.log.debug(f"Searching between {start_dt} and {end_dt}")
self.log.debug(f"Searching between {self.start-timedelta(days=1)} and {end_dt}")

client = boto3.client("s3", config=Config(signature_version=UNSIGNED))
paginator = client.get_paginator("list_objects")

operation_parameters = {"Bucket": bucket}
page_iterator = paginator.paginate(**operation_parameters)
self.log.info(
f"Searching in bucket: {bucket} for .wav and .xml files between {start_dt} and {end_dt}"
f"Searching in bucket: {bucket} for .wav and .xml files between {self.start-timedelta(days=1)} and {end_dt}"
)

# list the objects in the bucket
Expand All @@ -150,15 +142,15 @@ def run(self):
try:
self.log.debug(f"Downloading {key_xml} ...")
client.download_file(bucket, key_xml, xml_path)
wav_files.append(SoundTrapWavFile(uri, xml_path, key_dt))
wav_files.append(SoundTrapWavFile(uri, key_dt))
except Exception as ex:
self.log.error(
f"Could not download {key_xml} - {str(ex)}"
)
continue

self.log.info(
f"Found {len(wav_files)} files to process that covers the expanded period {start_dt} - {end_dt}"
f"Found {len(wav_files)} files to process that covers the expanded period {self.start-timedelta(days=1)} - {end_dt}"
)

if len(wav_files) == 0:
Expand Down
61 changes: 23 additions & 38 deletions pbp/meta_gen/meta_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def get_max_freq(self):


class SoundTrapWavFile(AudioFile):
def __init__(self, path_or_url: str, xml_file: str, start: datetime):
def __init__(self, path_or_url: str, start: datetime):
"""
SoundTrapWavFile reads its metadata from the wav file header itself, not from the xml files
:param path_or_url:
Expand All @@ -88,46 +88,31 @@ def __init__(self, path_or_url: str, xml_file: str, start: datetime):
:param start:
"""
super().__init__(path_or_url, start)
tree = ET.parse(xml_file)
root = tree.getroot()
wav_start_dt = None
wav_stop_dt = None
sample_count = None
sample_rate = 48_000 # default sample rate

# Iterate over the XML elements grabbing the needed metadata values
for element in root.iter("CFG"):
if element.get("ID") == "4":
value = element.find("FS")
if value is not None:
sample_rate = int(value.text) # type: ignore[arg-type]

for element in root.iter("WavFileHandler"):
value = element.get("SamplingStartTimeUTC")
if value:
wav_start_dt = datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")

value = element.get("SamplingStopTimeUTC")
if value:
wav_stop_dt = datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")

value = element.get("SampleCount")
if value:
sample_count = int(value)

# Error checking
if not wav_start_dt or not wav_stop_dt or not sample_count:
raise ValueError(f"Error reading {xml_file}. Missing metadata")

self.path_or_url = path_or_url
self.start = wav_start_dt
self.end = wav_stop_dt
self.duration_secs = sample_count / sample_rate
self.fs = sample_rate
self.frames = sample_count
self.channels = 1
self.start = start
self.end = None
self.duration_secs = None
self.fs = None
self.frames = None
self.channels = None
self.subtype = "SoundTrap"
self.exception = "" # no exceptions for SoundTrap files
try:
self._read_wav_metadata()
except ValueError as ex:
raise ValueError(f"Error reading {self.path_or_url}. {ex}")

def _read_wav_metadata(self):
    """
    Populate the audio metadata of this file from the wav header.

    Reads the header with ``sf.info`` and fills in ``fs``, ``frames``,
    ``channels``, ``duration_secs`` and ``end`` (``start`` plus the duration).

    :raises ValueError:
        if ``start`` is not set, or the header reports no frames
    """
    # The end time is derived from the start time, so a missing start must be
    # rejected up front; otherwise the addition below fails with a TypeError
    # instead of the ValueError that the constructor handles.
    if not self.start:
        raise ValueError(f"Error reading {self.path_or_url}. Faulty wavfile")

    # read the wav file to get the metadata
    metadata = sf.info(self.path_or_url)
    self.fs = metadata.samplerate
    self.frames = metadata.frames
    self.channels = metadata.channels
    self.duration_secs = metadata.duration
    self.end = self.start + timedelta(seconds=self.duration_secs)

    # A header that reports zero frames carries no usable audio.
    if not self.frames:
        raise ValueError(f"Error reading {self.path_or_url}. Faulty wavfile")


class GenericWavFile(AudioFile):
Expand Down