add der md_eval_22 #43

Merged: 10 commits, Dec 7, 2023
2 changes: 1 addition & 1 deletion .github/workflows/noneeditable.yml
@@ -18,7 +18,7 @@ jobs:
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
-       python-version: '>=3.10'
+       python-version: '3.11'
        cache: 'pip'
    - name: Install dependencies
      run: |
Empty file added meeteval/der/__init__.py
14 changes: 14 additions & 0 deletions meeteval/der/__main__.py
@@ -0,0 +1,14 @@

def cli():
    from meeteval.wer.__main__ import CLI
    from meeteval.der.md_eval import md_eval_22

    cli = CLI()

    cli.add_command(md_eval_22)

    cli.run()


if __name__ == '__main__':
    cli()
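For orientation: since meeteval/der/__main__.py defines this entry point, the new subcommand can be invoked as `python -m meeteval.der md_eval_22 -r ref.rttm -h hyp.rttm`. The `-r`/`-h` flag names are an assumption here, carried over from how meeteval's WER CLI exposes the reference and hypothesis arguments.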
175 changes: 175 additions & 0 deletions meeteval/der/md_eval.py
@@ -0,0 +1,175 @@
import logging
import re
import decimal
import shutil
import tempfile
import subprocess
import dataclasses
from pathlib import Path

import meeteval.io
from meeteval.wer.wer.error_rate import ErrorRate


def _fix_channel(r):
    return meeteval.io.rttm.RTTM([
        # Thilo usually puts some random value there (e.g. <NA> for hyp and
        # 0 for ref), while the common default is 1.
        line.replace(channel='1')
        for line in r
    ])


@dataclasses.dataclass(frozen=True)
class DiaErrorRate:
    """
    Diarization error rate (DER) and the speaker time statistics it is
    computed from, as reported by md-eval-22.pl. All times are in seconds.
    """
    error_rate: 'float | decimal.Decimal'

    scored_speaker_time: 'float | decimal.Decimal'
    missed_speaker_time: 'float | decimal.Decimal'  # deletions
    falarm_speaker_time: 'float | decimal.Decimal'  # insertions
    speaker_error_time: 'float | decimal.Decimal'  # substitutions

    @classmethod
    def zero(cls):
        return cls(0, 0, 0, 0, 0)

    def __post_init__(self):
        assert self.scored_speaker_time >= 0
        assert self.missed_speaker_time >= 0
        assert self.falarm_speaker_time >= 0
        assert self.speaker_error_time >= 0
        errors = self.speaker_error_time + self.falarm_speaker_time + self.missed_speaker_time
        error_rate = decimal.Decimal(errors / self.scored_speaker_time)
        if self.error_rate is None:
            object.__setattr__(self, 'error_rate', error_rate)
        else:
            # decimal.Decimal.quantize rounds to the same number of digits as self.error_rate has.
            error_rate = error_rate.quantize(self.error_rate)
            assert self.error_rate == error_rate, (error_rate, self)

    def __radd__(self, other: 'int') -> 'DiaErrorRate':
        if isinstance(other, int) and other == 0:
            # Special case to support sum.
            return self
        return NotImplemented

    def __add__(self, other: 'DiaErrorRate') -> 'DiaErrorRate':
        if not isinstance(other, self.__class__):
            raise ValueError(f'Cannot add {type(other)} to {self.__class__.__name__}')

        return DiaErrorRate(
            error_rate=None,
            scored_speaker_time=self.scored_speaker_time + other.scored_speaker_time,
            missed_speaker_time=self.missed_speaker_time + other.missed_speaker_time,
            falarm_speaker_time=self.falarm_speaker_time + other.falarm_speaker_time,
            speaker_error_time=self.speaker_error_time + other.speaker_error_time,
        )


Review comment (Member):
Can you split this into a Python function that takes RTTM objects and the cli function that takes file paths?
For WERs, the cli functions are defined in __main__.py.

Reply (Member Author):
Conclusion after an offline discussion: it is possible to split the md_eval_22 function, but the code either gets ugly or differs from the WER code. So we made this function private; for now, md_eval_22 only has a CLI interface in meeteval and no Python interface.
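For illustration only, a rough sketch of the split that was discussed, not what this PR merges. The names der_from_rttm and der_from_files are hypothetical, and the meeteval.io.RTTM.load call is an assumption; the md-eval-22.pl path is passed in explicitly rather than resolved as below.

import re
import decimal
import tempfile
import subprocess
from pathlib import Path

import meeteval.io


def der_from_rttm(reference, hypothesis, collar=0, md_eval_22_bin='md-eval-22.pl'):
    # Python-level interface: scores two meeteval.io.RTTM objects with md-eval-22.pl.
    with tempfile.TemporaryDirectory() as tmpdir:
        r_file = Path(tmpdir) / 'ref.rttm'
        h_file = Path(tmpdir) / 'hyp.rttm'
        reference.dump(r_file)
        hypothesis.dump(h_file)
        cp = subprocess.run(
            ['perl', md_eval_22_bin, '-c', str(collar), '-r', str(r_file), '-s', str(h_file)],
            stdout=subprocess.PIPE, check=True, universal_newlines=True,
        )
    error_rate, = re.findall(
        r'OVERALL SPEAKER DIARIZATION ERROR = ([\d.]+) percent', cp.stdout)
    return decimal.Decimal(error_rate) / 100


def der_from_files(reference, hypothesis, collar=0):
    # Thin CLI-style wrapper that takes file paths instead of RTTM objects.
    # Assumption: meeteval.io.RTTM.load(path) reads an RTTM file.
    return der_from_rttm(
        meeteval.io.RTTM.load(reference),
        meeteval.io.RTTM.load(hypothesis),
        collar=collar,
    )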

def md_eval_22(
        reference,
        hypothesis,
        average_out='{parent}/{stem}_md_eval_22.json',
        per_reco_out='{parent}/{stem}_md_eval_22_per_reco.json',
        collar=0,
        regex=None,
):
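    """
    Computes the diarization error rate (DER) with md-eval-22.pl.

    Loads the reference and hypothesis, converts them to RTTM, scores each
    recording (and all recordings combined) with md-eval-22.pl using the given
    collar, and writes the results to `average_out` and `per_reco_out`.
    The md-eval-22.pl script is downloaded next to this file if it is not found.
    """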
    from meeteval.wer.__main__ import _load_texts

    r, _, h, hypothesis_paths = _load_texts(
        reference, hypothesis, regex)

    r = _fix_channel(r.to_rttm())
    h = _fix_channel(h.to_rttm())

    r = r.grouped_by_filename()
    h = h.grouped_by_filename()

    keys = set(r.keys()) & set(h.keys())
    missing = set(r.keys()) ^ set(h.keys())
    if len(missing) > 0:
        logging.warning(f'Missing {len(missing)} filenames: {missing}')
        logging.warning(f'Found {len(keys)} filenames: {keys}')
    md_eval_22 = shutil.which('md-eval-22.pl')
    if not md_eval_22:
        md_eval_22 = Path(__file__).parent / 'md-eval-22.pl'
        if not md_eval_22.exists():
            url = 'https://github.com/nryant/dscore/raw/master/scorelib/md-eval-22.pl'
            logging.info(f'md-eval-22.pl not found. Trying to download it from {url}.')
            import urllib.request
            urllib.request.urlretrieve(url, md_eval_22)
            logging.info(f'Wrote {md_eval_22}')

    def get_details(r, h, key, tmpdir):
        r_file = tmpdir / f'{key}.ref.rttm'
        h_file = tmpdir / f'{key}.hyp.rttm'
        r.dump(r_file)
        h.dump(h_file)

        cmd = [
            'perl', f'{md_eval_22}',
            '-c', f'{collar}',
            '-r', f'{r_file}',
            '-s', f'{h_file}',
        ]

        cp = subprocess.run(cmd, stdout=subprocess.PIPE,
                            check=True, universal_newlines=True)
        # Example output of md-eval-22.pl:
        #   SCORED SPEAKER TIME =4309.340250 secs
        #   MISSED SPEAKER TIME =4309.340250 secs
        #   FALARM SPEAKER TIME =0.000000 secs
        #   SPEAKER ERROR TIME =0.000000 secs
        #   OVERALL SPEAKER DIARIZATION ERROR = 100.00 percent of scored speaker time (ALL)

        error_rate, = re.findall(r'OVERALL SPEAKER DIARIZATION ERROR = ([\d.]+) percent of scored speaker time', cp.stdout)
        length, = re.findall(r'SCORED SPEAKER TIME =([\d.]+) secs', cp.stdout)
        deletions, = re.findall(r'MISSED SPEAKER TIME =([\d.]+) secs', cp.stdout)
        insertions, = re.findall(r'FALARM SPEAKER TIME =([\d.]+) secs', cp.stdout)
        substitutions, = re.findall(r'SPEAKER ERROR TIME =([\d.]+) secs', cp.stdout)

        def convert(string):
            return decimal.Decimal(string)

        details = dict(
            scored_speaker_time=convert(length),
            # errors=float(errors),
            missed_speaker_time=convert(deletions),
            falarm_speaker_time=convert(insertions),
            speaker_error_time=convert(substitutions),
            error_rate=convert(error_rate) / 100,
        )
        return DiaErrorRate(**details)

    per_reco = {}
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        for key in keys:
            per_reco[key] = get_details(r[key], h[key], key, tmpdir)

        md_eval = get_details(
            meeteval.io.RTTM([line for key in keys for line in r[key]]),
            meeteval.io.RTTM([line for key in keys for line in h[key]]),
            '',
            tmpdir,
        )
    summary = sum(per_reco.values())
    error_rate = summary.error_rate.quantize(md_eval.error_rate)
    if error_rate != md_eval.error_rate:
        raise RuntimeError(
            f'The error rate of md-eval-22.pl on all recordings '
            f'({md_eval.error_rate})\n'
            f'does not match the combined error rate of md-eval-22.pl '
            f'applied to each recording separately ({summary.error_rate}).'
        )

    from meeteval.wer.__main__ import _save_results
    _save_results(per_reco, hypothesis_paths, per_reco_out, average_out)


if __name__ == '__main__':
    import fire
    fire.Fire(md_eval_22)
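A side note on the consistency check above: the per-recording results are combined with a plain sum(), which relies on DiaErrorRate.__radd__ and __add__. A minimal, self-contained illustration with made-up numbers (not taken from any real scoring run):

import decimal
from meeteval.der.md_eval import DiaErrorRate

# Two per-recording results with invented speaker times (seconds).
a = DiaErrorRate(
    error_rate=decimal.Decimal('0.10'),
    scored_speaker_time=decimal.Decimal('100'),
    missed_speaker_time=decimal.Decimal('4'),
    falarm_speaker_time=decimal.Decimal('3'),
    speaker_error_time=decimal.Decimal('3'),
)
b = DiaErrorRate(
    error_rate=decimal.Decimal('0.30'),
    scored_speaker_time=decimal.Decimal('50'),
    missed_speaker_time=decimal.Decimal('10'),
    falarm_speaker_time=decimal.Decimal('2'),
    speaker_error_time=decimal.Decimal('3'),
)

# sum() starts with 0, which __radd__ accepts; __add__ then accumulates the
# speaker times and __post_init__ recomputes the combined error rate.
total = sum([a, b])
print(total.error_rate)  # 25 / 150, roughly 0.1667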