Skip to content

Commit

Permalink
feat(table): add mean and median values
Browse files (browse the repository at this point in the history)
  • Loading branch information
camball committed Mar 15, 2024
1 parent cd60185 commit db859be
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 22 deletions.
37 changes: 28 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,14 @@ Calculate the word error rate (WER) from provided correct and actual text file(s
$ wer --expected Expected --actual Actual # usage comparing folders of corresponding files

+-----------------+-----------------------+---------+-----------+
| Filename | Word Error Rate (WER) | % Error | % Success |
| Filename | Word Error Rate (WER) | % Error | % Success |
+-----------------+-----------------------+---------+-----------+
| test_data_1.txt | 0.3157894736842105 | 31.58% | 68.42% |
| test_data_2.txt | 0.3684210526315789 | 36.84% | 63.16% |
| test_data_1.txt | 0.3157894736842105 | 31.58% | 68.42% |
| test_data_2.txt | 0.3684210526315789 | 36.84% | 63.16% |
| test_data_3.txt | 0.14285714285714285 | 14.29% | 85.71% |
+-----------------+-----------------------+---------+-----------+
| Mean: | 0.2756892230576441 | 27.57% | 72.43% |
| Median: | 0.3157894736842105 | 31.58% | 68.42% |
+-----------------+-----------------------+---------+-----------+

$ wer --expected expected.txt --actual actual.txt # usage comparing single files
Expand All @@ -22,9 +26,16 @@ Percent Success: 68.42%
## Installation

```sh
# clone on your machine
git clone https://github.com/camball/word-error-rate-cli

# enter the install directory
cd word-error-rate-cli/

# install
pip3 install .

# use the program
wer --expected Expected --actual Actual
```

Expand All @@ -45,19 +56,27 @@ To handle this, the program allows you to pass a custom regex matcher, where any
$ wer -e Expected -a Actual

+-----------------+-----------------------+---------+-----------+
| Filename | Word Error Rate (WER) | % Error | % Success |
| Filename | Word Error Rate (WER) | % Error | % Success |
+-----------------+-----------------------+---------+-----------+
| test_data_1.txt | 0.2727272727272727 | 27.27% | 72.73% |
| test_data_2.txt | 0.3181818181818182 | 31.82% | 68.18% |
| test_data_3.txt | 0.125 | 12.50% | 87.50% |
+-----------------+-----------------------+---------+-----------+
| test_data_1.txt | 0.2727272727272727 | 27.27% | 72.73% |
| test_data_2.txt | 0.3181818181818182 | 31.82% | 68.18% |
| Mean: | 0.23863636363636362 | 23.86% | 76.14% |
| Median: | 0.2727272727272727 | 27.27% | 72.73% |
+-----------------+-----------------------+---------+-----------+

$ wer -e Expected -a Actual -i "^(?:Agent|Customer):"

+-----------------+-----------------------+---------+-----------+
| Filename | Word Error Rate (WER) | % Error | % Success |
| Filename | Word Error Rate (WER) | % Error | % Success |
+-----------------+-----------------------+---------+-----------+
| test_data_1.txt | 0.3157894736842105 | 31.58% | 68.42% |
| test_data_2.txt | 0.3684210526315789 | 36.84% | 63.16% |
| test_data_3.txt | 0.14285714285714285 | 14.29% | 85.71% |
+-----------------+-----------------------+---------+-----------+
| test_data_1.txt | 0.3157894736842105 | 31.58% | 68.42% |
| test_data_2.txt | 0.3684210526315789 | 36.84% | 63.16% |
| Mean: | 0.2756892230576441 | 27.57% | 72.43% |
| Median: | 0.3157894736842105 | 31.58% | 68.42% |
+-----------------+-----------------------+---------+-----------+
```

Expand Down
1 change: 1 addition & 0 deletions test_data/Actual/test_data_3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Agent: I won't go to the stores.
1 change: 1 addition & 0 deletions test_data/Expected/test_data_3.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Agent: I will not go to the store.
36 changes: 23 additions & 13 deletions wer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from jiwer import wer as _real_wer, transforms as tr
from pathlib import Path
from prettytable import PrettyTable
from statistics import mean, median
import sys


Expand Down Expand Up @@ -85,7 +86,7 @@ def wer(
actual_path: Path,
enforce_file_length_check: bool,
regex_to_ignore: str,
):
) -> float:
return _real_wer(
reference=lines_from_file(expected_path),
hypothesis=lines_from_file(actual_path),
Expand Down Expand Up @@ -135,27 +136,36 @@ def main():
)
}

table = PrettyTable()
table.field_names = [
"Filename",
"Word Error Rate (WER)",
r"% Error",
r"% Success",
]
columns = {
"Filename": "r",
"Word Error Rate (WER)": "l",
r"% Error": "r",
r"% Success": "r",
}
table = PrettyTable(columns.keys())
for filename, alignment in columns.items():
table.align[filename] = alignment

for filename, word_error_rate in word_error_rates.items():
for idx, (filename, word_error_rate) in enumerate(word_error_rates.items()):
table.add_row(
[
filename,
word_error_rate,
f"{word_error_rate:.2%}",
f"{1-word_error_rate:.2%}",
]
],
divider=idx == len(word_error_rates) - 1,
)

print()
print(table)
print()
wer_mean = mean(word_error_rates.values())
wer_median = median(word_error_rates.values())

table.add_row(["Mean:", wer_mean, f"{wer_mean:.2%}", f"{1-wer_mean:.2%}"])
table.add_row(
["Median:", wer_median, f"{wer_median:.2%}", f"{1-wer_median:.2%}"]
)

print(f"\n{table}\n")

elif not expected_path.exists() or not actual_path.exists():
raise FileNotFoundError(
Expand Down

0 comments on commit db859be

Please sign in to comment.