feat(table): add mean and median values

camball · Mar 15, 2024 · commit db859be · 1 parent cd60185


Showing 4 changed files with 53 additions and 22 deletions.
diff --git a/README.md b/README.md
@@ -6,10 +6,14 @@ Calculate the word error rate (WER) from provided correct and actual text file(s
 $ wer --expected Expected --actual Actual  # usage comparing folders of corresponding files
 
 +-----------------+-----------------------+---------+-----------+
-|     Filename    | Word Error Rate (WER) | % Error | % Success |
+|        Filename | Word Error Rate (WER) | % Error | % Success |
 +-----------------+-----------------------+---------+-----------+
-| test_data_1.txt |   0.3157894736842105  |  31.58% |   68.42%  |
-| test_data_2.txt |   0.3684210526315789  |  36.84% |   63.16%  |
+| test_data_1.txt | 0.3157894736842105    |  31.58% |    68.42% |
+| test_data_2.txt | 0.3684210526315789    |  36.84% |    63.16% |
+| test_data_3.txt | 0.14285714285714285   |  14.29% |    85.71% |
++-----------------+-----------------------+---------+-----------+
+|           Mean: | 0.2756892230576441    |  27.57% |    72.43% |
+|         Median: | 0.3157894736842105    |  31.58% |    68.42% |
 +-----------------+-----------------------+---------+-----------+
 
 $ wer --expected expected.txt --actual actual.txt  # usage comparing single files
@@ -22,9 +26,16 @@ Percent Success:        68.42%
 ## Installation
 
 ```sh
+# clone on your machine
 git clone https://github.com/camball/word-error-rate-cli
+
+# enter the install directory
 cd word-error-rate-cli/
+
+# install
 pip3 install .
+
+# use the program
 wer --expected Expected --actual Actual
 ```
 
@@ -45,19 +56,27 @@ To handle this, the program allows you to pass a custom regex matcher, where any
 $ wer -e Expected -a Actual
 
 +-----------------+-----------------------+---------+-----------+
-|     Filename    | Word Error Rate (WER) | % Error | % Success |
+|        Filename | Word Error Rate (WER) | % Error | % Success |
++-----------------+-----------------------+---------+-----------+
+| test_data_1.txt | 0.2727272727272727    |  27.27% |    72.73% |
+| test_data_2.txt | 0.3181818181818182    |  31.82% |    68.18% |
+| test_data_3.txt | 0.125                 |  12.50% |    87.50% |
 +-----------------+-----------------------+---------+-----------+
-| test_data_1.txt |   0.2727272727272727  |  27.27% |   72.73%  |
-| test_data_2.txt |   0.3181818181818182  |  31.82% |   68.18%  |
+|           Mean: | 0.23863636363636362   |  23.86% |    76.14% |
+|         Median: | 0.2727272727272727    |  27.27% |    72.73% |
 +-----------------+-----------------------+---------+-----------+
 
 $ wer -e Expected -a Actual -i "^(?:Agent|Customer):"
 
 +-----------------+-----------------------+---------+-----------+
-|     Filename    | Word Error Rate (WER) | % Error | % Success |
+|        Filename | Word Error Rate (WER) | % Error | % Success |
++-----------------+-----------------------+---------+-----------+
+| test_data_1.txt | 0.3157894736842105    |  31.58% |    68.42% |
+| test_data_2.txt | 0.3684210526315789    |  36.84% |    63.16% |
+| test_data_3.txt | 0.14285714285714285   |  14.29% |    85.71% |
 +-----------------+-----------------------+---------+-----------+
-| test_data_1.txt |   0.3157894736842105  |  31.58% |   68.42%  |
-| test_data_2.txt |   0.3684210526315789  |  36.84% |   63.16%  |
+|           Mean: | 0.2756892230576441    |  27.57% |    72.43% |
+|         Median: | 0.3157894736842105    |  31.58% |    68.42% |
 +-----------------+-----------------------+---------+-----------+
 ```
 

diff --git a/test_data/Actual/test_data_3.txt b/test_data/Actual/test_data_3.txt
@@ -0,0 +1 @@
+Agent: I won't go to the stores.
diff --git a/test_data/Expected/test_data_3.txt b/test_data/Expected/test_data_3.txt
@@ -0,0 +1 @@
+Agent: I will not go to the store.
diff --git a/wer/cli.py b/wer/cli.py
@@ -2,6 +2,7 @@
 from jiwer import wer as _real_wer, transforms as tr
 from pathlib import Path
 from prettytable import PrettyTable
+from statistics import mean, median
 import sys
 
 
@@ -85,7 +86,7 @@ def wer(
     actual_path: Path,
     enforce_file_length_check: bool,
     regex_to_ignore: str,
-):
+) -> float:
     return _real_wer(
         reference=lines_from_file(expected_path),
         hypothesis=lines_from_file(actual_path),
@@ -135,27 +136,36 @@ def main():
             )
         }
 
-        table = PrettyTable()
-        table.field_names = [
-            "Filename",
-            "Word Error Rate (WER)",
-            r"% Error",
-            r"% Success",
-        ]
+        columns = {
+            "Filename": "r",
+            "Word Error Rate (WER)": "l",
+            r"% Error": "r",
+            r"% Success": "r",
+        }
+        table = PrettyTable(columns.keys())
+        for filename, alignment in columns.items():
+            table.align[filename] = alignment
 
-        for filename, word_error_rate in word_error_rates.items():
+        for idx, (filename, word_error_rate) in enumerate(word_error_rates.items()):
             table.add_row(
                 [
                     filename,
                     word_error_rate,
                     f"{word_error_rate:.2%}",
                     f"{1-word_error_rate:.2%}",
-                ]
+                ],
+                divider=idx == len(word_error_rates) - 1,
             )
 
-        print()
-        print(table)
-        print()
+        wer_mean = mean(word_error_rates.values())
+        wer_median = median(word_error_rates.values())
+
+        table.add_row(["Mean:", wer_mean, f"{wer_mean:.2%}", f"{1-wer_mean:.2%}"])
+        table.add_row(
+            ["Median:", wer_median, f"{wer_median:.2%}", f"{1-wer_median:.2%}"]
+        )
+
+        print(f"\n{table}\n")
 
     elif not expected_path.exists() or not actual_path.exists():
         raise FileNotFoundError(