From aec324a652405265a3854b2c910a5fc1f0cd5311 Mon Sep 17 00:00:00 2001 From: Ben King Date: Fri, 26 Sep 2025 13:23:54 +0000 Subject: [PATCH 1/5] Update multiple translations classes with confidence methods --- silnlp/common/translator.py | 283 ++++++++++++++++++++---------- silnlp/nmt/hugging_face_config.py | 123 ++++++------- 2 files changed, 244 insertions(+), 162 deletions(-) diff --git a/silnlp/common/translator.py b/silnlp/common/translator.py index ebbf9a1b..8c7be5a3 100644 --- a/silnlp/common/translator.py +++ b/silnlp/common/translator.py @@ -36,112 +36,209 @@ CONFIDENCE_SCORES_SUFFIX = ".confidences.tsv" + +class SentenceTranslation: + def __init__( + self, + translation: str, + tokens: List[str], + token_scores: List[float], + sequence_score: Optional[float], + ): + self._translation = translation + self._tokens = tokens + self._token_scores = token_scores + self._sequence_score = sequence_score + + def get_translation(self) -> str: + return self._translation + + def has_sequence_confidence_score(self) -> bool: + return self._sequence_score is not None + + def get_sequence_confidence_score(self) -> Optional[float]: + return self._sequence_score + + def join_tokens_for_confidence_file(self) -> str: + return "\t".join(self._tokens) + + def join_token_scores_for_confidence_file(self) -> str: + return "\t".join([self._sequence_score] + self._token_scores) + + # A group of multiple translations of a single sentence -TranslationGroup = List[str] +SentenceTranslationGroup = List[SentenceTranslation] + # A list representing a single draft (one translation of each input sentence) -TranslatedDraft = List[str] +class TranslatedDraft: + def __init__(self, sentence_translations: List[SentenceTranslation]): + self._sentence_translations = sentence_translations + + def has_sequence_confidence_scores(self) -> bool: + # If any sentence has a sequence score, all sentences should have one + return self._sentence_translations[0].has_sequence_confidence_score() + + def write_confidence_scores_to_file( + self, + confidences_path: Path, + row1col1_label: str, + vrefs: Optional[List[VerseRef]] = None, + ) -> None: + with confidences_path.open("w", encoding="utf-8", newline="\n") as confidences_file: + confidences_file.write("\t".join([f"{row1col1_label}"] + [f"Token {i}" for i in range(200)]) + "\n") + confidences_file.write("\t".join(["Sequence Score"] + [f"Token Score {i}" for i in range(200)]) + "\n") + for sentence_num, sentence_translation in enumerate(self._sentence_translations): + sequence_label = str(sentence_num) + if vrefs is not None: + sequence_label = str(vrefs[sentence_num]) + confidences_file.write( + sequence_label + "\t" + sentence_translation.join_tokens_for_confidence_file() + "\n" + ) + confidences_file.write(sentence_translation.join_token_scores_for_confidence_file() + "\n") + + def write_chapter_confidence_scores_to_file(self, chapter_confidences_path: Path, vrefs: List[VerseRef]): + chapter_confidences: DefaultDict[int, List[float]] = defaultdict(list) + for sentence_num, vref in enumerate(vrefs): + if not vref.is_verse or self._sentence_translations[sentence_num].get_sequence_confidence_score() is None: + continue + vref_confidence = exp(self._sentence_translations[sentence_num].get_sequence_confidence_score()) + chapter_confidences[vref.chapter_num].append(vref_confidence) + + with chapter_confidences_path.open("w", encoding="utf-8", newline="\n") as chapter_confidences_file: + chapter_confidences_file.write("Chapter\tConfidence\n") + for chapter, confidences in 
chapter_confidences.items(): + chapter_confidence = gmean(confidences) + chapter_confidences_file.write(f"{chapter}\t{chapter_confidence}\n") + + def get_all_sequence_confidence_scores(self) -> List[float]: + return [ + st.get_sequence_confidence_score() + for st in self._sentence_translations + if st.get_sequence_confidence_score() is not None + ] + def get_rows_for_postprocess(self) -> List[str]: + return [st.get_translation() for st in self._sentence_translations] -# A wrapper around List[TranslationGroup] that allows upstream consumers to view a + +# A wrapper around List[SentenceTranslationGroup] that allows upstream consumers to view a # list of translation groups as a collection of discrete drafts class DraftGroup: - def __init__(self, translation_groups: List[TranslationGroup]): + def __init__(self, translation_groups: List[SentenceTranslationGroup]): self.translation_groups = translation_groups self.num_drafts: int = len(self.translation_groups[0]) def get_drafts(self) -> List[TranslatedDraft]: - translated_draft_sentences = [[] for _ in range(self.num_drafts)] + translated_draft_sentences: List[List[SentenceTranslation]] = [[] for _ in range(self.num_drafts)] for translation_group in self.translation_groups: - if len(translation_group) == 0: - translation_group = self._createEmptyTranslationGroup() - for draft_index in range(self.num_drafts): translated_draft_sentences[draft_index].append(translation_group[draft_index]) - return translated_draft_sentences - - def _createEmptyTranslationGroup(self): - return ["" for _ in range(self.num_drafts)] + return [TranslatedDraft(sentences) for sentences in translated_draft_sentences] def generate_confidence_files( - output: List[TranslationGroup], + translated_draft: TranslatedDraft, trg_file_path: Path, - translate_step: bool = False, trg_prefix: str = "", produce_multiple_translations: bool = False, - draft_index: int = 0, vrefs: Optional[List[VerseRef]] = None, + draft_index: int = 0, ) -> None: + if not translated_draft.has_sequence_confidence_scores(): + LOGGER.warning( + f"{trg_file_path} was not translated with beam search, so confidence scores will not be calculated for this file." 
+        )
+        return
+
     if produce_multiple_translations:
         confidences_path = trg_file_path.with_suffix(f".{draft_index}{trg_file_path.suffix}{CONFIDENCE_SCORES_SUFFIX}")
     else:
         confidences_path = trg_file_path.with_suffix(f"{trg_file_path.suffix}{CONFIDENCE_SCORES_SUFFIX}")
-    sequence_confidences: List[float] = []
+
     ext = trg_file_path.suffix.lower()
-    with confidences_path.open("w", encoding="utf-8", newline="\n") as confidences_file:
-        if translate_step and ext in {".usfm", ".sfm"}:
-            row1_col1_header = "VRef"
-        else:
-            row1_col1_header = "Sequence Number"
-        confidences_file.write("\t".join([f"{row1_col1_header}"] + [f"Token {i}" for i in range(200)]) + "\n")
-        confidences_file.write("\t".join(["Sequence Score"] + [f"Token Score {i}" for i in range(200)]) + "\n")
-        for sentence_num, _ in enumerate(output):
-            if output[sentence_num][0] is None:
-                continue
-            sequence_label = [str(sentence_num)]
-            if translate_step:
-                if ext in {".usfm", ".sfm"}:
-                    sequence_label = [str(vrefs[sentence_num])]
-                elif ext == ".txt":
-                    sequence_confidences.append(exp(output[sentence_num][3][draft_index - 1]))
-            confidences_file.write("\t".join(sequence_label + output[sentence_num][1][draft_index - 1]) + "\n")
-            confidences_file.write(
-                "\t".join(
-                    [str(exp(output[sentence_num][3][draft_index - 1]))]
-                    + [str(exp(token_score)) for token_score in output[sentence_num][2][draft_index - 1]]
-                )
-                + "\n"
-            )
-    if translate_step:
-        if ext in {".usfm", ".sfm"}:
-            chapter_confidences: DefaultDict[int, List[float]] = defaultdict(list)
-            for sentence_num, vref in enumerate(vrefs):
-                if not vref.is_verse or output[sentence_num][0] is None:
-                    continue
-                vref_confidence = exp(output[sentence_num][3][draft_index - 1])
-                chapter_confidences[vref.chapter_num].append(vref_confidence)
-
-            with confidences_path.with_suffix(".chapters.tsv").open(
-                "w", encoding="utf-8", newline="\n"
-            ) as chapter_confidences_file:
-                chapter_confidences_file.write("Chapter\tConfidence\n")
-                for chapter, confidences in chapter_confidences.items():
-                    sequence_confidences += confidences
-                    chapter_confidence = gmean(confidences)
-                    chapter_confidences_file.write(f"{chapter}\t{chapter_confidence}\n")
-
-            file_confidences_path = trg_file_path.parent / "confidences.books.tsv"
-            row1_col1_header = "Book"
-            if vrefs:
-                col1_entry = vrefs[0].book
-            else:
-                col1_entry = trg_file_path.stem
-        elif ext == ".txt":
-            file_confidences_path = trg_file_path.parent / f"{trg_prefix}confidences.files.tsv"
-            row1_col1_header = "File"
-            col1_entry = trg_file_path.name
-        else:
-            raise ValueError(
-                f"Invalid trg file extension {ext} when using --save-confidences in the translate step."
-                f"Valid file extensions for --save-confidences are .usfm, .sfm, and .txt."
-            )
-        with file_confidences_path.open("a", encoding="utf-8", newline="\n") as file_confidences_file:
-            if file_confidences_file.tell() == 0:
-                file_confidences_file.write(f"{row1_col1_header}\tConfidence\n")
-            file_confidences_file.write(f"{col1_entry}\t{gmean(sequence_confidences)}\n")
+    if ext in {".usfm", ".sfm"}:
+        assert vrefs is not None
+        generate_usfm_confidence_files(translated_draft, trg_file_path, confidences_path, vrefs, draft_index)
+    elif ext == ".txt":
+        generate_txt_confidence_files(translated_draft, trg_file_path, confidences_path, trg_prefix)
+    else:
+        raise ValueError(
+            f"Invalid trg file extension {ext} when using --save-confidences in the translate step. "
+            f"Valid file extensions for --save-confidences are .usfm, .sfm, and .txt."
+ ) + + +def generate_usfm_confidence_files( + translated_draft: TranslatedDraft, + trg_file_path: Path, + confidences_path: Path, + vrefs: List[VerseRef], + draft_index: int = 0, +) -> None: + + translated_draft.write_confidence_scores_to_file(confidences_path, "VRef", vrefs) + translated_draft.write_chapter_confidence_scores_to_file(confidences_path.with_suffix(".chapters.tsv"), vrefs) + _append_book_confidence_score(translated_draft, trg_file_path, vrefs) + + +def _append_book_confidence_score( + translated_draft: TranslatedDraft, + trg_file_path: Path, + vrefs: List[VerseRef], +) -> None: + file_confidences_path = trg_file_path.parent / "confidences.books.tsv" + row1_col1_header = "Book" + if vrefs: + col1_entry = vrefs[0].book + else: + col1_entry = trg_file_path.stem + + with file_confidences_path.open("a", encoding="utf-8", newline="\n") as file_confidences_file: + if file_confidences_file.tell() == 0: + file_confidences_file.write(f"{row1_col1_header}\tConfidence\n") + file_confidences_file.write(f"{col1_entry}\t{gmean(translated_draft.get_all_sequence_confidence_scores())}\n") + + +def generate_txt_confidence_files( + translated_draft: TranslatedDraft, + trg_file_path: Path, + confidences_path: Path, + trg_prefix: str = "", +) -> None: + translated_draft.write_confidence_scores_to_file(confidences_path, "Sequence Number") + + _append_file_confidence_score(translated_draft, trg_file_path, trg_prefix) + + +def _append_file_confidence_score( + translated_draft: TranslatedDraft, + trg_file_path: Path, + trg_prefix: str = "", +) -> None: + file_confidences_path = trg_file_path.parent / f"{trg_prefix}confidences.files.tsv" + + with file_confidences_path.open("a", encoding="utf-8", newline="\n") as file_confidences_file: + if file_confidences_file.tell() == 0: + file_confidences_file.write("File\tConfidence\n") + file_confidences_file.write( + f"{trg_file_path.name}\t{gmean(translated_draft.get_all_sequence_confidence_scores())}\n" + ) + + +def generate_test_confidence_files( + translated_draft: TranslatedDraft, + trg_file_path: Path, + produce_multiple_translations: bool = False, + draft_index: int = 0, +) -> None: + if produce_multiple_translations: + confidences_path = trg_file_path.with_suffix(f".{draft_index}{trg_file_path.suffix}{CONFIDENCE_SCORES_SUFFIX}") + else: + confidences_path = trg_file_path.with_suffix(f"{trg_file_path.suffix}{CONFIDENCE_SCORES_SUFFIX}") + translated_draft.write_confidence_scores_to_file(confidences_path, "Sequence Number") class Translator(ABC): @@ -153,7 +250,7 @@ def translate( trg_iso: str, produce_multiple_translations: bool = False, vrefs: Optional[Iterable[VerseRef]] = None, - ) -> Iterable[TranslationGroup]: + ) -> Iterable[SentenceTranslationGroup]: pass def translate_text( @@ -166,21 +263,21 @@ def translate_text( save_confidences: bool = False, trg_prefix: str = "", ) -> None: - output = list(self.translate(load_corpus(src_file_path), src_iso, trg_iso, produce_multiple_translations)) - translations = [translation for translation, _, _, _ in output] - draft_set = DraftGroup(translations) + sentence_translation_groups: List[SentenceTranslationGroup] = list( + self.translate(load_corpus(src_file_path), src_iso, trg_iso, produce_multiple_translations) + ) + draft_set = DraftGroup(sentence_translation_groups) for draft_index, translated_draft in enumerate(draft_set.get_drafts(), 1): if produce_multiple_translations: trg_draft_file_path = trg_file_path.with_suffix(f".{draft_index}{trg_file_path.suffix}") else: trg_draft_file_path = trg_file_path - 
write_corpus(trg_draft_file_path, translated_draft) + write_corpus(trg_draft_file_path, translated_draft.get_all_translations()) if save_confidences: generate_confidence_files( - output, + translated_draft, trg_file_path, - translate_step=True, trg_prefix=trg_prefix, produce_multiple_translations=produce_multiple_translations, draft_index=draft_index, @@ -280,25 +377,24 @@ def translate_usfm( sentences.pop(i) empty_sents.append((i, vrefs.pop(i))) - output = list(self.translate(sentences, src_iso, trg_iso, produce_multiple_translations, vrefs)) - - translations = [translation for translation, _, _, _ in output] + sentence_translation_groups: List[SentenceTranslationGroup] = list( + self.translate(sentences, src_iso, trg_iso, produce_multiple_translations, vrefs) + ) # Add empty sentences back in # Prevents pre-existing text from showing up in the sections of translated text for idx, vref in reversed(empty_sents): sentences.insert(idx, "") - translations.insert(idx, ["" for _ in range(len(translations[0]))]) vrefs.insert(idx, vref) - output.insert(idx, [None, None, None, None]) + sentence_translation_groups.insert(idx, SentenceTranslation("", [], [], None)) text_behavior = ( UpdateUsfmTextBehavior.PREFER_NEW if trg_project is not None else UpdateUsfmTextBehavior.STRIP_EXISTING ) - draft_set: DraftGroup = DraftGroup(translations) + draft_set: DraftGroup = DraftGroup(sentence_translation_groups) for draft_index, translated_draft in enumerate(draft_set.get_drafts(), 1): - postprocess_handler.construct_rows(vrefs, sentences, translated_draft) + postprocess_handler.construct_rows(vrefs, sentences, translated_draft.get_rows_for_postprocess()) for config in postprocess_handler.configs: @@ -382,12 +478,11 @@ def translate_usfm( if save_confidences: generate_confidence_files( - output, + translated_draft, trg_file_path, - translate_step=True, produce_multiple_translations=produce_multiple_translations, - draft_index=draft_index, vrefs=vrefs, + draft_index=draft_index, ) def translate_docx( diff --git a/silnlp/nmt/hugging_face_config.py b/silnlp/nmt/hugging_face_config.py index 7c4679bb..88618137 100644 --- a/silnlp/nmt/hugging_face_config.py +++ b/silnlp/nmt/hugging_face_config.py @@ -6,9 +6,9 @@ import shutil from contextlib import ExitStack from copy import deepcopy +from dataclasses import dataclass from enum import Enum from itertools import repeat -from math import exp, prod from pathlib import Path from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, TypeVar, Union, cast @@ -19,7 +19,6 @@ import torch import transformers.utils.logging as transformers_logging import yaml -from accelerate import infer_auto_device_map, init_empty_weights from accelerate.utils.memory import should_reduce_batch_size from datasets import Dataset from machine.scripture import ORIGINAL_VERSIFICATION, VerseRef @@ -74,7 +73,12 @@ from ..common.corpus import Term, count_lines, get_terms from ..common.environment import SIL_NLP_ENV -from ..common.translator import DraftGroup, TranslationGroup, generate_confidence_files +from ..common.translator import ( + DraftGroup, + SentenceTranslation, + SentenceTranslationGroup, + generate_test_confidence_files, +) from ..common.utils import NoiseMethod, ReplaceRandomToken, Side, create_noise_methods, get_mt_exp_dir, merge_dict from .config import CheckpointType, Config, NMTModel from .corpora import DataFile @@ -816,21 +820,38 @@ def batch_sentences( yield batch, None -class OutputGroup: - def __init__(self, outputs: List[dict]): - self.outputs = outputs 
+@dataclass +class ModelOutput: + translated_text: str + translation_token_ids: List[int] + token_scores: List[float] + sequence_score: Optional[float] - def get_translated_text(self) -> List[str]: - return [output["translation_text"] for output in self.outputs] + def convert_to_sentence_translation(self, tokenizer: PreTrainedTokenizer) -> SentenceTranslation: + tokens = tokenizer.convert_ids_to_tokens(self.translation_token_ids) + return SentenceTranslation(self.translated_text, tokens, self.token_scores, self.sequence_score) - def get_token_ids(self) -> List[List[int]]: - return [output["translation_token_ids"] for output in self.outputs] - def get_token_scores(self) -> List[float]: - return [output["token_scores"] for output in self.outputs] +# This class represents multiple translations of a single input sequence +class ModelOutputGroup: + def __init__(self, outputs: List[dict]): + self._outputs = outputs + + def _get_model_outputs(self) -> List[ModelOutput]: + return [ + ModelOutput( + output["translation_text"], + output["translation_token_ids"], + output["token_scores"], + output["sequence_score"], + ) + for output in self._outputs + ] - def get_sequence_score(self) -> List[float]: - return [output["sequence_score"] for output in self.outputs] + def convert_to_sentence_translation_group(self, tokenizer: PreTrainedTokenizer) -> SentenceTranslationGroup: + return SentenceTranslationGroup( + [model_output.convert_to_sentence_translation(tokenizer) for model_output in self._get_model_outputs()] + ) class HuggingFaceNMTModel(NMTModel): @@ -1152,8 +1173,8 @@ def translate_test_files( out_file.write("\n".join(translated_draft) + "\n") if save_confidences: - generate_confidence_files( - output, + generate_test_confidence_files( + translated_draft, translation_path, produce_multiple_translations=produce_multiple_translations, draft_index=draft_index, @@ -1167,7 +1188,7 @@ def _translate_test_sentences( vrefs: Iterable[VerseRef], length: int, produce_multiple_translations: bool = False, - ) -> Iterable[TranslationGroup]: + ) -> Iterable[SentenceTranslationGroup]: num_drafts = self.get_num_drafts() if produce_multiple_translations and num_drafts > 1: LOGGER.info("Producing %i translated drafts", num_drafts) @@ -1177,31 +1198,14 @@ def _translate_test_sentences( "Falling back to a single translation." 
) - for output_group in tqdm( + for model_output_group in tqdm( self._translate_sentences( tokenizer, pipeline, sentences, vrefs, produce_multiple_translations, return_tensors=True ), total=length, unit="ex", ): - all_ids = to_py_obj(output_group.get_token_ids()) - all_scores = to_py_obj(output_group.get_token_scores()) - sequence_score = to_py_obj(output_group.get_sequence_score()) - ids = [] - token_scores = [] - for output_id, output_score in zip(all_ids, all_scores): - output_ids = [] - output_scores = [] - for id, score in zip(output_id[1:], output_score[1:]): - if id == tokenizer.pad_token_id: - continue - output_ids.append(id) - output_scores.append(score) - ids.append(output_ids) - token_scores.append(output_scores) - # ids = [[id for id in output[1:] if id != tokenizer.pad_token_id] for output in ids] - tokens = [tokenizer.convert_ids_to_tokens(id_group) for id_group in ids] - yield [" ".join(token_group) for token_group in tokens], tokens, token_scores, sequence_score + yield model_output_group.convert_to_sentence_translation_group(tokenizer) def get_num_drafts(self) -> int: num_drafts = self._config.infer.get("num_drafts", 1) @@ -1215,7 +1219,7 @@ def translate( produce_multiple_translations: bool = False, vrefs: Optional[Iterable[VerseRef]] = None, ckpt: Union[CheckpointType, str, int] = CheckpointType.LAST, - ) -> Iterable[TranslationGroup]: + ) -> Iterable[SentenceTranslationGroup]: src_lang = self._config.data["lang_codes"].get(src_iso, src_iso) trg_lang = self._config.data["lang_codes"].get(trg_iso, trg_iso) tokenizer = self._config.get_tokenizer() @@ -1248,32 +1252,12 @@ def translate( pipeline.model = torch.compile(pipeline.model) if not isinstance(sentences, list): sentences = list(sentences) - for outputs in tqdm( + for model_output_group in tqdm( self._translate_sentences(tokenizer, pipeline, sentences, vrefs, produce_multiple_translations), total=len(sentences), unit="ex", ): - if isinstance(outputs, OutputGroup): - outputs = [outputs] - for output_group in outputs: - translated_text = to_py_obj(output_group.get_translated_text()) - all_ids = to_py_obj(output_group.get_token_ids()) - all_scores = to_py_obj(output_group.get_token_scores()) - sequence_score = to_py_obj(output_group.get_sequence_score()) - ids = [] - token_scores = [] - for output_id, output_score in zip(all_ids, all_scores): - output_ids = [] - output_scores = [] - for id, score in zip(output_id[1:], output_score[1:]): - if id == tokenizer.pad_token_id: - continue - output_ids.append(id) - output_scores.append(score) - ids.append(output_ids) - token_scores.append(output_scores) - tokens = [tokenizer.convert_ids_to_tokens(id_group) for id_group in ids] - yield translated_text, tokens, token_scores, sequence_score + yield model_output_group.convert_to_sentence_translation_group(model_output_group, tokenizer) def get_checkpoint_path(self, ckpt: Union[CheckpointType, str, int]) -> Tuple[Path, int]: step: Optional[int] = None @@ -1474,7 +1458,7 @@ def _translate_sentences( vrefs: Optional[Iterable[VerseRef]], produce_multiple_translations: bool = False, return_tensors: bool = False, - ) -> Iterable[OutputGroup]: + ) -> Iterable[ModelOutputGroup]: batch_size: int = self._config.infer["infer_batch_size"] dictionary = self._get_dictionary() @@ -1511,7 +1495,7 @@ def _translate_sentence_helper( return_tensors: bool, force_words_ids: List[List[List[int]]] = None, produce_multiple_translations: bool = False, - ) -> Iterable[OutputGroup]: + ) -> Iterable[ModelOutputGroup]: num_drafts = self.get_num_drafts() if 
produce_multiple_translations and num_drafts > 1: @@ -1540,12 +1524,13 @@ def _translate_sentence_helper( # concatenate the beam search results with the sampling results yield from [ - OutputGroup(beam_search_results[i] + sampling_results[i]) for i in range(len(beam_search_results)) + ModelOutputGroup(beam_search_results[i] + sampling_results[i]) + for i in range(len(beam_search_results)) ] elif multiple_translations_method == "sampling": yield from [ - OutputGroup(result) + ModelOutputGroup(result) for result in self._translate_with_sampling( pipeline, sentences, @@ -1558,7 +1543,7 @@ def _translate_sentence_helper( elif multiple_translations_method == "beam_search": yield from [ - OutputGroup(result) + ModelOutputGroup(result) for result in self._translate_with_beam_search( pipeline, sentences, @@ -1571,7 +1556,7 @@ def _translate_sentence_helper( elif multiple_translations_method == "diverse_beam_search": yield from [ - OutputGroup(result) + ModelOutputGroup(result) for result in self._translate_with_diverse_beam_search( pipeline, sentences, @@ -1586,7 +1571,7 @@ def _translate_sentence_helper( else: yield from [ - OutputGroup([translated_sentence[0]]) + ModelOutputGroup([translated_sentence[0]]) for translated_sentence in self._translate_with_beam_search( pipeline, sentences, @@ -1925,7 +1910,7 @@ def _forward(self, model_inputs, **generate_kwargs): beam_indices = torch.zeros_like(output_ids) assert output.scores is not None scores = tuple(torch.nn.functional.log_softmax(logits, dim=-1) for logits in output.scores) - sequences_scores = output.sequences_scores + sequences_scores = None else: raise RuntimeError("Cannot postprocess the output of the model.") @@ -1980,7 +1965,9 @@ def postprocess(self, model_outputs, return_type=None, clean_up_tokenization_spa "translation_tokens": output_tokens, "translation_token_ids": output_token_ids, "token_scores": scores, - "sequence_score": model_outputs["sequences_scores"][0], + "sequence_score": ( + model_outputs["sequences_scores"][0] if model_outputs["sequences_scores"] is not None else None + ), "translation_text": self.tokenizer.decode( output_ids, skip_special_tokens=True, From 7e06364f69c988e7f504eb06cc22f858e5b4e1f5 Mon Sep 17 00:00:00 2001 From: Ben King Date: Fri, 26 Sep 2025 19:15:08 +0000 Subject: [PATCH 2/5] Minor bug fixes for implementation --- silnlp/common/translator.py | 11 ++++++----- silnlp/nmt/config.py | 4 ++-- silnlp/nmt/hugging_face_config.py | 12 +++++++++--- silnlp/nmt/translate.py | 4 ++-- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/silnlp/common/translator.py b/silnlp/common/translator.py index 8c7be5a3..ecf65b2c 100644 --- a/silnlp/common/translator.py +++ b/silnlp/common/translator.py @@ -63,7 +63,7 @@ def join_tokens_for_confidence_file(self) -> str: return "\t".join(self._tokens) def join_token_scores_for_confidence_file(self) -> str: - return "\t".join([self._sequence_score] + self._token_scores) + return "\t".join([str(exp(ts)) for ts in [self._sequence_score] + self._token_scores]) # A group of multiple translations of a single sentence @@ -76,8 +76,7 @@ def __init__(self, sentence_translations: List[SentenceTranslation]): self._sentence_translations = sentence_translations def has_sequence_confidence_scores(self) -> bool: - # If any sentence has a sequence score, all sentences should have one - return self._sentence_translations[0].has_sequence_confidence_score() + return any([st.has_sequence_confidence_score() for st in self._sentence_translations]) def write_confidence_scores_to_file( self, 
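[Aside: a minimal sketch, not part of the patch, of the conversion the hunk above adopts. Beam search reports log-probabilities, so the confidence rows are exponentiated into probabilities before being joined; the score values below are hypothetical.]

from math import exp

sequence_score = -0.105        # hypothetical sequence log-probability
token_scores = [-0.02, -0.31]  # hypothetical per-token log-probabilities

# Mirrors join_token_scores_for_confidence_file after this fix: exponentiate,
# then join, so the TSV holds probabilities rather than raw negative logs.
row = "\t".join(str(exp(s)) for s in [sequence_score] + token_scores)
print(row)  # e.g. 0.900...	0.980...	0.733...
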
@@ -89,6 +88,8 @@ def write_confidence_scores_to_file( confidences_file.write("\t".join([f"{row1col1_label}"] + [f"Token {i}" for i in range(200)]) + "\n") confidences_file.write("\t".join(["Sequence Score"] + [f"Token Score {i}" for i in range(200)]) + "\n") for sentence_num, sentence_translation in enumerate(self._sentence_translations): + if not sentence_translation.has_sequence_confidence_score(): + continue sequence_label = str(sentence_num) if vrefs is not None: sequence_label = str(vrefs[sentence_num]) @@ -113,7 +114,7 @@ def write_chapter_confidence_scores_to_file(self, chapter_confidences_path: Path def get_all_sequence_confidence_scores(self) -> List[float]: return [ - st.get_sequence_confidence_score() + exp(st.get_sequence_confidence_score()) for st in self._sentence_translations if st.get_sequence_confidence_score() is not None ] @@ -386,7 +387,7 @@ def translate_usfm( for idx, vref in reversed(empty_sents): sentences.insert(idx, "") vrefs.insert(idx, vref) - sentence_translation_groups.insert(idx, SentenceTranslation("", [], [], None)) + sentence_translation_groups.insert(idx, [SentenceTranslation("", [], [], None)]) text_behavior = ( UpdateUsfmTextBehavior.PREFER_NEW if trg_project is not None else UpdateUsfmTextBehavior.STRIP_EXISTING diff --git a/silnlp/nmt/config.py b/silnlp/nmt/config.py index 5f81694f..359e503b 100644 --- a/silnlp/nmt/config.py +++ b/silnlp/nmt/config.py @@ -32,7 +32,7 @@ write_corpus, ) from ..common.environment import SIL_NLP_ENV -from ..common.translator import TranslationGroup +from ..common.translator import SentenceTranslationGroup from ..common.utils import NoiseMethod, Side, get_mt_exp_dir, set_seed from .augment import AugmentMethod from .corpora import ( @@ -86,7 +86,7 @@ def translate( trg_iso: str, vrefs: Optional[Iterable[VerseRef]] = None, ckpt: Union[CheckpointType, str, int] = CheckpointType.LAST, - ) -> Iterable[TranslationGroup]: ... + ) -> Iterable[SentenceTranslationGroup]: ... @abstractmethod def get_checkpoint_path(self, ckpt: Union[CheckpointType, str, int]) -> Tuple[Path, int]: ... 
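[Aside: a sketch of how the SentenceTranslationGroup alias used in these signatures flows through DraftGroup. It assumes the classes from silnlp/common/translator.py as patched above; get_all_translations is the name the final patch in this series settles on (it is still get_rows_for_postprocess at this point in the history).]

from silnlp.common.translator import DraftGroup, SentenceTranslation

# translate() yields one group per source sentence; each group holds one
# SentenceTranslation per draft. DraftGroup transposes that sentence-major
# input into draft-major TranslatedDraft objects.
groups = [
    [SentenceTranslation("s1 d1", [], [], None), SentenceTranslation("s1 d2", [], [], None)],
    [SentenceTranslation("s2 d1", [], [], None), SentenceTranslation("s2 d2", [], [], None)],
]
drafts = DraftGroup(groups).get_drafts()
print(drafts[1].get_all_translations())  # ["s1 d2", "s2 d2"]
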
diff --git a/silnlp/nmt/hugging_face_config.py b/silnlp/nmt/hugging_face_config.py index 88618137..c4b48d77 100644 --- a/silnlp/nmt/hugging_face_config.py +++ b/silnlp/nmt/hugging_face_config.py @@ -9,6 +9,7 @@ from dataclasses import dataclass from enum import Enum from itertools import repeat +from math import prod from pathlib import Path from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, TypeVar, Union, cast @@ -829,7 +830,12 @@ class ModelOutput: def convert_to_sentence_translation(self, tokenizer: PreTrainedTokenizer) -> SentenceTranslation: tokens = tokenizer.convert_ids_to_tokens(self.translation_token_ids) - return SentenceTranslation(self.translated_text, tokens, self.token_scores, self.sequence_score) + return SentenceTranslation( + to_py_obj(self.translated_text), + to_py_obj(tokens), + to_py_obj(self.token_scores), + to_py_obj(self.sequence_score), + ) # This class represents multiple translations of a single input sequence @@ -849,7 +855,7 @@ def _get_model_outputs(self) -> List[ModelOutput]: ] def convert_to_sentence_translation_group(self, tokenizer: PreTrainedTokenizer) -> SentenceTranslationGroup: - return SentenceTranslationGroup( + return list( [model_output.convert_to_sentence_translation(tokenizer) for model_output in self._get_model_outputs()] ) @@ -1257,7 +1263,7 @@ def translate( total=len(sentences), unit="ex", ): - yield model_output_group.convert_to_sentence_translation_group(model_output_group, tokenizer) + yield model_output_group.convert_to_sentence_translation_group(tokenizer) def get_checkpoint_path(self, ckpt: Union[CheckpointType, str, int]) -> Tuple[Path, int]: step: Optional[int] = None diff --git a/silnlp/nmt/translate.py b/silnlp/nmt/translate.py index 52771a26..581d4b1b 100644 --- a/silnlp/nmt/translate.py +++ b/silnlp/nmt/translate.py @@ -11,7 +11,7 @@ from ..common.environment import SIL_NLP_ENV from ..common.paratext import book_file_name_digits, get_project_dir from ..common.postprocesser import PostprocessConfig, PostprocessHandler -from ..common.translator import TranslationGroup, Translator +from ..common.translator import SentenceTranslationGroup, Translator from ..common.utils import get_git_revision_hash, show_attrs from .clearml_connection import SILClearML from .config import CheckpointType, Config, NMTModel @@ -31,7 +31,7 @@ def translate( trg_iso: str, produce_multiple_translations: bool = False, vrefs: Optional[Iterable[VerseRef]] = None, - ) -> Iterable[TranslationGroup]: + ) -> Iterable[SentenceTranslationGroup]: return self._model.translate( sentences, src_iso, trg_iso, produce_multiple_translations, vrefs, self._checkpoint ) From 4b483c0af5ff752c4e982aad28ece7e2990300db Mon Sep 17 00:00:00 2001 From: Ben King Date: Mon, 29 Sep 2025 15:00:43 +0000 Subject: [PATCH 3/5] Fix for empty sentences --- silnlp/common/translator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/silnlp/common/translator.py b/silnlp/common/translator.py index ecf65b2c..3c589f84 100644 --- a/silnlp/common/translator.py +++ b/silnlp/common/translator.py @@ -381,13 +381,14 @@ def translate_usfm( sentence_translation_groups: List[SentenceTranslationGroup] = list( self.translate(sentences, src_iso, trg_iso, produce_multiple_translations, vrefs) ) + num_drafts = len(sentence_translation_groups[0]) # Add empty sentences back in # Prevents pre-existing text from showing up in the sections of translated text for idx, vref in reversed(empty_sents): sentences.insert(idx, "") vrefs.insert(idx, vref) - 
sentence_translation_groups.insert(idx, [SentenceTranslation("", [], [], None)]) + sentence_translation_groups.insert(idx, [SentenceTranslation("", [], [], None)] * num_drafts) text_behavior = ( UpdateUsfmTextBehavior.PREFER_NEW if trg_project is not None else UpdateUsfmTextBehavior.STRIP_EXISTING From 634c600e71c499ff5cae963d0a44aaec7be7a7f0 Mon Sep 17 00:00:00 2001 From: Ben King Date: Mon, 29 Sep 2025 15:24:04 +0000 Subject: [PATCH 4/5] Improved comments on new classes --- silnlp/common/translator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/silnlp/common/translator.py b/silnlp/common/translator.py index 3c589f84..a235cca5 100644 --- a/silnlp/common/translator.py +++ b/silnlp/common/translator.py @@ -37,6 +37,7 @@ CONFIDENCE_SCORES_SUFFIX = ".confidences.tsv" +# A single translation of a single sentence class SentenceTranslation: def __init__( self, @@ -70,7 +71,7 @@ def join_token_scores_for_confidence_file(self) -> str: SentenceTranslationGroup = List[SentenceTranslation] -# A list representing a single draft (one translation of each input sentence) +# A class representing a single draft (one translation of each input sentence) class TranslatedDraft: def __init__(self, sentence_translations: List[SentenceTranslation]): self._sentence_translations = sentence_translations From 4fbf1c76e8c9a7f6717c04438b307ce96ded677f Mon Sep 17 00:00:00 2001 From: Ben King Date: Fri, 3 Oct 2025 20:27:16 +0000 Subject: [PATCH 5/5] Fix method naming bug --- silnlp/common/translator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/silnlp/common/translator.py b/silnlp/common/translator.py index a235cca5..7431b082 100644 --- a/silnlp/common/translator.py +++ b/silnlp/common/translator.py @@ -120,7 +120,7 @@ def get_all_sequence_confidence_scores(self) -> List[float]: if st.get_sequence_confidence_score() is not None ] - def get_rows_for_postprocess(self) -> List[str]: + def get_all_translations(self) -> List[str]: return [st.get_translation() for st in self._sentence_translations] @@ -397,7 +397,7 @@ def translate_usfm( draft_set: DraftGroup = DraftGroup(sentence_translation_groups) for draft_index, translated_draft in enumerate(draft_set.get_drafts(), 1): - postprocess_handler.construct_rows(vrefs, sentences, translated_draft.get_rows_for_postprocess()) + postprocess_handler.construct_rows(vrefs, sentences, translated_draft.get_all_translations()) for config in postprocess_handler.configs:
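
[Aside: taken together, the series leaves the confidence flow looking roughly like the sketch below. The file name, sentences, and scores are hypothetical, and it assumes the patched silnlp modules are importable.]

from pathlib import Path

from silnlp.common.translator import DraftGroup, SentenceTranslation, generate_confidence_files

# One single-draft group per source sentence, carrying beam-search
# log-probabilities for each token and for the whole sequence.
groups = [
    [SentenceTranslation("In the beginning", ["In", "the", "beginning"], [-0.02, -0.11, -0.05], -0.18)],
    [SentenceTranslation("God created", ["God", "created"], [-0.04, -0.09], -0.13)],
]
for draft_index, draft in enumerate(DraftGroup(groups).get_drafts(), 1):
    # For a .txt target this writes demo.txt.confidences.tsv and appends a
    # geometric-mean confidence row to confidences.files.tsv alongside it.
    generate_confidence_files(draft, Path("demo.txt"), draft_index=draft_index)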