21 | 21 | from torch import nn
22 | 22 | from torch.utils.hooks import RemovableHandle
23 | 23 |
24 |    | -from ....utils import StackFrame, find_block_stack, get_post_variable_assignment_hook, validate_indices
   | 24 | +from ....utils import (
   | 25 | +    StackFrame,
   | 26 | +    find_block_stack,
   | 27 | +    get_post_variable_assignment_hook,
   | 28 | +    recursive_get_submodule,
   | 29 | +    validate_indices,
   | 30 | +)
25 | 31 | from ....utils.typing import (
26 | 32 |     EmbeddingsTensor,
27 | 33 |     InseqAttribution,
@@ -99,6 +105,11 @@ def value_zeroing_forward_mid_hook(
 99 | 105 |     zeroed_units_indices: Optional[OneOrMoreIndices] = None,
100 | 106 |     batch_size: int = 1,
101 | 107 | ) -> None:
    | 108 | +    if varname not in frame.f_locals:
    | 109 | +        raise ValueError(
    | 110 | +            f"Variable {varname} not found in the local frame. "
    | 111 | +            f"Other variable names: {', '.join(frame.f_locals.keys())}"
    | 112 | +        )
102 | 113 |     # Zeroing value vectors corresponding to the given token index
103 | 114 |     if zeroed_token_index is not None:
104 | 115 |         values_size = frame.f_locals[varname].size()
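
The added guard assumes that get_post_variable_assignment_hook hands the hook the Python frame of the attention forward pass, so the expected local variable can be checked via frame.f_locals before it is indexed. A minimal standalone sketch of that check, using inspect.currentframe() in place of the library's hook machinery (the function and variable names below are illustrative only):

import inspect
from types import FrameType

def check_frame_for_variable(frame: FrameType, varname: str) -> None:
    # Same check as new lines 108-112: fail with an informative error
    # listing the available locals when the expected variable is missing.
    if varname not in frame.f_locals:
        raise ValueError(
            f"Variable {varname} not found in the local frame. "
            f"Other variable names: {', '.join(frame.f_locals.keys())}"
        )

def fake_attention_forward() -> None:
    value_states = [0.0, 1.0]  # stand-in for the real value tensor
    check_frame_for_variable(inspect.currentframe(), "value_states")

fake_attention_forward()  # passes: "value_states" is a local of the caller frame
# Asking the same frame for a name that was never assigned would raise the ValueError.
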
@@ -234,7 +245,9 @@ def compute_modules_post_zeroing_similarity(
234 | 245 |     value_zeroing_hook_handles: list[RemovableHandle] = []
235 | 246 |     # Value zeroing hooks are registered for every token separately since they are token-dependent
236 | 247 |     for block_idx, block in enumerate(modules):
237 |     | -        attention_module = block.get_submodule(attention_module_name)
    | 248 | +        attention_module = recursive_get_submodule(block, attention_module_name)
    | 249 | +        if attention_module is None:
    | 250 | +            raise ValueError(f"Attention module {attention_module_name} not found in block {block_idx}.")
238 | 251 |         if isinstance(zeroed_units_indices, dict):
239 | 252 |             if block_idx not in zeroed_units_indices:
240 | 253 |                 continue
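
nn.Module.get_submodule raises AttributeError when the requested path does not exist, whereas the replacement expects a None return that the caller turns into a clearer ValueError naming the block index. The helper itself is not shown in this diff; below is a minimal sketch of a recursive lookup with that assumed return-None contract (the function name and the "self_attn" example are placeholders, not the library's actual implementation):

from typing import Optional
from torch import nn

def recursive_get_submodule_sketch(parent: nn.Module, target: str) -> Optional[nn.Module]:
    # Assumed contract: return the first descendant module exposed under the
    # attribute name `target`, or None if nothing matches (instead of raising).
    candidate = getattr(parent, target, None)
    if isinstance(candidate, nn.Module):
        return candidate
    for child in parent.children():
        found = recursive_get_submodule_sketch(child, target)
        if found is not None:
            return found
    return None

# Hypothetical usage mirroring the hunk above:
# attention_module = recursive_get_submodule_sketch(block, "self_attn")
# if attention_module is None:
#     raise ValueError("Attention module self_attn not found in this block.")
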