import json
from collections import defaultdict
+ from collections.abc import Callable
from dataclasses import dataclass
from pathlib import Path
- from typing import Callable

import numpy as np
import torch

from delphi.config import CacheConfig
from delphi.latents.collect_activations import collect_activations

- location_tensor_shape = Float[Tensor, "batch sequence num_latents"]
- token_tensor_shape = Float[Tensor, "batch sequence"]
+ location_tensor_type = Int[Tensor, "batch_sequence 3"]
+ activation_tensor_type = Float[Tensor, "batch_sequence"]
+ token_tensor_type = Int[Tensor, "batch sequence"]
+ latent_tensor_type = Float[Tensor, "batch sequence num_latents"]
+
+
+ def get_nonzeros_batch(
+     latents: latent_tensor_type,
+ ) -> tuple[
+     Float[Tensor, "batch sequence num_latents"], Float[Tensor, "batch sequence"]
+ ]:
+     """
+     Get non-zero activations for large batches that exceed int32 max value.
+
+     Args:
+         latents: Input latent activations.
+
+     Returns:
+         tuple[Tensor, Tensor]: Non-zero latent locations and activations.
+     """
+     # Calculate the maximum batch size whose element count stays within the int32 maximum
+     max_batch_size = torch.iinfo(torch.int32).max // (
+         latents.shape[1] * latents.shape[2]
+     )
+     nonzero_latent_locations = []
+     nonzero_latent_activations = []
+
+     for i in range(0, latents.shape[0], max_batch_size):
+         batch = latents[i : i + max_batch_size]
+
+         # Get nonzero locations and activations
+         batch_locations = torch.nonzero(batch.abs() > 1e-5)
+         batch_activations = batch[batch.abs() > 1e-5]
+
+         # Adjust indices to account for batching
+         batch_locations[:, 0] += i
+         nonzero_latent_locations.append(batch_locations)
+         nonzero_latent_activations.append(batch_activations)
+
+     # Concatenate results
+     nonzero_latent_locations = torch.cat(nonzero_latent_locations, dim=0)
+     nonzero_latent_activations = torch.cat(nonzero_latent_activations, dim=0)
+     return nonzero_latent_locations, nonzero_latent_activations


class InMemoryCache:
@@ -37,25 +78,25 @@ def __init__(
            filters: Filters for selecting specific latents.
            batch_size: Size of batches for processing. Defaults to 64.
        """
-         self.latent_locations_batches: dict[str, list[location_tensor_shape]] = (
+         self.latent_locations_batches: dict[str, list[location_tensor_type]] = (
            defaultdict(list)
        )
-         self.latent_activations_batches: dict[str, list[location_tensor_shape]] = (
+         self.latent_activations_batches: dict[str, list[latent_tensor_type]] = (
            defaultdict(list)
        )
-         self.tokens_batches: dict[str, list[token_tensor_shape]] = defaultdict(list)
+         self.tokens_batches: dict[str, list[token_tensor_type]] = defaultdict(list)

-         self.latent_locations: dict[str, location_tensor_shape] = {}
-         self.latent_activations: dict[str, location_tensor_shape] = {}
-         self.tokens: dict[str, token_tensor_shape] = {}
+         self.latent_locations: dict[str, location_tensor_type] = {}
+         self.latent_activations: dict[str, latent_tensor_type] = {}
+         self.tokens: dict[str, token_tensor_type] = {}

        self.filters = filters
        self.batch_size = batch_size

    def add(
        self,
-         latents: location_tensor_shape,
-         tokens: token_tensor_shape,
+         latents: latent_tensor_type,
+         tokens: token_tensor_type,
        batch_number: int,
        module_path: str,
    ):
@@ -96,47 +137,9 @@ def save(self):
            self.tokens_batches[module_path], dim=0
        )

-     def get_nonzeros_batch(
-         self, latents: location_tensor_shape
-     ) -> tuple[
-         Float[Tensor, "batch sequence num_latents"], Float[Tensor, "batch sequence "]
-     ]:
-         """
-         Get non-zero activations for large batches that exceed int32 max value.
-
-         Args:
-             latents: Input latent activations.
-
-         Returns:
-             tuple[Tensor, Tensor]: Non-zero latent locations and activations.
-         """
-         # Calculate the maximum batch size that fits within sys.maxsize
-         max_batch_size = torch.iinfo(torch.int32).max // (
-             latents.shape[1] * latents.shape[2]
-         )
-         nonzero_latent_locations = []
-         nonzero_latent_activations = []
-
-         for i in range(0, latents.shape[0], max_batch_size):
-             batch = latents[i : i + max_batch_size]
-
-             # Get nonzero locations and activations
-             batch_locations = torch.nonzero(batch.abs() > 1e-5)
-             batch_activations = batch[batch.abs() > 1e-5]
-
-             # Adjust indices to account for batching
-             batch_locations[:, 0] += i
-             nonzero_latent_locations.append(batch_locations)
-             nonzero_latent_activations.append(batch_activations)
-
-         # Concatenate results
-         nonzero_latent_locations = torch.cat(nonzero_latent_locations, dim=0)
-         nonzero_latent_activations = torch.cat(nonzero_latent_activations, dim=0)
-         return nonzero_latent_locations, nonzero_latent_activations
-
-     def get_nonzeros(self, latents: location_tensor_shape, module_path: str) -> tuple[
-         location_tensor_shape,
-         location_tensor_shape,
+     def get_nonzeros(self, latents: latent_tensor_type, module_path: str) -> tuple[
+         location_tensor_type,
+         activation_tensor_type,
    ]:
        """
        Get the nonzero latent locations and activations.
@@ -153,7 +156,7 @@ def get_nonzeros(self, latents: location_tensor_shape, module_path: str) -> tupl
            (
                nonzero_latent_locations,
                nonzero_latent_activations,
-             ) = self.get_nonzeros_batch(latents)
+             ) = get_nonzeros_batch(latents)
        else:
            nonzero_latent_locations = torch.nonzero(latents.abs() > 1e-5)
            nonzero_latent_activations = latents[latents.abs() > 1e-5]
@@ -209,8 +212,8 @@ def __init__(
            self.filter_submodules(filters)

    def load_token_batches(
-         self, n_tokens: int, tokens: token_tensor_shape
-     ) -> list[token_tensor_shape]:
+         self, n_tokens: int, tokens: token_tensor_type
+     ) -> list[token_tensor_type]:
        """
        Load and prepare token batches for processing.

@@ -248,7 +251,7 @@ def filter_submodules(self, filters: dict[str, Float[Tensor, "indices"]]):
        ]
        self.hookpoint_to_sparse_encode = filtered_submodules

-     def run(self, n_tokens: int, tokens: token_tensor_shape):
+     def run(self, n_tokens: int, tokens: token_tensor_type):
        """
        Run the latent caching process.

@@ -521,11 +524,11 @@ def generate_statistics_cache(
    print(f"Fraction of strong single token latents: {strong_token_fraction:%}")

    return CacheStatistics(
-         frac_alive=fraction_alive,
-         frac_fired_1pct=one_percent,
-         frac_fired_10pct=ten_percent,
-         frac_weak_single_token=single_token_fraction,
-         frac_strong_single_token=strong_token_fraction,
+         frac_alive=float(fraction_alive),
+         frac_fired_1pct=float(one_percent),
+         frac_fired_10pct=float(ten_percent),
+         frac_weak_single_token=float(single_token_fraction),
+         frac_strong_single_token=float(strong_token_fraction),
    )

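A note on the float() casts in the hunk above: the statistics are produced by numpy/torch reductions, so without the cast the CacheStatistics fields would hold numpy or tensor scalars rather than plain Python floats. Assuming those statistics are later serialized with the json module imported at the top of this file (an assumption; the serialization code is not part of this diff), the cast is what keeps json.dumps from failing. A minimal sketch, not code from this repository:

# Minimal sketch: why numpy scalars are cast to plain floats before being
# stored on a dataclass that may be JSON-serialized.
import json
from dataclasses import asdict, dataclass

import numpy as np


@dataclass
class Stats:  # stand-in for CacheStatistics, whose definition is not shown in this diff
    frac_alive: float


raw = np.float32(0.25)  # the kind of scalar a numpy reduction such as .mean() returns

try:
    json.dumps(asdict(Stats(frac_alive=raw)))
except TypeError as err:
    print(f"numpy scalar rejected: {err}")

print(json.dumps(asdict(Stats(frac_alive=float(raw)))))  # {"frac_alive": 0.25}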
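For readers skimming the get_nonzeros_batch function added at the top of this diff: per its docstring, torch.nonzero runs into trouble once the tensor's total element count exceeds the int32 maximum, so the function slices the batch dimension into chunks whose element count stays under that limit, offsets each chunk's batch indices by the chunk start, and concatenates the results. A short usage sketch; the import path delphi.latents.cache is a guess based on the other imports in this file and is not confirmed by the diff:

# Usage sketch only; the module path below is assumed, not taken from this diff.
import torch

from delphi.latents.cache import get_nonzeros_batch  # assumed location of this file

# Small (batch, sequence, num_latents) activations with two nonzero entries.
latents = torch.zeros(4, 8, 16)
latents[0, 1, 2] = 0.5
latents[3, 7, 15] = -1.0

locations, activations = get_nonzeros_batch(latents)
print(locations)    # two (batch, seq, latent) index rows: [0, 1, 2] and [3, 7, 15]
print(activations)  # tensor([ 0.5000, -1.0000])

# On an input this small the chunked path reduces to a single pass, so the
# result matches torch.nonzero / boolean masking applied directly.
assert torch.equal(locations, torch.nonzero(latents.abs() > 1e-5))
assert torch.equal(activations, latents[latents.abs() > 1e-5])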