Skip to content
471 changes: 467 additions & 4 deletions src/lm_saes/backend/language_model.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/lm_saes/circuit/utils/attribution_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def select_feature_activations(
return torch.stack(activations)


# TODO: remove this function
def ensure_tokenized(prompt: Union[str, torch.Tensor, List[int]], tokenizer) -> torch.Tensor:
"""Convert *prompt* → 1-D tensor of token ids (no batch dim)."""

Expand Down
8 changes: 8 additions & 0 deletions src/lm_saes/clt.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@ def associated_hook_points(self) -> list[str]:
"""All hook points used by the CLT."""
return self.hook_points_in + self.hook_points_out

@property
def hooks_in(self) -> list[str]:
    """Hook points whose activations the CLT reads as inputs."""
    return self.hook_points_in

@property
def hooks_out(self) -> list[str]:
    """Hook points whose activations the CLT reconstructs as outputs."""
    return self.hook_points_out

def model_post_init(self, __context):
super().model_post_init(__context)
assert len(self.hook_points_in) == len(self.hook_points_out), (
Expand Down
8 changes: 8 additions & 0 deletions src/lm_saes/crosscoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ class CrossCoderConfig(SparseDictionaryConfig):
def associated_hook_points(self) -> list[str]:
return self.hook_points

@property
def hooks_in(self) -> list[str]:
    """Input hook points. For a crosscoder every hook point serves as both
    input and output, so this is the full ``hook_points`` list."""
    return self.hook_points

@property
def hooks_out(self) -> list[str]:
    """Output hook points. Identical to ``hooks_in`` — a crosscoder reads and
    reconstructs the same set of hook points."""
    return self.hook_points

@property
def n_heads(self) -> int:
return len(self.hook_points)
Expand Down
8 changes: 8 additions & 0 deletions src/lm_saes/lorsa.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ def associated_hook_points(self) -> list[str]:
"""All hook points used by Lorsa."""
return [self.hook_point_in, self.hook_point_out]

@property
def hooks_in(self) -> list[str]:
    """Input hook points, as a single-element list for interface uniformity."""
    return [self.hook_point_in]

@property
def hooks_out(self) -> list[str]:
    """Output hook points, as a single-element list for interface uniformity."""
    return [self.hook_point_out]

def model_post_init(self, __context):
super().model_post_init(__context)
assert self.hook_point_in is not None and self.hook_point_out is not None, (
Expand Down
8 changes: 8 additions & 0 deletions src/lm_saes/molt.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,14 @@ def num_rank_types(self) -> int:
def associated_hook_points(self) -> list[str]:
return [self.hook_point_in, self.hook_point_out]

@property
def hooks_in(self) -> list[str]:
    """Input hook points, as a single-element list for interface uniformity."""
    return [self.hook_point_in]

@property
def hooks_out(self) -> list[str]:
    """Output hook points, as a single-element list for interface uniformity."""
    return [self.hook_point_out]


@register_sae_model("molt")
class MixtureOfLinearTransform(SparseDictionary):
Expand Down
8 changes: 8 additions & 0 deletions src/lm_saes/sae.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ class SAEConfig(SparseDictionaryConfig):
def associated_hook_points(self) -> list[str]:
return [self.hook_point_in, self.hook_point_out]

@property
def hooks_in(self) -> list[str]:
    """Input hook points, as a single-element list for interface uniformity."""
    return [self.hook_point_in]

@property
def hooks_out(self) -> list[str]:
    """Output hook points, as a single-element list for interface uniformity."""
    return [self.hook_point_out]


@register_sae_model("sae")
class SparseAutoEncoder(SparseDictionary):
Expand Down
12 changes: 12 additions & 0 deletions src/lm_saes/sparse_dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,18 @@ def associated_hook_points(self) -> list[str]:
"""List of hook points used by the sparse dictionary, including all input and label hook points. This is used to retrieve useful data from the input activation source."""
raise NotImplementedError("Subclasses must implement this method")

@property
@abstractmethod
def hooks_in(self) -> list[str]:
    """All input hook points of the sparse dictionary. Used to retrieve the
    input activations from the activation source."""
    raise NotImplementedError("Subclasses must implement this method")

@property
@abstractmethod
def hooks_out(self) -> list[str]:
    """All output hook points of the sparse dictionary. Used to retrieve the
    output (label) activations from the activation source."""
    raise NotImplementedError("Subclasses must implement this method")


class SparseDictionary(HookedRootModule, ABC):
"""Abstract base class for all sparse dictionary models.
Expand Down
14 changes: 14 additions & 0 deletions src/lm_saes/utils/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,17 @@ def get_slice_length(s: slice, length: int):
start, stop, step = s.indices(length)
length = (stop - start + step - 1) // step
return length


def ensure_tokenized(
prompt: str | torch.Tensor | list[int], tokenizer, device: torch.device | str = "cpu"
) -> torch.Tensor:
"""Convert *prompt* → 1-D tensor of token ids (no batch dim)."""

if isinstance(prompt, str):
return tokenizer(prompt, return_tensors="pt").input_ids[0].to(device)
if isinstance(prompt, torch.Tensor):
return prompt.squeeze(0).to(device) if prompt.ndim == 2 else prompt.to(device)
if isinstance(prompt, list):
return torch.tensor(prompt, dtype=torch.long, device=device)
raise TypeError(f"Unsupported prompt type: {type(prompt)}")