
Commit 5e9540f

make agnostic across huggingface models
1 parent e49e4e9 · commit 5e9540f

File tree: 4 files changed, +35 −12 lines changed


README.md (+12 −1)

````diff
@@ -53,9 +53,10 @@ model = transformers.GPT2LMHeadModel.from_pretrained(model_name)
 if not isinstance(model, transformers.GPT2LMHeadModel):
     raise ValueError("model not found")
 
+embeddings = model.transformer.wte.weight.detach()
 model.eval()
 
-attributor = Attributor(model=model, tokenizer=tokenizer)
+attributor = Attributor(model=model, embeddings=embeddings, tokenizer=tokenizer)
 attr_scores, token_ids = attributor.get_attributions(
     input_string="the five continents are asia, europe, afri",
     generation_length=7,
@@ -71,6 +72,16 @@ attributor.print_attributions(
 
 You can run this script with `example.py`.
 
+### Limitations
+
+This library only supports models that expose a common interface for passing in embeddings and generating outputs without sampling, of the form:
+
+```python
+outputs = model(inputs_embeds=input_embeddings)
+```
+
+This format is common across HuggingFace models.
+
 ### GPU Acceleration
 
 To run the attribution process on a device of your choice, pass the device identifier into the `Attributor` class constructor:
````
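With the embedding matrix now passed in explicitly, the same wiring should carry over to other HuggingFace causal LMs that accept `inputs_embeds`. A minimal sketch, assuming a checkpoint that exposes the standard `get_input_embeddings()` accessor (the `distilgpt2` name and the `attribution.attribution` import path are illustrative assumptions, not part of this commit):

```python
import transformers

from attribution.attribution import Attributor  # assumed import path

# Illustrative checkpoint; any causal LM accepting inputs_embeds should fit.
model = transformers.AutoModelForCausalLM.from_pretrained("distilgpt2")
tokenizer = transformers.AutoTokenizer.from_pretrained("distilgpt2")

# Architecture-agnostic equivalent of model.transformer.wte.weight.detach():
# HuggingFace PreTrainedModel subclasses expose get_input_embeddings().
embeddings = model.get_input_embeddings().weight.detach()
model.eval()

attributor = Attributor(
    model=model,
    embeddings=embeddings,
    tokenizer=tokenizer,
    # device="cuda",  # optional, per the GPU Acceleration section
)
```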

attribution/attribution.py (+14 −8)

```diff
@@ -1,32 +1,39 @@
 import gc
 import logging
-from typing import Optional, Tuple
+from typing import Optional, Tuple, cast
 import torch
+from torch import nn
 import transformers
 from attribution.visualization import RichTablePrinter
 
 
 class Attributor:
     device: str
-    model: transformers.GPT2LMHeadModel
+    model: nn.Module
     tokenizer: transformers.PreTrainedTokenizerBase
+    embeddings: torch.Tensor
 
     def __init__(
         self,
-        model: transformers.GPT2LMHeadModel,
+        model: nn.Module,
         tokenizer: transformers.PreTrainedTokenizerBase,
+        embeddings: torch.Tensor,
         device: Optional[str] = None,
         log_level: int = logging.WARNING,
     ):
         logging.basicConfig(level=log_level)
 
         if device is None:
-            device = model.device.type
+            if model.device:
+                device = cast(str, model.device.type)
+            else:
+                device = "cpu"
 
         logging.info(f"Using device: {device}")
         self.device = device
 
         self.model = model
+        self.embeddings = embeddings
         self.tokenizer = tokenizer
 
     def get_attributions(
@@ -58,13 +65,12 @@ def get_attributions(
         token_ids: torch.Tensor = torch.tensor(
             self.tokenizer(input_string).input_ids
         ).to(self.device)
-        embeddings: torch.Tensor = self.model.transformer.wte.weight.detach()
         input_length: int = token_ids.shape[0]
 
         attr_scores = torch.zeros(generation_length, generation_length + len(token_ids))
 
         for it in range(generation_length):
-            input_embeddings = self._get_input_embeddings(embeddings, token_ids)
+            input_embeddings = self._get_input_embeddings(self.embeddings, token_ids)
             output = self.model(inputs_embeds=input_embeddings)
 
             gen_tokens, next_token_id = self._generate_tokens(
@@ -126,7 +132,7 @@ def _get_input_embeddings(
 
     def _generate_tokens(
         self,
-        model: transformers.GPT2LMHeadModel,
+        model: nn.Module,
         token_ids: torch.Tensor,
         tokenizer: transformers.PreTrainedTokenizerBase,
     ):
@@ -166,7 +172,7 @@ def _get_attr_scores_next_token(
 
     def _validate_inputs(
         self,
-        model: transformers.GPT2LMHeadModel,
+        model: nn.Module,
         tokenizer: transformers.PreTrainedTokenizerBase,
         input_string: str,
         generation_length: int,
```
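The loop in `get_attributions` is what keeps the class model-agnostic: it never touches architecture internals, only gathers rows of the stored embedding matrix and feeds them through `inputs_embeds`. A rough sketch of that step, under the assumption that `_get_input_embeddings` performs a plain row gather (its actual body is not shown in this diff):

```python
import torch

def gather_input_embeddings(
    embeddings: torch.Tensor, token_ids: torch.Tensor
) -> torch.Tensor:
    # One embedding row per token id, shape (seq_len, hidden_dim).
    # requires_grad_ lets gradients flow back to these inputs so that
    # attribution scores can be computed against them.
    return embeddings[token_ids].clone().requires_grad_(True)

# Any nn.Module that accepts inputs_embeds can then run the forward pass:
# output = model(inputs_embeds=gather_input_embeddings(embeddings, token_ids))
```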

example.py (+2 −1)

```diff
@@ -9,9 +9,10 @@
 if not isinstance(model, transformers.GPT2LMHeadModel):
     raise ValueError("model not found")
 
+embeddings = model.transformer.wte.weight.detach()
 model.eval()
 
-attributor = Attributor(model=model, tokenizer=tokenizer)
+attributor = Attributor(model=model, embeddings=embeddings, tokenizer=tokenizer)
 attr_scores, token_ids = attributor.get_attributions(
     input_string="the five continents are asia, europe, afri",
     generation_length=7,
```

tests/test_attribution.py (+7 −2)

```diff
@@ -13,6 +13,11 @@ def model():
     return model
 
 
+@pytest.fixture
+def embeddings(model):
+    return model.transformer.wte.weight.detach()
+
+
 @pytest.fixture
 def tokenizer():
     tokenizer = transformers.GPT2Tokenizer.from_pretrained(
@@ -23,8 +28,8 @@ def tokenizer():
 
 
 @pytest.fixture
-def attributor(model, tokenizer):
-    return Attributor(model=model, tokenizer=tokenizer)
+def attributor(model, embeddings, tokenizer):
+    return Attributor(model=model, embeddings=embeddings, tokenizer=tokenizer)
 
 
 def test_get_input_embeddings(attributor, model, tokenizer):
```
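If the suite were later extended to cover a second architecture, the new `embeddings` fixture composes naturally with a parametrized model fixture. A hypothetical sketch (checkpoint names and fixture names are illustrative, not part of this commit):

```python
import pytest
import transformers

@pytest.fixture(params=["gpt2", "distilgpt2"])  # illustrative checkpoints
def any_model(request):
    model = transformers.AutoModelForCausalLM.from_pretrained(request.param)
    model.eval()
    return model

@pytest.fixture
def any_embeddings(any_model):
    # Architecture-agnostic lookup of the token-embedding matrix.
    return any_model.get_input_embeddings().weight.detach()
```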
