Add dynamic threshold and LLM call counter

Acusick1 · Acusick1 · commit da19e3adecfa · 2024-07-05T19:31:30.000+01:00
diff --git a/attribution/api_attribution.py b/attribution/api_attribution.py
@@ -1,6 +1,7 @@
 import asyncio
 import itertools
 import os
+import statistics
 from copy import deepcopy
 from typing import Any, List, Optional
 
@@ -116,11 +117,15 @@ def get_perturbations(self, input_text, chunksize, **kwargs):
         
         return perturbations
 
-    async def hierarchical_perturbation(self, input_text: str, init_chunksize: int, stages: int, **kwargs):
+    async def hierarchical_perturbation(self, input_text: str, init_chunksize: int, stages: int, threshold: float = 0.5, **kwargs):
         perturbation_strategy: PerturbationStrategy = kwargs.get(
             "perturbation_strategy", FixedPerturbationStrategy()
         )
 
+        attribution_strategies: List[str] = kwargs.get(
+            "attribution_strategies", ["cosine", "prob_diff"]
+        )
+
         logger: ExperimentLogger = kwargs.get("logger", None)
         perturb_word_wise: bool = kwargs.get("perturb_word_wise", False)
 
@@ -139,21 +144,25 @@ async def hierarchical_perturbation(self, input_text: str, init_chunksize: int,
         process_chunks = None
         prev_perturbations = None
         prev_process_chunks = None
-        all_scores = []
+        total_llm_calls = 0
         for stage in range(stages):
             
             perturbations = self.get_perturbations(input_text, chunksize, **kwargs)
 
             if stage > 0:
-                process_chunks = []
-                for p, processed in zip(prev_perturbations, prev_process_chunks):
+                scores = []
+                for perturbation, processed in zip(prev_perturbations, prev_process_chunks):
                     if processed:
-                        score = chunk_scores.pop(0)
-                        decision = score["cosine"]["sentence_attribution"] > 0.5
+                        attr = chunk_scores.pop(0)
+                        scores.append(attr[attribution_strategies[0]]["sentence_attribution"])
                     else:
-                        decision = False
+                        scores.append(None)
                     
-                    process_chunks.extend([decision] * (2 if chunksize > 1 else len(p["unit_tokens"])))
+                process_chunks = []
+                median_score = statistics.median([s for s in scores if s is not None])
+                for score in scores:
+                    decision = score is not None and (score > threshold or score > median_score) 
+                    process_chunks.extend([decision] * (2 if chunksize > 1 else len(perturbation["unit_tokens"])))
             else:
                 process_chunks = [True] * len(perturbations)
 
@@ -165,12 +174,12 @@ async def hierarchical_perturbation(self, input_text: str, init_chunksize: int,
             outputs = await self.compute_attribution_chunks(perturbations, **kwargs)
             chunk_scores = self.get_scores(outputs, original_output, **kwargs)
 
+            total_llm_calls += len(outputs)
             prev_process_chunks = process_chunks
 
-
             if logger:
-                for p, output, score in zip(perturbations, outputs, chunk_scores):
-                    for unit_token, token_id in zip(p["unit_tokens"], p["token_idx"]):
+                for perturbation, output, score in zip(perturbations, outputs, chunk_scores):
+                    for unit_token, token_id in zip(perturbation["unit_tokens"], perturbation["token_idx"]):
 
                         for attribution_strategy, attr_result in score.items():
                             
@@ -187,22 +196,21 @@ async def hierarchical_perturbation(self, input_text: str, init_chunksize: int,
                                     j,
                                     attr_result["attributed_tokens"][j],
                                     attr_score.squeeze(),
-                                    p["input"],
+                                    perturbation["input"],
                                     output.message.content,
                                 )
 
                 logger.log_perturbation(
                     0, # TODO: Why is this here?
-                    self.tokenizer.decode(p["replaced_token_ids"], skip_special_tokens=True)[
+                    self.tokenizer.decode(perturbation["replaced_token_ids"], skip_special_tokens=True)[
                         0
                     ],
                     perturbation_strategy,
                     input_text,
                     original_output.message.content,
-                    p["input"],
+                    perturbation["input"],
                     output.message.content,
                 )
-                logger.stop_experiment()
 
             if stage == stages - 2:
                 chunksize = 1
@@ -211,10 +219,9 @@ async def hierarchical_perturbation(self, input_text: str, init_chunksize: int,
                 if chunksize == 0:
                     break
 
-        logger.df_token_attribution_matrix = logger.df_token_attribution_matrix.drop_duplicates(subset="input_token_pos", keep="last").sort_values(by="input_token_pos")
-        logger.df_input_token_attribution = logger.df_input_token_attribution.drop_duplicates(subset="input_token_pos", keep="last").sort_values(by="input_token_pos")
-
-        return all_scores
+        logger.df_token_attribution_matrix = logger.df_token_attribution_matrix.drop_duplicates(subset=["input_token_pos", "output_token"], keep="last").sort_values(by="input_token_pos")
+        logger.df_input_token_attribution = logger.df_input_token_attribution.drop_duplicates(subset=["input_token_pos"], keep="last").sort_values(by="input_token_pos")
+        logger.stop_experiment(num_llm_calls=total_llm_calls)
 
     def get_scores(self, perturbed_output, original_output, **kwargs):
         attribution_strategies: List[str] = kwargs.get(
@@ -463,4 +470,4 @@ async def compute_attributions(self, input_text: str, **kwargs):
                 perturbation["input"],
                 perturbed_output.message.content,
             )
-            logger.stop_experiment()
+            logger.stop_experiment(num_llm_calls=len(outputs))
diff --git a/attribution/experiment_logger.py b/attribution/experiment_logger.py
@@ -20,6 +20,7 @@ def __init__(self, experiment_id=0):
                 "perturbation_strategy",
                 "perturb_word_wise",
                 "duration",
+                "num_llm_calls",
             ]
         )
         self.df_input_token_attribution = pd.DataFrame(
@@ -72,12 +73,16 @@ def start_experiment(
             "perturbation_strategy": str(perturbation_strategy),
             "perturb_word_wise": perturb_word_wise,
             "duration": None,
+            "num_llm_calls": None,
         }
 
-    def stop_experiment(self):
+    def stop_experiment(self, num_llm_calls: Optional[int] = None):
         self.df_experiments.loc[len(self.df_experiments) - 1, "duration"] = (
             time.time() - self.experiment_start_time
         )
+        self.df_experiments.loc[len(self.df_experiments) - 1, "num_llm_calls"] = (
+            num_llm_calls
+        )
 
     def log_input_token_attribution(
         self,

Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,7 @@ def __init__(self, experiment_id=0):`
`20`	`20`	`"perturbation_strategy",`
`21`	`21`	`"perturb_word_wise",`
`22`	`22`	`"duration",`
	`23`	`+ "num_llm_calls",`
`23`	`24`	`]`
`24`	`25`	`)`
`25`	`26`	`self.df_input_token_attribution = pd.DataFrame(`
`@@ -72,12 +73,16 @@ def start_experiment(`
`72`	`73`	`"perturbation_strategy": str(perturbation_strategy),`
`73`	`74`	`"perturb_word_wise": perturb_word_wise,`
`74`	`75`	`"duration": None,`
	`76`	`+ "num_llm_calls": None,`
`75`	`77`	`}`
`76`	`78`
`77`		`- def stop_experiment(self):`
	`79`	`+ def stop_experiment(self, num_llm_calls: Optional[int] = None):`
`78`	`80`	`self.df_experiments.loc[len(self.df_experiments) - 1, "duration"] = (`
`79`	`81`	`time.time() - self.experiment_start_time`
`80`	`82`	`)`
	`83`	`+ self.df_experiments.loc[len(self.df_experiments) - 1, "num_llm_calls"] = (`
	`84`	`+ num_llm_calls`
	`85`	`+ )`
`81`	`86`
`82`	`87`	`def log_input_token_attribution(`
`83`	`88`	`self,`