Merge pull request #327 from dianna-ai/fix-text-dashboard

loostrum · web-flow · commit 1a0020bf9a38 · 2022-09-09T11:34:33.000+02:00
Fix text visualization in dashboard
diff --git a/dashboard/callbacks.py b/dashboard/callbacks.py
@@ -28,6 +28,7 @@
 
 folder_on_server = "app_data"
 os.makedirs(folder_on_server, exist_ok=True)
+tokenizer = SpacyTokenizer()  # for now always use SpacyTokenizer, needs to be changed
 
 # Build App
 external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
@@ -376,7 +377,6 @@ def global_store_t(method_sel, model_runner, input_text):
     labels = tuple(class_name_text)
     pred_idx = labels.index(pred_class)
 
-    tokenizer = SpacyTokenizer()  # for now always use SpacyTokenizer, needs to be changed
 
     # expensive query
     relevances = dianna.explain_text(
@@ -450,6 +450,7 @@ def update_multi_options_t(fn_m, input_text, sel_methods, new_model, new_text):
         model_runner = MovieReviewsModelRunner(onnx_model_path, word_vector_path, max_filter_size=5)
 
         try:
+            input_tokens = tokenizer.tokenize(input_text)
             predictions = model_runner(input_text)
             class_name = class_name_text
             pred_class = class_name[np.argmax(predictions)]
@@ -463,7 +464,7 @@ def update_multi_options_t(fn_m, input_text, sel_methods, new_model, new_text):
                     relevances_lime = global_store_t(
                         m, model_runner, input_text)
 
-                    output = _create_html(input_text, relevances_lime[0], max_opacity=0.8)
+                    output = _create_html(input_tokens, relevances_lime[0], max_opacity=0.8)
                     hti = Html2Image()
                     expl_path = 'text_expl.jpg'
 
@@ -493,7 +494,7 @@ def update_multi_options_t(fn_m, input_text, sel_methods, new_model, new_text):
                     relevances_rise = global_store_t(
                         m, model_runner, input_text)
 
-                    output = _create_html(input_text, relevances_rise[0], max_opacity=0.8)
+                    output = _create_html(input_tokens, relevances_rise[0], max_opacity=0.8)
                     hti = Html2Image()
                     expl_path = 'text_expl.jpg'
 
diff --git a/dashboard/utilities.py b/dashboard/utilities.py
@@ -117,17 +117,24 @@ def preprocess_function(image):
     """For LIME: we divided the input data by 256 for the model (binary mnist) and LIME needs RGB values."""
     return (image / 256).astype(np.float32)
 
-
-def _create_html(original_text, explanation, max_opacity):
+def _create_html(input_tokens, explanation, max_opacity):
     """Creates text explaination map using html format."""
     max_importance = max(abs(item[2]) for item in explanation)
-    body = original_text
-    words_in_reverse_order = sorted(explanation, key=lambda item: item[1], reverse=True)
-    for word, word_start, importance in words_in_reverse_order:
-        word_end = word_start + len(word)
-        highlighted_word = _highlight_word(word, importance, max_importance, max_opacity)
-        body = body[:word_start] + highlighted_word + body[word_end:]
-    return '<html><body>' + body + '</body></html>'
+    explained_indices = [index for _, index, _ in explanation]
+    highlighted_words = []
+    for index, word in enumerate(input_tokens):
+        # if word has an explanation, highlight based on that, otherwise
+        # make it grey
+        try:
+            explained_index = explained_indices.index(index)
+            importance = explanation[explained_index][2]
+            highlighted_words.append(
+                _highlight_word(word, importance, max_importance, max_opacity)
+                )
+        except ValueError:
+            highlighted_words.append(f'<span style="background:rgba(128, 128, 128, 0.3)">{word}</span>')
+
+    return '<html><body>' + ' '.join(highlighted_words) + '</body></html>'
 
 
 def _highlight_word(word, importance, max_importance, max_opacity):