Update 06-lsa.md

qualiaMachine · web-flow · commit 1942966d0517 · 2024-04-15T21:29:39.000-05:00
diff --git a/_episodes/06-lsa.md b/_episodes/06-lsa.md
@@ -281,28 +281,28 @@ Let's write a helper to get the strongest words for each topic. This will show t
 ```python
 import pandas as pd
 
-def show_topics(topic, n):
-    # Get the feature names (terms) from the vectorizer
+def show_topics(vectorizer, svdmodel, topic_number, n):
+    # Get the feature names (terms) from the TF-IDF vectorizer
     terms = vectorizer.get_feature_names_out()
     
     # Get the weights of the terms for the specified topic from the SVD model
-    weights = svdmodel.components_[topic]
+    weights = svdmodel.components_[topic_number]
     
     # Create a DataFrame with terms and their corresponding weights
     df = pd.DataFrame({"Term": terms, "Weight": weights})
     
-    # Sort the DataFrame by weights in descending order to get top n terms
-    tops = df.sort_values(by=["Weight"], ascending=False)[0:n]
+    # Sort the DataFrame by weights in descending order to get top n terms (largest positive weights)
+    highs = df.sort_values(by=["Weight"], ascending=False)[0:n]
     
-    # Sort the DataFrame by weights in ascending order to get bottom n terms
-    bottoms = df.sort_values(by=["Weight"], ascending=False)[-n:]
+    # Sort by weights in descending order again and take the last n rows to get bottom n terms (largest negative weights)
+    lows = df.sort_values(by=["Weight"], ascending=False)[-n:]
     
     # Concatenate top and bottom terms into a single DataFrame and return
-    return pd.concat([tops, bottoms])
+    return pd.concat([highs, lows])
 
 # Get the top 5 and bottom 5 terms for each specified topic
-topic_words_x = show_topics(1, 5)  # Topic 1
-topic_words_y = show_topics(2, 5)  # Topic 2
+topic_words_x = show_topics(vectorizer, svdmodel, 1, 5)  # Topic 1
+topic_words_y = show_topics(vectorizer, svdmodel, 2, 5)  # Topic 2
 
 ```