@@ -281,28 +281,28 @@ Let's write a helper to get the strongest words for each topic. This will show t
281
281
``` python
282
282
import pandas as pd
283
283
284
def show_topics(vectorizer, svdmodel, topic_number, n):
    """Return the strongest positive and negative terms for one topic.

    Parameters
    ----------
    vectorizer
        Fitted TF-IDF vectorizer; only ``get_feature_names_out()`` is used.
    svdmodel
        Fitted SVD model; only ``components_[topic_number]`` is used.
        NOTE(review): assumes the weights in that row are aligned with the
        vectorizer's feature names -- confirm both were fitted together.
    topic_number
        Index of the topic (row of ``components_``) to inspect.
    n
        How many terms to take from each end of the weight ranking.

    Returns
    -------
    pandas.DataFrame
        Columns ``"Term"`` and ``"Weight"``: the ``n`` largest weights
        followed by the ``n`` smallest weights, each group in descending
        weight order. The original row index is kept.
    """
    # Get the feature names (terms) from the TF-IDF vectorizer
    terms = vectorizer.get_feature_names_out()

    # Get the weights of the terms for the specified topic from the SVD model
    weights = svdmodel.components_[topic_number]

    # Create a DataFrame with terms and their corresponding weights
    df = pd.DataFrame({"Term": terms, "Weight": weights})

    # Sort once, largest weight first; both ends are read from this ranking
    ranked = df.sort_values(by=["Weight"], ascending=False)

    # Top n terms (largest positive weights)
    highs = ranked.head(n)

    # Bottom n terms (largest negative weights). tail(n) is used instead of
    # [-n:] because [-0:] would select every row when n == 0.
    lows = ranked.tail(n)

    # Concatenate top and bottom terms into a single DataFrame and return
    return pd.concat([highs, lows])
302
302
303
303
# Collect the 5 highest- and 5 lowest-weighted terms for each topic of interest
topic_words_x = show_topics(vectorizer, svdmodel, topic_number=1, n=5)  # Topic 1
topic_words_y = show_topics(vectorizer, svdmodel, topic_number=2, n=5)  # Topic 2
306
306
307
307
```
308
308
0 commit comments