kambris committed on
Commit
e2c8b5b
·
verified ·
1 Parent(s): a1fcd63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -214,7 +214,6 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
214
  "n_gram_range": (1, 3),
215
  "top_n_words": 15,
216
  "verbose": True,
217
- "stop_words": ARABIC_STOP_WORDS
218
  }
219
 
220
  if topic_strategy == "Manual" and n_topics is not None:
@@ -223,6 +222,11 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
223
  topic_model_params["nr_topics"] = "auto"
224
 
225
  topic_model = BERTopic(**topic_model_params)
 
 
 
 
 
226
 
227
  for country, group in df.groupby('country'):
228
  progress_text = f"Processing poems for {country}..."
@@ -319,10 +323,7 @@ def main():
319
  if topic_strategy == "Manual":
320
  # Calculate reasonable max topics based on dataset size
321
  n_documents = len(df)
322
- if n_documents < 1000:
323
- max_topics = min(50, n_documents // 20)
324
- else:
325
- max_topics = min(500, int(np.log10(n_documents) * 100))
326
 
327
  n_topics = st.slider(
328
  "Number of Topics",
 
214
  "n_gram_range": (1, 3),
215
  "top_n_words": 15,
216
  "verbose": True,
 
217
  }
218
 
219
  if topic_strategy == "Manual" and n_topics is not None:
 
222
  topic_model_params["nr_topics"] = "auto"
223
 
224
  topic_model = BERTopic(**topic_model_params)
225
+
226
+ # Create vectorizer with stop words
227
+ from sklearn.feature_extraction.text import CountVectorizer
228
+ vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS))
229
+ topic_model.vectorizer_model = vectorizer
230
 
231
  for country, group in df.groupby('country'):
232
  progress_text = f"Processing poems for {country}..."
 
323
  if topic_strategy == "Manual":
324
  # Calculate reasonable max topics based on dataset size
325
  n_documents = len(df)
326
+ max_topics = max(2, min(50, n_documents // 20))
 
 
 
327
 
328
  n_topics = st.slider(
329
  "Number of Topics",