kambris committed on
Commit
89e32b2
·
verified ·
1 Parent(s): 9704d3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -2
app.py CHANGED
@@ -237,6 +237,30 @@ def format_emotions(emotion_counts):
237
  def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=3):
238
  summaries = []
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  # Create a placeholder for the progress bar
241
  progress_placeholder = st.empty()
242
  progress_bar = progress_placeholder.progress(0)
@@ -286,8 +310,30 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
286
  except Exception as e:
287
  st.warning(f"Error classifying emotion for poem {i+1} in {country}: {str(e)}")
288
  continue
289
-
290
- # Rest of your existing processing code...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
  # Clear progress for next country
293
  progress_placeholder.empty()
 
237
  def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, top_n=50, topic_strategy="Auto", n_topics=None, min_topic_size=3):
238
  summaries = []
239
 
240
+ topic_model_params = {
241
+ "language": "arabic",
242
+ "calculate_probabilities": True,
243
+ "min_topic_size": min_topic_size,
244
+ "n_gram_range": (1, 1),
245
+ "top_n_words": 15,
246
+ "verbose": True,
247
+ }
248
+
249
+ if topic_strategy == "Manual":
250
+ topic_model_params["nr_topics"] = n_topics
251
+ else:
252
+ topic_model_params["nr_topics"] = "auto"
253
+
254
+ topic_model = BERTopic(
255
+ embedding_model=bert_model,
256
+ **topic_model_params
257
+ )
258
+
259
+ vectorizer = CountVectorizer(stop_words=list(ARABIC_STOP_WORDS),
260
+ min_df=1,
261
+ max_df=1.0)
262
+ topic_model.vectorizer_model = vectorizer
263
+
264
  # Create a placeholder for the progress bar
265
  progress_placeholder = st.empty()
266
  progress_bar = progress_placeholder.progress(0)
 
310
  except Exception as e:
311
  st.warning(f"Error classifying emotion for poem {i+1} in {country}: {str(e)}")
312
  continue
313
+
314
+ try:
315
+ if len(texts) < min_topic_size:
316
+ st.warning(f"Not enough documents for {country} to generate meaningful topics (minimum {min_topic_size} required)")
317
+ continue
318
+
319
+ topics, probs = topic_model.fit_transform(texts, embeddings)
320
+
321
+ topic_counts = Counter(topics)
322
+
323
+ top_topics = format_topics(topic_model, topic_counts.most_common(top_n))
324
+ top_emotions = format_emotions(Counter(all_emotions).most_common(top_n))
325
+
326
+ summaries.append({
327
+ 'country': country,
328
+ 'total_poems': len(texts),
329
+ 'top_topics': top_topics,
330
+ 'top_emotions': top_emotions
331
+ })
332
+ progress_bar.progress(1.0, text="Processing complete!")
333
+
334
+ except Exception as e:
335
+ st.warning(f"Could not generate topics for {country}: {str(e)}")
336
+ continue
337
 
338
  # Clear progress for next country
339
  progress_placeholder.empty()