kambris committed on
Commit
950bcef
·
verified ·
1 Parent(s): 6e846e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -40
app.py CHANGED
@@ -331,60 +331,44 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
331
  texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
332
  all_emotions = []
333
 
 
334
  embeddings = []
335
  for i, text in enumerate(texts):
336
- try:
337
- embedding = get_embedding_for_text(text, bert_tokenizer, bert_model)
338
- if embedding is not None and not np.isnan(embedding).any():
339
- embeddings.append(embedding)
340
- else:
341
- st.warning(f"Invalid embedding generated for text {i+1} in {country}")
342
- continue
343
- except Exception as e:
344
- st.warning(f"Error generating embedding for text {i+1} in {country}: {str(e)}")
345
- continue
346
  progress = (i + 1) / len(texts) * 0.4
347
  progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
348
 
349
- if len(embeddings) != len(texts):
350
- texts = texts[:len(embeddings)]
351
  embeddings = np.array(embeddings)
352
 
 
353
  for i, text in enumerate(texts):
354
- emotion = classify_emotion(text, emotion_classifier)
355
  all_emotions.append(emotion)
356
  progress = 0.4 + ((i + 1) / len(texts) * 0.3)
357
  progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
358
-
359
- try:
360
-
361
- if len(texts) < min_topic_size:
362
- st.warning(f"Not enough documents for {country} to generate meaningful topics (minimum {min_topic_size} required)")
363
- continue
364
-
365
-
366
- topics, probs = topic_model.fit_transform(texts, embeddings)
367
-
368
-
369
- topic_counts = Counter(topics)
370
-
371
- top_topics = format_topics(topic_model, topic_counts.most_common(top_n))
372
- top_emotions = format_emotions(Counter(all_emotions).most_common(top_n))
373
-
374
- summaries.append({
375
- 'country': country,
376
- 'total_poems': len(texts),
377
- 'top_topics': top_topics,
378
- 'top_emotions': top_emotions
379
- })
380
- progress_bar.progress(1.0, text="Processing complete!")
381
-
382
- except Exception as e:
383
- st.warning(f"Could not generate topics for {country}: {str(e)}")
384
  continue
385
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  return summaries, topic_model
387
 
 
388
  try:
389
  bert_tokenizer, bert_model, emotion_classifier = load_models()
390
  st.success("Models loaded successfully!")
 
331
  texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
332
  all_emotions = []
333
 
334
+ # Get embeddings with proper output handling
335
  embeddings = []
336
  for i, text in enumerate(texts):
337
+ embedding = get_embedding_for_text(text, bert_tokenizer, bert_model)
338
+ embeddings.append(embedding)
 
 
 
 
 
 
 
 
339
  progress = (i + 1) / len(texts) * 0.4
340
  progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
341
 
 
 
342
  embeddings = np.array(embeddings)
343
 
344
+ # Process emotions with correct output structure
345
  for i, text in enumerate(texts):
346
+ emotion = emotion_classifier(text)[0]['label'] # Access the label directly
347
  all_emotions.append(emotion)
348
  progress = 0.4 + ((i + 1) / len(texts) * 0.3)
349
  progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
350
+
351
+ if len(texts) < min_topic_size:
352
+ st.info(f"Not enough documents for {country} to generate meaningful topics (minimum {min_topic_size} required)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  continue
354
+
355
+ topics, _ = topic_model.fit_transform(texts, embeddings)
356
+ topic_counts = Counter(topics)
357
+
358
+ top_topics = format_topics(topic_model, topic_counts.most_common(top_n))
359
+ top_emotions = format_emotions(Counter(all_emotions).most_common(top_n))
360
+
361
+ summaries.append({
362
+ 'country': country,
363
+ 'total_poems': len(texts),
364
+ 'top_topics': top_topics,
365
+ 'top_emotions': top_emotions
366
+ })
367
+ progress_bar.progress(1.0, text="Processing complete!")
368
+
369
  return summaries, topic_model
370
 
371
+
372
  try:
373
  bert_tokenizer, bert_model, emotion_classifier = load_models()
374
  st.success("Models loaded successfully!")