kambris committed on
Commit
950bcef
·
verified ·
1 Parent(s): 6e846e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -40
app.py CHANGED
@@ -331,60 +331,44 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
331
  texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
332
  all_emotions = []
333
 
 
334
  embeddings = []
335
  for i, text in enumerate(texts):
336
- try:
337
- embedding = get_embedding_for_text(text, bert_tokenizer, bert_model)
338
- if embedding is not None and not np.isnan(embedding).any():
339
- embeddings.append(embedding)
340
- else:
341
- st.warning(f"Invalid embedding generated for text {i+1} in {country}")
342
- continue
343
- except Exception as e:
344
- st.warning(f"Error generating embedding for text {i+1} in {country}: {str(e)}")
345
- continue
346
  progress = (i + 1) / len(texts) * 0.4
347
  progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
348
 
349
- if len(embeddings) != len(texts):
350
- texts = texts[:len(embeddings)]
351
  embeddings = np.array(embeddings)
352
 
 
353
  for i, text in enumerate(texts):
354
- emotion = classify_emotion(text, emotion_classifier)
355
  all_emotions.append(emotion)
356
  progress = 0.4 + ((i + 1) / len(texts) * 0.3)
357
  progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
358
-
359
- try:
360
-
361
- if len(texts) < min_topic_size:
362
- st.warning(f"Not enough documents for {country} to generate meaningful topics (minimum {min_topic_size} required)")
363
- continue
364
-
365
-
366
- topics, probs = topic_model.fit_transform(texts, embeddings)
367
-
368
-
369
- topic_counts = Counter(topics)
370
-
371
- top_topics = format_topics(topic_model, topic_counts.most_common(top_n))
372
- top_emotions = format_emotions(Counter(all_emotions).most_common(top_n))
373
-
374
- summaries.append({
375
- 'country': country,
376
- 'total_poems': len(texts),
377
- 'top_topics': top_topics,
378
- 'top_emotions': top_emotions
379
- })
380
- progress_bar.progress(1.0, text="Processing complete!")
381
-
382
- except Exception as e:
383
- st.warning(f"Could not generate topics for {country}: {str(e)}")
384
  continue
385
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  return summaries, topic_model
387
 
 
388
  try:
389
  bert_tokenizer, bert_model, emotion_classifier = load_models()
390
  st.success("Models loaded successfully!")
 
331
  texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
332
  all_emotions = []
333
 
334
+ # Get embeddings with proper output handling
335
  embeddings = []
336
  for i, text in enumerate(texts):
337
+ embedding = get_embedding_for_text(text, bert_tokenizer, bert_model)
338
+ embeddings.append(embedding)
 
 
 
 
 
 
 
 
339
  progress = (i + 1) / len(texts) * 0.4
340
  progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
341
 
 
 
342
  embeddings = np.array(embeddings)
343
 
344
+ # Process emotions with correct output structure
345
  for i, text in enumerate(texts):
346
+ emotion = emotion_classifier(text)[0]['label'] # Access the label directly
347
  all_emotions.append(emotion)
348
  progress = 0.4 + ((i + 1) / len(texts) * 0.3)
349
  progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
350
+
351
+ if len(texts) < min_topic_size:
352
+ st.info(f"Not enough documents for {country} to generate meaningful topics (minimum {min_topic_size} required)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  continue
354
+
355
+ topics, _ = topic_model.fit_transform(texts, embeddings)
356
+ topic_counts = Counter(topics)
357
+
358
+ top_topics = format_topics(topic_model, topic_counts.most_common(top_n))
359
+ top_emotions = format_emotions(Counter(all_emotions).most_common(top_n))
360
+
361
+ summaries.append({
362
+ 'country': country,
363
+ 'total_poems': len(texts),
364
+ 'top_topics': top_topics,
365
+ 'top_emotions': top_emotions
366
+ })
367
+ progress_bar.progress(1.0, text="Processing complete!")
368
+
369
  return summaries, topic_model
370
 
371
+
372
  try:
373
  bert_tokenizer, bert_model, emotion_classifier = load_models()
374
  st.success("Models loaded successfully!")