Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -331,60 +331,44 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
331 |
texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
|
332 |
all_emotions = []
|
333 |
|
|
|
334 |
embeddings = []
|
335 |
for i, text in enumerate(texts):
|
336 |
-
|
337 |
-
|
338 |
-
if embedding is not None and not np.isnan(embedding).any():
|
339 |
-
embeddings.append(embedding)
|
340 |
-
else:
|
341 |
-
st.warning(f"Invalid embedding generated for text {i+1} in {country}")
|
342 |
-
continue
|
343 |
-
except Exception as e:
|
344 |
-
st.warning(f"Error generating embedding for text {i+1} in {country}: {str(e)}")
|
345 |
-
continue
|
346 |
progress = (i + 1) / len(texts) * 0.4
|
347 |
progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
|
348 |
|
349 |
-
if len(embeddings) != len(texts):
|
350 |
-
texts = texts[:len(embeddings)]
|
351 |
embeddings = np.array(embeddings)
|
352 |
|
|
|
353 |
for i, text in enumerate(texts):
|
354 |
-
emotion =
|
355 |
all_emotions.append(emotion)
|
356 |
progress = 0.4 + ((i + 1) / len(texts) * 0.3)
|
357 |
progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
if len(texts) < min_topic_size:
|
362 |
-
st.warning(f"Not enough documents for {country} to generate meaningful topics (minimum {min_topic_size} required)")
|
363 |
-
continue
|
364 |
-
|
365 |
-
|
366 |
-
topics, probs = topic_model.fit_transform(texts, embeddings)
|
367 |
-
|
368 |
-
|
369 |
-
topic_counts = Counter(topics)
|
370 |
-
|
371 |
-
top_topics = format_topics(topic_model, topic_counts.most_common(top_n))
|
372 |
-
top_emotions = format_emotions(Counter(all_emotions).most_common(top_n))
|
373 |
-
|
374 |
-
summaries.append({
|
375 |
-
'country': country,
|
376 |
-
'total_poems': len(texts),
|
377 |
-
'top_topics': top_topics,
|
378 |
-
'top_emotions': top_emotions
|
379 |
-
})
|
380 |
-
progress_bar.progress(1.0, text="Processing complete!")
|
381 |
-
|
382 |
-
except Exception as e:
|
383 |
-
st.warning(f"Could not generate topics for {country}: {str(e)}")
|
384 |
continue
|
385 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
386 |
return summaries, topic_model
|
387 |
|
|
|
388 |
try:
|
389 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
390 |
st.success("Models loaded successfully!")
|
|
|
331 |
texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
|
332 |
all_emotions = []
|
333 |
|
334 |
+
# Get embeddings with proper output handling
|
335 |
embeddings = []
|
336 |
for i, text in enumerate(texts):
|
337 |
+
embedding = get_embedding_for_text(text, bert_tokenizer, bert_model)
|
338 |
+
embeddings.append(embedding)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
progress = (i + 1) / len(texts) * 0.4
|
340 |
progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
|
341 |
|
|
|
|
|
342 |
embeddings = np.array(embeddings)
|
343 |
|
344 |
+
# Process emotions with correct output structure
|
345 |
for i, text in enumerate(texts):
|
346 |
+
emotion = emotion_classifier(text)[0]['label'] # Access the label directly
|
347 |
all_emotions.append(emotion)
|
348 |
progress = 0.4 + ((i + 1) / len(texts) * 0.3)
|
349 |
progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
|
350 |
+
|
351 |
+
if len(texts) < min_topic_size:
|
352 |
+
st.info(f"Not enough documents for {country} to generate meaningful topics (minimum {min_topic_size} required)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
continue
|
354 |
+
|
355 |
+
topics, _ = topic_model.fit_transform(texts, embeddings)
|
356 |
+
topic_counts = Counter(topics)
|
357 |
+
|
358 |
+
top_topics = format_topics(topic_model, topic_counts.most_common(top_n))
|
359 |
+
top_emotions = format_emotions(Counter(all_emotions).most_common(top_n))
|
360 |
+
|
361 |
+
summaries.append({
|
362 |
+
'country': country,
|
363 |
+
'total_poems': len(texts),
|
364 |
+
'top_topics': top_topics,
|
365 |
+
'top_emotions': top_emotions
|
366 |
+
})
|
367 |
+
progress_bar.progress(1.0, text="Processing complete!")
|
368 |
+
|
369 |
return summaries, topic_model
|
370 |
|
371 |
+
|
372 |
try:
|
373 |
bert_tokenizer, bert_model, emotion_classifier = load_models()
|
374 |
st.success("Models loaded successfully!")
|