Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -253,7 +253,7 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
         topic_model_params["nr_topics"] = "auto"
 
     topic_model = BERTopic(
-        embedding_model=None, #
+        embedding_model=None, # Changed from bert_model to None
         **topic_model_params
     )
 
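Note on the hunk above: with embedding_model=None, BERTopic does not embed documents on its own, so this change only works because the later hunks hand precomputed vectors straight to fit_transform. A minimal sketch of that pairing, with toy documents and synthetic vectors standing in for the app's poems and BERT embeddings:

import numpy as np
from bertopic import BERTopic

# Toy stand-ins: the app feeds cleaned Arabic poems and BERT-derived vectors instead.
docs = [f"sample poem number {i}" for i in range(60)]
rng = np.random.default_rng(0)
centers = rng.normal(size=(3, 768))  # three artificial clusters so HDBSCAN finds structure
embeddings = np.vstack([centers[i % 3] + 0.05 * rng.normal(size=768) for i in range(len(docs))])

# embedding_model=None means BERTopic skips its own embedding step,
# so the vectors must be passed to fit_transform explicitly.
topic_model = BERTopic(embedding_model=None, min_topic_size=5)
topics, probs = topic_model.fit_transform(docs, embeddings)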
@@ -264,49 +264,37 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
     )
     topic_model.vectorizer_model = vectorizer
 
-    progress_placeholder = st.empty()
-    progress_bar = progress_placeholder.progress(0)
-    status_message = st.empty()
-
     for country, group in df.groupby('country'):
-
-
+        progress_text = f"Processing poems for {country}..."
+        progress_bar = st.progress(0, text=progress_text)
 
-        status_message.text(f"Processing poems for {country}...")
         texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
         all_emotions = []
-        embeddings_list = []
 
-
+        # Generate embeddings
+        embeddings = []
         for i, text in enumerate(texts):
             try:
-                embedding =
+                embedding = get_embedding_for_text(text, bert_tokenizer, bert_model)
                 if embedding is not None and not np.isnan(embedding).any():
-
-                    if len(embedding.shape) == 1:
-                        embedding = embedding.reshape(1, -1)
-                    embeddings_list.append(embedding)
+                    embeddings.append(embedding)
 
-
-
-                progress_bar.progress(progress)
-                status_message.text(f"Generated embeddings for {i+1}/{total_texts} poems in {country}...")
-
+                progress = (i + 1) / len(texts) * 0.4
+                progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
             except Exception as e:
                 st.warning(f"Error processing poem {i+1} in {country}: {str(e)}")
                 continue
 
+        # Convert embeddings to numpy array
+        embeddings = np.array(embeddings)
+
         # Process emotions
         for i, text in enumerate(texts):
             try:
-                emotion =
+                emotion = classify_emotion(text, emotion_classifier)
                 all_emotions.append(emotion)
-
-
-                progress = 0.4 + ((i + 1) / total_texts * 0.3)
-                progress_bar.progress(progress)
-                status_message.text(f"Classified emotions for {i+1}/{total_texts} poems in {country}...")
-
+                progress = 0.4 + ((i + 1) / len(texts) * 0.3)
+                progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
             except Exception as e:
                 st.warning(f"Error classifying emotion for poem {i+1} in {country}: {str(e)}")
                 continue
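The loop above leans on two helpers that sit elsewhere in app.py and are not shown in this diff: get_embedding_for_text and classify_emotion. Purely as an illustration of what such helpers commonly look like (mean-pooled BERT hidden states and a transformers text-classification pipeline), not the file's actual implementations:

import torch

def get_embedding_for_text(text, tokenizer, model):
    # Hypothetical sketch: mean-pool the last hidden state into one vector per poem.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).squeeze().numpy()

def classify_emotion(text, emotion_classifier):
    # Hypothetical sketch: assumes emotion_classifier is a transformers
    # pipeline("text-classification") object; return the top predicted label.
    return emotion_classifier(text[:512])[0]["label"]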
@@ -316,37 +304,31 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
                 st.warning(f"Not enough documents for {country} to generate meaningful topics (minimum {min_topic_size} required)")
                 continue
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Ensure texts and embeddings match
+            if len(embeddings) != len(texts):
+                texts = texts[:len(embeddings)]
+
+            # Fit and transform the topic model
+            topics, probs = topic_model.fit_transform(texts, embeddings)
+            topic_counts = Counter(topics)
+
+            top_topics = format_topics(topic_model, topic_counts.most_common(top_n))
+            top_emotions = format_emotions(Counter(all_emotions).most_common(top_n))
+
+            summaries.append({
+                'country': country,
+                'total_poems': len(texts),
+                'top_topics': top_topics,
+                'top_emotions': top_emotions
+            })
+            progress_bar.progress(1.0, text="Processing complete!")
+
         except Exception as e:
             st.warning(f"Could not generate topics for {country}: {str(e)}")
             continue
-
-        progress_placeholder.empty()
-        status_message.empty()
-        progress_placeholder = st.empty()
-        progress_bar = progress_placeholder.progress(0)
-        status_message = st.empty()
-
+
     return summaries, topic_model
+
 try:
     bert_tokenizer, bert_model, emotion_classifier = load_models()
     st.success("Models loaded successfully!")
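format_topics and format_emotions are likewise defined outside this diff; the new block simply hands them (topic_id, count) and (emotion, count) pairs from Counter.most_common(top_n). A rough, purely illustrative sketch of the kind of lookup a format_topics-style helper needs to do against the fitted model (the real helper in app.py may differ):

def format_topics_sketch(topic_model, topic_counts):
    # Illustrative only: map each BERTopic topic id to its top words; -1 is BERTopic's outlier topic.
    formatted = []
    for topic_id, count in topic_counts:
        if topic_id == -1:
            continue
        top_words = [word for word, _ in topic_model.get_topic(topic_id)[:5]]
        formatted.append({"topic_id": topic_id, "count": count, "top_words": top_words})
    return formatted

Downstream, the summaries list built above (keys country, total_poems, top_topics, top_emotions) is a plain list of dicts, so it can be rendered with pandas.DataFrame(summaries) and st.dataframe.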
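One further note on the progress handling this commit replaces: the old code juggled an st.empty() placeholder for the bar plus a separate status text element, while the new code uses the text parameter of st.progress (available in Streamlit 1.18+), updating the bar and its caption in one call. A minimal standalone sketch of the new pattern, with a sleep standing in for the real embedding and classification work:

import time
import streamlit as st

items = list(range(20))  # placeholder work items
progress_bar = st.progress(0, text="Starting...")
for i, _ in enumerate(items):
    time.sleep(0.05)  # stand-in for embedding or classifying one poem
    progress_bar.progress((i + 1) / len(items), text=f"Processed {i + 1}/{len(items)} items...")
progress_bar.progress(1.0, text="Processing complete!")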