Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -340,20 +340,37 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
|
|
340 |
texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
|
341 |
all_emotions = []
|
342 |
|
343 |
-
#
|
344 |
embeddings = []
|
345 |
for i, text in enumerate(texts):
|
346 |
-
|
347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
348 |
progress = (i + 1) / len(texts) * 0.4
|
349 |
progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
|
350 |
|
351 |
embeddings = np.array(embeddings)
|
352 |
|
353 |
-
# Process emotions with
|
354 |
for i, text in enumerate(texts):
|
355 |
-
|
356 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
357 |
progress = 0.4 + ((i + 1) / len(texts) * 0.3)
|
358 |
progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
|
359 |
|
|
|
340 |
texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
|
341 |
all_emotions = []
|
342 |
|
343 |
+
# Enhanced embedding generation
|
344 |
embeddings = []
|
345 |
for i, text in enumerate(texts):
|
346 |
+
text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
|
347 |
+
chunk_embeddings = []
|
348 |
+
|
349 |
+
for chunk in text_chunks:
|
350 |
+
chunk_embedding = get_embedding_for_text(chunk, bert_tokenizer, bert_model)
|
351 |
+
chunk_embeddings.append(chunk_embedding)
|
352 |
+
|
353 |
+
full_embedding = np.mean(chunk_embeddings, axis=0)
|
354 |
+
embeddings.append(full_embedding)
|
355 |
+
|
356 |
progress = (i + 1) / len(texts) * 0.4
|
357 |
progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
|
358 |
|
359 |
embeddings = np.array(embeddings)
|
360 |
|
361 |
+
# Process emotions with proper chunking
|
362 |
for i, text in enumerate(texts):
|
363 |
+
text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
|
364 |
+
chunk_emotions = []
|
365 |
+
|
366 |
+
for chunk in text_chunks:
|
367 |
+
emotion = emotion_classifier(chunk)[0]['label']
|
368 |
+
chunk_emotions.append(emotion)
|
369 |
+
|
370 |
+
# Use most common emotion for the full text
|
371 |
+
final_emotion = max(set(chunk_emotions), key=chunk_emotions.count)
|
372 |
+
all_emotions.append(final_emotion)
|
373 |
+
|
374 |
progress = 0.4 + ((i + 1) / len(texts) * 0.3)
|
375 |
progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
|
376 |
|