kambris committed on
Commit
ecc1b19
·
verified ·
1 Parent(s): 2198b18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -6
app.py CHANGED
@@ -340,20 +340,37 @@ def process_and_summarize(df, bert_tokenizer, bert_model, emotion_classifier, to
340
  texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
341
  all_emotions = []
342
 
343
- # Get embeddings with proper output handling
344
  embeddings = []
345
  for i, text in enumerate(texts):
346
- embedding = get_embedding_for_text(text, bert_tokenizer, bert_model)
347
- embeddings.append(embedding)
 
 
 
 
 
 
 
 
348
  progress = (i + 1) / len(texts) * 0.4
349
  progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
350
 
351
  embeddings = np.array(embeddings)
352
 
353
- # Process emotions with correct output structure
354
  for i, text in enumerate(texts):
355
- emotion = emotion_classifier(text)[0]['label'] # Access the label directly
356
- all_emotions.append(emotion)
 
 
 
 
 
 
 
 
 
357
  progress = 0.4 + ((i + 1) / len(texts) * 0.3)
358
  progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
359
 
 
340
  texts = [clean_arabic_text(poem) for poem in group['poem'].dropna()]
341
  all_emotions = []
342
 
343
+ # Enhanced embedding generation
344
  embeddings = []
345
  for i, text in enumerate(texts):
346
+ text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
347
+ chunk_embeddings = []
348
+
349
+ for chunk in text_chunks:
350
+ chunk_embedding = get_embedding_for_text(chunk, bert_tokenizer, bert_model)
351
+ chunk_embeddings.append(chunk_embedding)
352
+
353
+ full_embedding = np.mean(chunk_embeddings, axis=0)
354
+ embeddings.append(full_embedding)
355
+
356
  progress = (i + 1) / len(texts) * 0.4
357
  progress_bar.progress(progress, text=f"Generated embeddings for {i+1}/{len(texts)} poems...")
358
 
359
  embeddings = np.array(embeddings)
360
 
361
+ # Process emotions with proper chunking
362
  for i, text in enumerate(texts):
363
+ text_chunks = [text[i:i+512] for i in range(0, len(text), 512)]
364
+ chunk_emotions = []
365
+
366
+ for chunk in text_chunks:
367
+ emotion = emotion_classifier(chunk)[0]['label']
368
+ chunk_emotions.append(emotion)
369
+
370
+ # Use most common emotion for the full text
371
+ final_emotion = max(set(chunk_emotions), key=chunk_emotions.count)
372
+ all_emotions.append(final_emotion)
373
+
374
  progress = 0.4 + ((i + 1) / len(texts) * 0.3)
375
  progress_bar.progress(progress, text=f"Classified emotions for {i+1}/{len(texts)} poems...")
376