kambris committed on
Commit
6e846e7
·
verified ·
1 Parent(s): c0f831c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -52
app.py CHANGED
@@ -181,60 +181,57 @@ def clean_arabic_text(text):
181
  return ' '.join(cleaned_words)
182
 
183
def classify_emotion(text, classifier):
    """Classify the emotion of a complete text, chunking it to fit the model.

    The text is split on whitespace and packed greedily into chunks whose
    estimated token count stays within the model's 512-token window. Each
    chunk is classified independently; the top-label scores are summed per
    label, averaged over the number of scored chunks, and the label with
    the highest mean is returned. Falls back to "LABEL_2" on any failure.
    """
    try:
        # Greedily pack words into chunks under the 512-token estimate.
        chunks = []
        buffer = []
        buffer_tokens = 0
        for token_word in text.split():
            n_tokens = len(classifier.tokenizer.encode(token_word))
            if buffer_tokens + n_tokens > 512:
                if buffer:
                    chunks.append(' '.join(buffer))
                buffer = [token_word]
                buffer_tokens = n_tokens
            else:
                buffer.append(token_word)
                buffer_tokens += n_tokens
        if buffer:
            chunks.append(' '.join(buffer))
        if not chunks:
            # Whitespace-only input: classify the raw text as one chunk.
            chunks = [text]

        # Classify each chunk; a failing chunk is logged and skipped.
        all_scores = []
        for piece in chunks:
            try:
                # The pipeline returns its top prediction as a single dict.
                all_scores.append(classifier(piece)[0])
            except Exception as chunk_error:
                st.warning(f"Skipping chunk due to error: {str(chunk_error)}")

        if all_scores:
            # Sum each label's top-prediction score, average over the
            # number of successfully scored chunks, pick the best label.
            totals = {}
            for entry in all_scores:
                totals[entry['label']] = totals.get(entry['label'], 0) + entry['score']
            divisor = len(all_scores)
            averaged = {name: value / divisor for name, value in totals.items()}
            return max(averaged, key=averaged.get)

        return "LABEL_2"

    except Exception as e:
        st.warning(f"Error in emotion classification: {str(e)}")
        return "LABEL_2"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  def get_embedding_for_text(text, tokenizer, model):
240
  """Get embedding for complete text."""
 
181
  return ' '.join(cleaned_words)
182
 
183
def classify_emotion(text, classifier):
    """Classify the dominant emotion of `text` with a transformers pipeline.

    The text is split into chunks that fit the model's 512-token window,
    each chunk is scored over ALL labels, and the per-label scores are
    averaged across chunks; the label with the highest mean score wins.

    Args:
        text: Input string (cleaned text; non-string or empty input is
            treated as unclassifiable).
        classifier: A transformers text-classification pipeline exposing
            `.tokenizer` and accepting `return_all_scores=True`.

    Returns:
        The winning label string, or "LABEL_2" (fallback label) for
        empty/invalid input or when no chunk could be classified.
    """
    if not text or not isinstance(text, str):
        return "LABEL_2"

    # Greedily pack words into chunks whose estimated token count stays
    # under the model limit. Per-word encode() includes special tokens,
    # so the estimate overshoots slightly — a safe margin, since the
    # joined chunk is tokenized once with a single special-token pair.
    words = text.split()
    chunks = []
    current_chunk = []
    current_length = 0
    for word in words:
        word_tokens = len(classifier.tokenizer.encode(word))
        if current_length + word_tokens > 512:
            if current_chunk:
                chunks.append(' '.join(current_chunk))
            # NOTE(review): a single word longer than 512 tokens still
            # becomes an oversized chunk; the per-chunk handler below
            # keeps that from crashing the whole classification.
            current_chunk = [word]
            current_length = word_tokens
        else:
            current_chunk.append(word)
            current_length += word_tokens
    if current_chunk:
        chunks.append(' '.join(current_chunk))

    if not chunks:
        return "LABEL_2"

    # Score every chunk over all labels. A failing chunk is skipped
    # instead of aborting the whole text (restores the resilience the
    # previous revision of this function had).
    # NOTE(review): `return_all_scores=True` is deprecated in newer
    # transformers releases in favour of `top_k=None`; kept here for
    # compatibility with the version this app pins.
    all_scores = []
    for chunk in chunks:
        try:
            all_scores.append(classifier(chunk, return_all_scores=True)[0])
        except Exception as chunk_error:
            st.warning(f"Skipping chunk due to error: {str(chunk_error)}")

    if not all_scores:
        # Every chunk failed — fall back to the neutral label rather
        # than letting max() raise ValueError on an empty mapping.
        return "LABEL_2"

    # Average each label's score across the successfully scored chunks.
    label_scores = {}
    count = len(all_scores)
    for scores in all_scores:
        for score_dict in scores:
            label = score_dict['label']
            label_scores[label] = label_scores.get(label, 0) + score_dict['score']

    avg_scores = {label: total / count for label, total in label_scores.items()}
    return max(avg_scores.items(), key=lambda x: x[1])[0]
235
 
236
  def get_embedding_for_text(text, tokenizer, model):
237
  """Get embedding for complete text."""