Spaces: Runtime error

Update app.py
app.py CHANGED
@@ -181,60 +181,57 @@ def clean_arabic_text(text):
     return ' '.join(cleaned_words)
 
 def classify_emotion(text, classifier):
-    """Classify emotion for complete text with
-
-
-    chunks = []
-    current_chunk = []
-    current_length = 0
-
-    for word in words:
-        word_tokens = len(classifier.tokenizer.encode(word))
-        if current_length + word_tokens > 512:
-            if current_chunk:
-                chunks.append(' '.join(current_chunk))
-            current_chunk = [word]
-            current_length = word_tokens
-        else:
-            current_chunk.append(word)
-            current_length += word_tokens
-
-    if current_chunk:
-        chunks.append(' '.join(current_chunk))
-
-    if not chunks:
-        chunks = [text]
-
-    all_scores = []
-    for chunk in chunks:
-        try:
-            # Direct classification without additional tokenization
-            result = classifier(chunk)
-            scores = result[0]
-            all_scores.append(scores)
-        except Exception as chunk_error:
-            st.warning(f"Skipping chunk due to error: {str(chunk_error)}")
-            continue
-
-    if all_scores:
-        label_scores = {}
-        count = len(all_scores)
-
-        for scores in all_scores:
-            label = scores['label']
-            if label not in label_scores:
-                label_scores[label] = 0
-            label_scores[label] += scores['score']
-
-        avg_scores = {label: score/count for label, score in label_scores.items()}
-        final_emotion = max(avg_scores.items(), key=lambda x: x[1])[0]
-        return final_emotion
-
+    """Classify emotion for complete text with precise token handling."""
+    # Ensure text is properly formatted
+    if not text or not isinstance(text, str):
         return "LABEL_2"
-
-
-
+
+    # Split into manageable chunks
+    words = text.split()
+    chunks = []
+    current_chunk = []
+    current_length = 0
+
+    # Create proper-sized chunks
+    for word in words:
+        word_tokens = len(classifier.tokenizer.encode(word))
+        if current_length + word_tokens > 512:
+            if current_chunk:
+                chunks.append(' '.join(current_chunk))
+            current_chunk = [word]
+            current_length = word_tokens
+        else:
+            current_chunk.append(word)
+            current_length += word_tokens
+
+    if current_chunk:
+        chunks.append(' '.join(current_chunk))
+
+    if not chunks:
         return "LABEL_2"
+
+    # Process chunks with proper output handling
+    all_scores = []
+    for chunk in chunks:
+        # Direct classification with proper output structure
+        result = classifier(chunk, return_all_scores=True)[0]
+        all_scores.append(result)
+
+    # Calculate final emotion
+    label_scores = {}
+    count = len(all_scores)
+
+    for scores in all_scores:
+        for score_dict in scores:
+            label = score_dict['label']
+            if label not in label_scores:
+                label_scores[label] = 0
+            label_scores[label] += score_dict['score']
+
+    avg_scores = {label: score/count for label, score in label_scores.items()}
+    final_emotion = max(avg_scores.items(), key=lambda x: x[1])[0]
+
+    return final_emotion
 
 def get_embedding_for_text(text, tokenizer, model):
     """Get embedding for complete text."""
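The new version packs words greedily into chunks under a 512-token budget before classifying. A minimal sketch of that packing logic in isolation, with the token counter stubbed out (the stub and its fixed cost of 3 tokens per word are illustrative, not part of the commit):

def chunk_words(words, count_tokens, budget=512):
    """Greedily pack words into chunks whose summed token counts stay within budget."""
    chunks, current_chunk, current_length = [], [], 0
    for word in words:
        word_tokens = count_tokens(word)
        if current_length + word_tokens > budget:
            if current_chunk:
                chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = word_tokens
        else:
            current_chunk.append(word)
            current_length += word_tokens
    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks

# 1000 words at 3 tokens each fit 170 words per chunk -> 6 chunks.
print(len(chunk_words(['kalima'] * 1000, lambda w: 3)))  # 6

One caveat: `classifier.tokenizer.encode(word)` normally adds special tokens (for example [CLS] and [SEP]) around every single word, so the per-word counts overestimate the true chunk length and the 512 check is conservative rather than exact.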
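On the classification side, with `return_all_scores=True` a Hugging Face text-classification pipeline returns one list of `{'label': ..., 'score': ...}` dicts per input, so `classifier(chunk, return_all_scores=True)[0]` is the full per-label score list for a single chunk. The averaging step, sketched standalone against stubbed pipeline outputs (the labels and scores below are made up for illustration):

def average_chunk_scores(chunk_outputs):
    """Sum per-label scores across chunks, average, and return the top label."""
    label_scores = {}
    for scores in chunk_outputs:        # one score list per chunk
        for score_dict in scores:       # {'label': ..., 'score': ...}
            label = score_dict['label']
            label_scores[label] = label_scores.get(label, 0.0) + score_dict['score']
    count = len(chunk_outputs)
    avg_scores = {label: total / count for label, total in label_scores.items()}
    return max(avg_scores.items(), key=lambda x: x[1])[0]

chunk_outputs = [
    [{'label': 'LABEL_0', 'score': 0.1}, {'label': 'LABEL_2', 'score': 0.9}],
    [{'label': 'LABEL_0', 'score': 0.4}, {'label': 'LABEL_2', 'score': 0.6}],
]
print(average_chunk_scores(chunk_outputs))  # LABEL_2 (average 0.75 vs 0.25)

Note that recent transformers releases deprecate `return_all_scores=True` in favor of `top_k=None`, which produces the same list-of-dicts output.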
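A hypothetical call site for the updated function, assuming the Space builds its classifier with `transformers.pipeline`; the checkpoint name is a placeholder, since this diff does not show how the classifier is loaded:

from transformers import pipeline

# Placeholder checkpoint; the commit does not name the model the Space uses.
classifier = pipeline("text-classification", model="your-org/arabic-emotion-model")

label = classify_emotion("اليوم كان يوماً جميلاً", classifier)
print(label)  # e.g. "LABEL_2"; the label-to-emotion mapping is model-specific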