Spaces:

kambris
/

SoLProject

Runtime error

kambris committed on Dec 6, 2024

Commit

cb29d0f

verified ·

1 Parent(s): 950bcef

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -234,31 +234,27 @@ def classify_emotion(text, classifier):
     return final_emotion
 def get_embedding_for_text(text, tokenizer, model):
-    """Get embedding for complete text."""
-    chunks = split_text(text)
     chunk_embeddings = []
     for chunk in chunks:
-        try:
-            inputs = tokenizer(
-                chunk,
-                return_tensors="pt",
-                padding=True,
-                truncation=True,
-                max_length=512
-            )
-            inputs = {k: v.to(model.device) for k, v in inputs.items()}
-            with torch.no_grad():
-                # Get the correct output format
-                outputs = model(**inputs)[0]  # Access first element of tuple
-            embedding = outputs[:, 0, :].cpu().numpy()
-            chunk_embeddings.append(embedding[0])
-        except Exception as e:
-            st.warning(f"Error processing chunk: {str(e)}")
-            continue
     if chunk_embeddings:
         weights = np.array([len(chunk.split()) for chunk in chunks])
         weights = weights / weights.sum()

     return final_emotion
 def get_embedding_for_text(text, tokenizer, model):
+    """Get embedding for complete text while preserving all content."""
+    # Split into optimal chunks of 512 tokens
+    chunks = split_text(text, max_length=512)
     chunk_embeddings = []
     for chunk in chunks:
+        inputs = tokenizer(
+            chunk,
+            return_tensors="pt",
+            padding=True,
+            max_length=512
+        )
+        inputs = {k: v.to(model.device) for k, v in inputs.items()}
+        with torch.no_grad():
+            outputs = model(**inputs)[0]
+        embedding = outputs[:, 0, :].cpu().numpy()
+        chunk_embeddings.append(embedding[0])
+    # Weight each chunk based on its content
     if chunk_embeddings:
         weights = np.array([len(chunk.split()) for chunk in chunks])
         weights = weights / weights.sum()