kambris committed on
Commit
e480aa0
·
verified ·
1 Parent(s): cb29d0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -17
app.py CHANGED
@@ -234,27 +234,30 @@ def classify_emotion(text, classifier):
234
  return final_emotion
235
 
236
  def get_embedding_for_text(text, tokenizer, model):
237
- """Get embedding for complete text while preserving all content."""
238
- # Split into optimal chunks of 512 tokens
239
- chunks = split_text(text, max_length=512)
240
  chunk_embeddings = []
241
 
242
  for chunk in chunks:
243
- inputs = tokenizer(
244
- chunk,
245
- return_tensors="pt",
246
- padding=True,
247
- max_length=512
248
- )
249
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
250
-
251
- with torch.no_grad():
252
- outputs = model(**inputs)[0]
253
-
254
- embedding = outputs[:, 0, :].cpu().numpy()
255
- chunk_embeddings.append(embedding[0])
 
 
 
 
 
256
 
257
- # Weight each chunk based on its content
258
  if chunk_embeddings:
259
  weights = np.array([len(chunk.split()) for chunk in chunks])
260
  weights = weights / weights.sum()
 
234
  return final_emotion
235
 
236
  def get_embedding_for_text(text, tokenizer, model):
237
+ """Get embedding for complete text."""
238
+ chunks = split_text(text)
 
239
  chunk_embeddings = []
240
 
241
  for chunk in chunks:
242
+ try:
243
+ inputs = tokenizer(
244
+ chunk,
245
+ return_tensors="pt",
246
+ padding=True,
247
+ truncation=True,
248
+ max_length=512
249
+ )
250
+ inputs = {k: v.to(model.device) for k, v in inputs.items()}
251
+
252
+ with torch.no_grad():
253
+ outputs = model(**inputs)
254
+
255
+ embedding = outputs.last_hidden_state[:, 0, :].cpu().numpy()
256
+ chunk_embeddings.append(embedding[0])
257
+ except Exception as e:
258
+ st.warning(f"Error processing chunk: {str(e)}")
259
+ continue
260
 
 
261
  if chunk_embeddings:
262
  weights = np.array([len(chunk.split()) for chunk in chunks])
263
  weights = weights / weights.sum()