Spaces:

Sergidev
/

3dembed

Sleeping

Sergidev committed on Jul 18, 2024

Commit

2371338

verified ·

1 Parent(s): 4ede10d

Mistral v2

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,33 +3,21 @@ import spaces
 import torch
 from transformers import AutoTokenizer, AutoModel
 import plotly.graph_objects as go
-from huggingface_hub import HfApi
-from huggingface_hub import hf_hub_download
-import os
-import sys
-HF_TOKEN = os.getenv("HF_TOKEN")
-import gradio as gr
-import spaces
-import torch
-from transformers import AutoTokenizer, AutoModel
-import plotly.graph_objects as go
-from huggingface_hub import HfApi
-from huggingface_hub import hf_hub_download
-import os
-import sys
-# Update the model name to Mistral 7B
 model_name = "mistralai/Mistral-7B-v0.1"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = None
 @spaces.GPU
 def get_embedding(text):
     global model
     if model is None:
         model = AutoModel.from_pretrained(model_name).cuda()
     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to('cuda')
     with torch.no_grad():
@@ -37,7 +25,6 @@ def get_embedding(text):
     return outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
 def reduce_to_3d(embedding):
-    # Instead of PCA, we'll just take the first 3 dimensions
     return embedding[:3]
 @spaces.GPU
@@ -65,7 +52,7 @@ iface = gr.Interface(
     ],
     outputs=gr.Plot(),
     title="3D Embedding Comparison",
-    description="Compare the embeddings of two strings visualized in 3D space."
 )
 iface.launch()

 import torch
 from transformers import AutoTokenizer, AutoModel
 import plotly.graph_objects as go
 model_name = "mistralai/Mistral-7B-v0.1"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = None
+# Set pad token to eos token if not defined
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
 @spaces.GPU
 def get_embedding(text):
     global model
     if model is None:
         model = AutoModel.from_pretrained(model_name).cuda()
+        model.resize_token_embeddings(len(tokenizer))
     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to('cuda')
     with torch.no_grad():
     return outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
 def reduce_to_3d(embedding):
     return embedding[:3]
 @spaces.GPU
     ],
     outputs=gr.Plot(),
     title="3D Embedding Comparison",
+    description="Compare the embeddings of two strings visualized in 3D space using Mistral 7B."
 )
 iface.launch()