Sergidev committed on
Commit
1ba32de
·
verified ·
1 Parent(s): b6391ee
Files changed (1) hide show
  1. app.py +28 -35
app.py CHANGED
@@ -1,65 +1,58 @@
1
  import gradio as gr
 
2
  import torch
3
- from transformers import LlamaTokenizer, AutoModel
4
  import plotly.graph_objects as go
5
 
6
  model_name = "mistralai/Mistral-7B-v0.1"
7
- tokenizer = LlamaTokenizer.from_pretrained(model_name)
8
  model = None
9
 
10
  # Set pad token to eos token if not defined
11
  if tokenizer.pad_token is None:
12
  tokenizer.pad_token = tokenizer.eos_token
13
 
 
14
  def get_embedding(text):
15
  global model
16
  if model is None:
17
- model = AutoModel.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
 
18
 
19
- inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(model.device)
20
  with torch.no_grad():
21
  outputs = model(**inputs)
22
- return outputs.last_hidden_state.mean(dim=1).squeeze().cpu()
23
 
24
  def reduce_to_3d(embedding):
25
  return embedding[:3]
26
 
27
- def compare_embeddings(text_input):
28
- try:
29
- texts = [t.strip() for t in text_input.split('\n') if t.strip()]
30
- embeddings = [get_embedding(text) for text in texts]
31
- embeddings_3d = [reduce_to_3d(emb) for emb in embeddings]
32
-
33
- fig = go.Figure()
34
-
35
- # Add origin point (black)
36
- fig.add_trace(go.Scatter3d(x=[0], y=[0], z=[0], mode='markers', name='Origin',
37
- marker=dict(size=5, color='black')))
38
-
39
- # Add lines and points for each text embedding
40
- colors = ['red', 'blue', 'green', 'purple', 'orange', 'cyan', 'magenta', 'yellow']
41
- for i, emb in enumerate(embeddings_3d):
42
- color = colors[i % len(colors)]
43
- fig.add_trace(go.Scatter3d(x=[0, emb[0].item()], y=[0, emb[1].item()], z=[0, emb[2].item()],
44
- mode='lines+markers', name=f'Text {i+1}',
45
- line=dict(color=color), marker=dict(color=color)))
46
-
47
- fig.update_layout(scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'))
48
-
49
- return fig
50
- except Exception as e:
51
- return f"An error occurred: {str(e)}"
52
 
53
  iface = gr.Interface(
54
  fn=compare_embeddings,
55
  inputs=[
56
- gr.Textbox(label="Input Texts", lines=5, placeholder="Enter multiple texts, each on a new line")
 
57
  ],
58
  outputs=gr.Plot(),
59
  title="3D Embedding Comparison",
60
- description="Compare the embeddings of multiple strings visualized in 3D space using Mistral 7B.",
61
- allow_flagging="never"
62
  )
63
 
64
- if __name__ == "__main__":
65
- iface.launch()
 
1
  import gradio as gr
2
+ import spaces
3
  import torch
4
+ from transformers import AutoTokenizer, AutoModel
5
  import plotly.graph_objects as go
6
 
7
# Model to use for embeddings. The tokenizer is loaded eagerly at import
# time; the model itself is loaded lazily on first use (see get_embedding).
model_name = "mistralai/Mistral-7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = None

# Mistral's tokenizer ships without a pad token; alias it to the EOS token
# so padded/batched encoding works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
14
 
15
@spaces.GPU
def get_embedding(text):
    """Embed *text* with Mistral-7B by mean-pooling the last hidden state.

    The model is loaded lazily on the first call and cached in the
    module-level ``model`` global.

    Args:
        text: Input string (or list of strings) to embed.

    Returns:
        A numpy array of the mean-pooled last hidden state, moved to CPU
        (1-D of length hidden_size for a single input).
    """
    global model
    if model is None:
        # Load in fp16: the float32 default for a 7B model (~28 GB of
        # weights) would not fit on a typical single GPU.
        model = AutoModel.from_pretrained(
            model_name, torch_dtype=torch.float16
        ).cuda()
        # No-op here since pad_token is aliased to eos_token (vocab size
        # unchanged); kept in case tokens are ever added to the tokenizer.
        model.resize_token_embeddings(len(tokenizer))

    # Use model.device rather than a hard-coded 'cuda' so inputs always
    # land wherever the model actually lives.
    inputs = tokenizer(
        text, return_tensors="pt", padding=True, truncation=True, max_length=512
    ).to(model.device)
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean over the sequence dimension -> a single vector per input, on CPU.
    return outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
26
 
27
def reduce_to_3d(embedding):
    """Crudely project an embedding to 3-D by keeping its first three
    components.

    Note this is a plain truncation (dims 0-2), not PCA/t-SNE.
    """
    first_three = slice(0, 3)
    return embedding[first_three]
29
 
30
@spaces.GPU
def compare_embeddings(text1, text2):
    """Embed two texts and plot each as a 3-D vector drawn from the origin.

    Args:
        text1: First input string.
        text2: Second input string.

    Returns:
        A plotly Figure with one origin-anchored line+marker trace per text.
    """
    traces = []
    for idx, text in enumerate((text1, text2), start=1):
        vec = reduce_to_3d(get_embedding(text))
        traces.append(
            go.Scatter3d(
                x=[0, vec[0]],
                y=[0, vec[1]],
                z=[0, vec[2]],
                mode='lines+markers',
                name=f'Text {idx}',
            )
        )

    fig = go.Figure(data=traces)
    fig.update_layout(scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'))

    return fig
 
 
 
 
 
 
 
 
 
46
 
47
# Gradio UI: two text inputs -> one interactive 3-D plot.
iface = gr.Interface(
    fn=compare_embeddings,
    inputs=[
        gr.Textbox(label="Text 1"),
        gr.Textbox(label="Text 2")
    ],
    outputs=gr.Plot(),
    title="3D Embedding Comparison",
    description="Compare the embeddings of two strings visualized in 3D space using Mistral 7B."
)

# Guard the launch so importing this module (e.g. from tests or another
# app) does not start the web server as a side effect.
if __name__ == "__main__":
    iface.launch()