Sergidev committed on
Commit
5a0b505
·
verified ·
1 Parent(s): 35042da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -38
app.py CHANGED
@@ -1,51 +1,58 @@
1
  import gradio as gr
 
 
 
2
  import plotly.graph_objects as go
3
- import hashlib
4
 
5
- def simple_embedding(text, dim=3):
6
- """A simple hash-based embedding function for demonstration purposes."""
7
- hash_value = hashlib.md5(text.encode()).hexdigest()
8
- return [int(hash_value[i:i+2], 16) / 255.0 for i in range(0, dim*2, 2)]
9
 
10
- def compare_embeddings(*texts):
11
- embeddings = [simple_embedding(text) for text in texts if text.strip()] # Only process non-empty texts
12
-
13
- fig = go.Figure()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- colors = ['red', 'blue', 'green', 'purple', 'orange', 'cyan', 'magenta', 'yellow']
 
16
 
17
- for i, emb in enumerate(embeddings):
18
- color = colors[i % len(colors)]
19
- fig.add_trace(go.Scatter3d(
20
- x=[0, emb[0]], y=[0, emb[1]], z=[0, emb[2]],
21
- mode='lines+markers',
22
- name=f'Text {i+1}',
23
- line=dict(color=color),
24
- marker=dict(color=color)
25
- ))
26
 
27
  fig.update_layout(scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'))
28
 
29
  return fig
30
 
31
- with gr.Blocks() as iface:
32
- gr.Markdown("# 3D Embedding Comparison (Simplified)")
33
- gr.Markdown("Compare simplified embeddings of multiple strings visualized in 3D space.")
34
- gr.Markdown("Note: This is a demonstration using a basic hash-based embedding, not a real NLP model.")
35
-
36
- with gr.Row():
37
- num_inputs = gr.Slider(minimum=2, maximum=10, step=1, value=2, label="Number of texts to compare")
38
-
39
- with gr.Row() as text_container:
40
- text_inputs = [gr.Textbox(label=f"Text {i+1}") for i in range(2)]
41
-
42
- output = gr.Plot()
43
- submit_btn = gr.Button("Compare Embeddings")
44
-
45
- def update_text_inputs(num):
46
- return {text_container: gr.Row.update(children=[gr.Textbox(label=f"Text {i+1}") for i in range(num)])}
47
-
48
- num_inputs.change(fn=update_text_inputs, inputs=[num_inputs], outputs=[text_container])
49
- submit_btn.click(fn=compare_embeddings, inputs=text_container.children, outputs=output)
50
 
51
  iface.launch()
 
1
  import gradio as gr
2
+ import spaces
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModel
5
  import plotly.graph_objects as go
 
6
 
7
+ model_name = "mistralai/Mistral-7B-v0.1"
8
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
9
+ model = None
 
10
 
11
+ # Set pad token to eos token if not defined
12
+ if tokenizer.pad_token is None:
13
+ tokenizer.pad_token = tokenizer.eos_token
14
+
15
+ @spaces.GPU
16
+ def get_embedding(text):
17
+ global model
18
+ if model is None:
19
+ model = AutoModel.from_pretrained(model_name).cuda()
20
+ model.resize_token_embeddings(len(tokenizer))
21
+
22
+ inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512).to('cuda')
23
+ with torch.no_grad():
24
+ outputs = model(**inputs)
25
+ return outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
26
+
27
+ def reduce_to_3d(embedding):
28
+ return embedding[:3]
29
+
30
+ @spaces.GPU
31
+ def compare_embeddings(text1, text2):
32
+ emb1 = get_embedding(text1)
33
+ emb2 = get_embedding(text2)
34
 
35
+ emb1_3d = reduce_to_3d(emb1)
36
+ emb2_3d = reduce_to_3d(emb2)
37
 
38
+ fig = go.Figure(data=[
39
+ go.Scatter3d(x=[0, emb1_3d[0]], y=[0, emb1_3d[1]], z=[0, emb1_3d[2]], mode='lines+markers', name='Text 1'),
40
+ go.Scatter3d(x=[0, emb2_3d[0]], y=[0, emb2_3d[1]], z=[0, emb2_3d[2]], mode='lines+markers', name='Text 2')
41
+ ])
 
 
 
 
 
42
 
43
  fig.update_layout(scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'))
44
 
45
  return fig
46
 
47
+ iface = gr.Interface(
48
+ fn=compare_embeddings,
49
+ inputs=[
50
+ gr.Textbox(label="Text 1"),
51
+ gr.Textbox(label="Text 2")
52
+ ],
53
+ outputs=gr.Plot(),
54
+ title="3D Embedding Comparison",
55
+ description="Compare the embeddings of two strings visualized in 3D space using Mistral 7B."
56
+ )
 
 
 
 
 
 
 
 
 
57
 
58
  iface.launch()