DexterSptizu committed on
Commit
1262b42
•
1 Parent(s): 2df7566

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +199 -82
app.py CHANGED
@@ -1,8 +1,42 @@
1
  import streamlit as st
2
  import numpy as np
3
  from sentence_transformers import SentenceTransformer, util
 
4
 
5
- # Initialize sentence transformer model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  @st.cache_resource
7
  def load_model():
8
  return SentenceTransformer('all-MiniLM-L6-v2')
@@ -10,102 +44,185 @@ def load_model():
10
  model = load_model()
11
 
12
  def get_embedding_and_similarity(text1, text2):
13
- # Get embeddings
14
  embedding1 = model.encode(text1, convert_to_tensor=True)
15
  embedding2 = model.encode(text2, convert_to_tensor=True)
16
-
17
- # Calculate cosine similarity
18
  similarity = util.pytorch_cos_sim(embedding1, embedding2).item()
19
- return similarity
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  st.title("πŸ€— Interactive Sentence Embeddings Explorer")
22
  st.markdown("""
23
- This demo helps you understand how sentence transformers work by comparing text similarities.
24
- Try different sentences to see how the model captures semantic meaning!
25
- """)
26
-
27
- # Main comparison section
28
- st.header("Compare Two Texts")
29
 
30
- col1, col2 = st.columns(2)
31
- with col1:
32
- st.markdown("**First Text**")
33
- text1 = st.text_area("Enter first text", height=100,
34
- value="I love programming in Python")
35
 
36
- with col2:
37
- st.markdown("**Second Text**")
38
- text2 = st.text_area("Enter second text", height=100,
39
- value="Python is my favorite programming language")
40
-
41
- if st.button("Calculate Similarity"):
42
- similarity = get_embedding_and_similarity(text1, text2)
43
 
44
- st.markdown("### Similarity Score")
45
- st.progress(similarity)
46
- st.write(f"Cosine Similarity: {similarity:.4f}")
 
 
47
 
48
- if similarity > 0.8:
49
- st.success("These texts are very similar!")
50
- elif similarity > 0.5:
51
- st.info("These texts are somewhat similar")
52
- else:
53
- st.warning("These texts are quite different")
54
 
55
- # Interactive examples section
56
- st.header("Try These Examples")
57
- st.markdown("Click on any example to see how similar sentences are handled by the model")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
- examples = {
60
- "Similar Meaning, Different Words": {
61
- "text1": "The cat is sleeping on the couch",
62
- "text2": "A feline is resting on the sofa"
63
- },
64
- "Similar Words, Different Meaning": {
65
- "text1": "The bank is by the river",
66
- "text2": "I need to go to the bank for money"
67
- },
68
- "Technical Similarity": {
69
- "text1": "Python is a programming language",
70
- "text2": "Java is used for coding software"
71
- },
72
- "Opposite Meanings": {
73
- "text1": "The stock market is going up",
74
- "text2": "The stock market is going down"
 
 
 
 
 
 
 
 
75
  }
76
- }
77
 
78
- selected_example = st.selectbox("Choose an example", list(examples.keys()))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- if st.button("Try this example"):
81
- example = examples[selected_example]
82
- similarity = get_embedding_and_similarity(example["text1"], example["text2"])
83
 
84
- st.markdown("### Example Texts")
85
- st.write("Text 1:", example["text1"])
86
- st.write("Text 2:", example["text2"])
87
 
88
- st.markdown("### Similarity Score")
89
- st.progress(similarity)
90
- st.write(f"Cosine Similarity: {similarity:.4f}")
91
-
92
- # Educational section
93
- st.header("πŸ“š How It Works")
94
- st.markdown("""
95
- 1. **Text to Embeddings**: The model converts each text into a high-dimensional vector (embedding)
96
- 2. **Similarity Calculation**: Cosine similarity between vectors is calculated
97
- 3. **Score Interpretation**:
98
- - 1.0 = Identical meaning
99
- - >0.8 = Very similar
100
- - >0.5 = Somewhat similar
101
- - <0.5 = Different meanings
102
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- # Advanced settings
105
- with st.expander("πŸ”§ Advanced Settings"):
106
- st.markdown("""
107
- **Current Model**: all-MiniLM-L6-v2
108
- - Embedding Size: 384 dimensions
109
- - Optimized for semantic similarity tasks
110
- - Fast and efficient for real-time applications
111
- """)
 
1
  import streamlit as st
2
  import numpy as np
3
  from sentence_transformers import SentenceTransformer, util
4
+ import plotly.graph_objects as go
5
 
6
# Page configuration: browser-tab title and icon, and the full-width layout.
st.set_page_config(
    page_title="Sentence Embeddings Explorer",
    page_icon="🤗",  # U+1F917; must be stored as real UTF-8 or the tab shows garbage
    layout="wide",
)

# Custom CSS injected as raw HTML: spaces out the tab strip and defines the
# helper classes (.big-font, .medium-font, .highlight) used by later markup.
st.markdown("""
<style>
    .stTabs [data-baseweb="tab-list"] {
        gap: 24px;
    }
    .stTabs [data-baseweb="tab"] {
        height: 50px;
        padding-left: 20px;
        padding-right: 20px;
    }
    .big-font {
        font-size:20px !important;
        font-weight: bold;
    }
    .medium-font {
        font-size:16px !important;
    }
    .highlight {
        padding: 10px;
        border-radius: 5px;
        margin: 10px 0;
    }
</style>
""", unsafe_allow_html=True)
38
+
39
# Build the sentence-transformer behind Streamlit's resource cache so the
# decorated loader is not re-run on every script rerun.
@st.cache_resource
def load_model():
    """Return the `all-MiniLM-L6-v2` SentenceTransformer instance."""
    checkpoint = 'all-MiniLM-L6-v2'
    return SentenceTransformer(checkpoint)


# Module-level handle that the similarity helper reads.
model = load_model()
45
 
def get_embedding_and_similarity(text1, text2):
    """Embed two texts and score how close the embeddings are.

    Args:
        text1: First text, passed to ``model.encode``.
        text2: Second text, passed to ``model.encode``.

    Returns:
        A ``(similarity, embedding1, embedding2)`` tuple: the cosine
        similarity extracted with ``.item()``, followed by each embedding
        moved to the CPU and converted to a NumPy array.
    """
    # Encode in the same order as the arguments; tensors are needed for cos-sim.
    first, second = (
        model.encode(text, convert_to_tensor=True) for text in (text1, text2)
    )
    score = util.pytorch_cos_sim(first, second).item()
    return score, first.cpu().numpy(), second.cpu().numpy()
51
 
52
def create_radar_chart(embedding1, embedding2, num_dimensions=10):
    """Plot the leading components of two embeddings as overlaid radar traces.

    Args:
        embedding1: Sized, sliceable sequence of values for the first text
            (drawn as the "Text 1" trace).
        embedding2: Sized, sliceable sequence of values for the second text
            (drawn as the "Text 2" trace).
        num_dimensions: Maximum number of leading components to show. It is
            clamped to the length of the shorter embedding and to zero from
            below, so every axis label has a matching value in both traces.

    Returns:
        A ``go.Figure`` holding one filled ``Scatterpolar`` trace per text,
        with the radial axis fixed to the range [-1, 1] and the legend shown.
    """
    # Derive the label count from what can actually be sliced; building labels
    # from the raw request would desynchronise `theta` and `r` for short
    # embeddings or a negative `num_dimensions`.
    shown = max(0, min(num_dimensions, len(embedding1), len(embedding2)))
    categories = [f"Dim {i + 1}" for i in range(shown)]

    fig = go.Figure()
    for label, values in (("Text 1", embedding1), ("Text 2", embedding2)):
        fig.add_trace(go.Scatterpolar(
            r=values[:shown],
            theta=categories,
            fill='toself',
            name=label,
        ))

    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[-1, 1])),
        showlegend=True,
    )
    return fig
75
+
76
# Title and introduction. Emoji are written as real UTF-8 characters; a
# mis-decoded copy of this file would show them as sequences like "πŸ€—".
st.title("🤗 Interactive Sentence Embeddings Explorer")
st.markdown("""
<p class="medium-font">
Explore the fascinating world of sentence embeddings! This interactive tool helps you understand
how AI models capture the meaning of text and measure similarity between sentences.
</p>
""", unsafe_allow_html=True)

# Create the three top-level tabs; each is filled in by its own `with` block.
tab1, tab2, tab3 = st.tabs(["🔍 Compare Texts", "📚 Learn by Examples", "ℹ️ How It Works"])
 
 
 
87
 
88
# Tab 1: free-form comparison of two user-supplied texts.
# NOTE(review): nesting was reconstructed from a whitespace-stripped diff;
# confirm which widgets belong inside the centre column.
with tab1:
    st.markdown("### Compare Any Two Texts")
    col1, col2 = st.columns(2)

    with col1:
        text1 = st.text_area(
            "First Text",
            height=150,
            value="I love programming in Python",
            help="Enter your first text here",
        )

    with col2:
        text2 = st.text_area(
            "Second Text",
            height=150,
            value="Python is my favorite programming language",
            help="Enter your second text here",
        )

    if st.button("Calculate Similarity", type="primary"):
        similarity, emb1, emb2 = get_embedding_and_similarity(text1, text2)

        # Three columns so the score sits in the narrow middle one.
        col1, col2, col3 = st.columns([2, 1, 2])

        with col2:
            st.markdown("### Similarity Score")
            st.markdown(
                f"<h1 style='text-align: center;'>{similarity:.2f}</h1>",
                unsafe_allow_html=True,
            )

        # st.progress only accepts floats in [0.0, 1.0]. Cosine similarity can
        # be negative (or a hair above 1.0 through rounding), so clamp the bar
        # value; the number displayed above stays unclamped.
        st.progress(min(max(similarity, 0.0), 1.0))

        # Interpretation thresholds match the "How It Works" tab.
        if similarity > 0.8:
            st.success("🎯 These texts are very similar!")
        elif similarity > 0.5:
            st.info("🤔 These texts are somewhat similar")
        else:
            st.warning("📊 These texts are quite different")

        # Visualization of the leading embedding components.
        st.markdown("### Embedding Visualization")
        st.plotly_chart(create_radar_chart(emb1, emb2), use_container_width=True)
128
 
129
# Tab 2: curated sentence pairs, each with a short explanation.
# NOTE(review): nesting was reconstructed from a whitespace-stripped diff.
with tab2:
    st.markdown("### Learn Through Examples")

    # Keys are the labels shown in the select box.
    examples = {
        "Similar Meaning, Different Words": {
            "text1": "The cat is sleeping on the couch",
            "text2": "A feline is resting on the sofa",
            "explanation": "These sentences use different words but convey the same meaning.",
        },
        "Similar Words, Different Context": {
            "text1": "The bank is by the river",
            "text2": "I need to go to the bank for money",
            "explanation": "These sentences use 'bank' in different contexts.",
        },
        "Technical Similarity": {
            "text1": "Python is a programming language",
            "text2": "Java is used for coding software",
            "explanation": "These sentences are related to programming but discuss different languages.",
        },
        "Opposite Meanings": {
            "text1": "The stock market is going up",
            "text2": "The stock market is going down",
            "explanation": "These sentences use similar words but have opposite meanings.",
        },
    }

    selected_example = st.selectbox("Choose an example to explore",
                                    list(examples.keys()))

    if st.button("Analyze Example", type="primary"):
        example = examples[selected_example]
        similarity, emb1, emb2 = get_embedding_and_similarity(
            example["text1"],
            example["text2"],
        )

        # Show the two sentences side by side in highlighted boxes.
        col1, col2 = st.columns(2)
        for column, heading, key in (
            (col1, "**Text 1:**", "text1"),
            (col2, "**Text 2:**", "text2"),
        ):
            with column:
                st.markdown(heading)
                st.markdown(
                    f"<div class='highlight' style='background-color: #f0f2f6'>{example[key]}</div>",
                    unsafe_allow_html=True,
                )

        st.markdown("**Explanation:**")
        st.info(example["explanation"])

        st.markdown("**Similarity Score:**")
        # st.progress only accepts floats in [0.0, 1.0]; clamp because cosine
        # similarity may be negative or marginally above 1.0. The exact value
        # is still printed below.
        st.progress(min(max(similarity, 0.0), 1.0))
        st.write(f"Cosine Similarity: {similarity:.4f}")

        st.plotly_chart(create_radar_chart(emb1, emb2), use_container_width=True)
183
 
184
# Tab 3: static explanatory content (no model calls).
# NOTE(review): nesting was reconstructed from a whitespace-stripped diff;
# confirm whether the expander sits below the two columns or inside one.
with tab3:
    st.markdown("### Understanding Sentence Embeddings")

    col1, col2 = st.columns(2)

    with col1:
        st.markdown("""
        #### What are Sentence Embeddings?
        Sentence embeddings are numerical representations of text that capture semantic meaning.
        Each sentence is converted into a vector of numbers, where similar meanings result in
        similar vectors.

        #### How is Similarity Calculated?
        The similarity between two sentences is measured using cosine similarity between their
        embedding vectors. The score ranges from -1 to 1:
        - 1.0 = Identical meaning
        - >0.8 = Very similar
        - >0.5 = Somewhat similar
        - <0.5 = Different meanings
        """)

    with col2:
        st.markdown("""
        #### Current Model Details
        This demo uses the `all-MiniLM-L6-v2` model:
        - Embedding Size: 384 dimensions
        - Optimized for semantic similarity
        - Fast and efficient
        - Good balance of performance and speed

        #### Use Cases
        - Semantic search
        - Document similarity
        - Text clustering
        - Information retrieval
        """)

    # Expander label uses a real UTF-8 microscope emoji (U+1F52C).
    with st.expander("🔬 Technical Details"):
        st.markdown("""
        The model processes text through these steps:
        1. Tokenization: Breaks text into tokens
        2. Encoding: Converts tokens to embeddings
        3. Pooling: Combines token embeddings into sentence embedding
        4. Similarity: Computes cosine similarity between embeddings
        """)