DexterSptizu committed on
Commit
910c0be
·
verified ·
1 Parent(s): 67dc5f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -97
app.py CHANGED
@@ -9,6 +9,14 @@ import pandas as pd
9
  # Page configuration
10
  st.set_page_config(layout="wide", page_title="🎯 Sentence Transformer Explorer")
11
 
 
 
 
 
 
 
 
 
12
  # Load model
13
  @st.cache_resource
14
  def load_model():
@@ -16,56 +24,31 @@ def load_model():
16
 
17
  model = load_model()
18
 
19
- def get_embedding_and_similarity(sentences: List[str]) -> Tuple[np.ndarray, np.ndarray]:
20
- embeddings = model.encode(sentences)
21
- similarity_matrix = util.cos_sim(embeddings, embeddings).numpy()
22
- return embeddings, similarity_matrix
23
-
24
- def create_word_importance_visualization(sentence: str, embedding: np.ndarray):
25
- # Calculate word-level contribution to the embedding
26
- words = sentence.split()
27
- word_embeddings = model.encode(words)
28
-
29
- # Calculate each word's average contribution
30
- word_importance = np.mean(np.abs(word_embeddings), axis=1)
31
-
32
- # Create word importance visualization
33
- fig = go.Figure()
34
-
35
- # Add word bars
36
- fig.add_trace(go.Bar(
37
- x=words,
38
- y=word_importance,
39
- marker_color='rgb(158,202,225)',
40
- text=np.round(word_importance, 3),
41
- textposition='auto',
42
- ))
43
-
44
- fig.update_layout(
45
- title="Word Importance in Embedding",
46
- xaxis_title="Words",
47
- yaxis_title="Average Contribution",
48
- height=400
49
- )
50
- return fig
51
 
52
- def create_similarity_heatmap(sentences: List[str], similarity_matrix: np.ndarray):
53
- fig = go.Figure(data=go.Heatmap(
54
- z=similarity_matrix,
55
- x=sentences,
56
- y=sentences,
57
- colorscale='RdBu',
58
- text=np.round(similarity_matrix, 3),
59
- texttemplate='%{text}',
60
- textfont={"size": 10},
61
- hoverongaps=False
62
- ))
63
-
64
- fig.update_layout(
65
- title="Sentence Similarity Matrix",
66
- height=400
67
- )
68
- return fig
69
 
70
  def main():
71
  st.title("🎯 Interactive Sentence Transformer Explorer")
@@ -80,70 +63,64 @@ def main():
80
  4. **Interactive Examples**: Try different sentences and see the results
81
  """)
82
 
83
- # Interactive sentence input
84
- st.subheader("🔤 Enter Your Sentences")
85
-
86
  col1, col2 = st.columns(2)
87
 
88
  with col1:
89
- # Example templates
90
- example_templates = {
91
- "Similar Meanings": [
92
- "I love programming in Python",
93
- "Coding with Python is my favorite",
94
- "I enjoy developing software using Python"
95
- ],
96
- "Different Topics": [
97
- "The cat sleeps on the mat",
98
- "Python is a programming language",
99
- "The weather is beautiful today"
100
- ],
101
- "Semantic Relations": [
102
- "Paris is the capital of France",
103
- "Berlin is the capital of Germany",
104
- "London is the capital of England"
105
- ]
106
- }
107
-
108
- selected_template = st.selectbox("Choose an example template:",
109
- list(example_templates.keys()))
110
 
111
  with col2:
112
- if st.button("Load Example"):
113
- sentences = example_templates[selected_template]
114
- else:
115
- sentences = ["I love programming in Python",
116
- "Coding with Python is my favorite",
117
- "The weather is beautiful today"]
118
 
119
  # Dynamic sentence input
120
  num_sentences = st.slider("Number of sentences:", 2, 5, 3)
121
  sentences = []
122
 
 
123
  for i in range(num_sentences):
124
- sentence = st.text_input(f"Sentence {i+1}",
125
- value=sentences[i] if i < len(sentences) else "")
 
 
 
 
126
  sentences.append(sentence)
127
 
128
  if st.button("Analyze Sentences", type="primary"):
129
  if all(sentences):
130
- embeddings, similarity_matrix = get_embedding_and_similarity(sentences)
 
 
131
 
132
  st.subheader("📊 Analysis Results")
133
 
134
- # Create tabs for different visualizations
135
- tab1, tab2, tab3 = st.tabs(["Word Importance", "Sentence Similarity", "Embedding Space"])
136
 
137
  with tab1:
138
- st.markdown("### 🔍 Word-Level Analysis")
139
- for i, sentence in enumerate(sentences):
140
- st.markdown(f"**Sentence {i+1}:** {sentence}")
141
- fig = create_word_importance_visualization(sentence, embeddings[i])
142
- st.plotly_chart(fig, use_container_width=True)
143
-
144
- with tab2:
145
- st.markdown("### 🤝 Sentence Similarity Analysis")
146
- fig = create_similarity_heatmap(sentences, similarity_matrix)
 
 
 
 
 
 
 
147
  st.plotly_chart(fig, use_container_width=True)
148
 
149
  # Add similarity interpretation
@@ -156,11 +133,9 @@ def main():
156
  else "Moderately similar" if similarity > 0.5
157
  else "Different"
158
  )
159
- st.write(f"Sentences {i+1} & {i+2}: {interpretation} ({similarity:.3f})")
160
 
161
- with tab3:
162
- st.markdown("### 🎯 Interactive Embedding Analysis")
163
-
164
  # Create embedding statistics
165
  embedding_stats = pd.DataFrame({
166
  'Sentence': sentences,
 
9
  # Page configuration
10
  st.set_page_config(layout="wide", page_title="🎯 Sentence Transformer Explorer")
11
 
12
+ # Initialize session state for sentences if not exists
13
+ if 'sentences' not in st.session_state:
14
+ st.session_state.sentences = [
15
+ "I love programming in Python",
16
+ "Coding with Python is my favorite",
17
+ "The weather is beautiful today"
18
+ ]
19
+
20
  # Load model
21
  @st.cache_resource
22
  def load_model():
 
24
 
25
  model = load_model()
26
 
27
+ # Example templates
28
+ EXAMPLE_TEMPLATES = {
29
+ "Similar Meanings": [
30
+ "I love programming in Python",
31
+ "Coding with Python is my favorite",
32
+ "I enjoy developing software using Python"
33
+ ],
34
+ "Different Topics": [
35
+ "The cat sleeps on the mat",
36
+ "Python is a programming language",
37
+ "The weather is beautiful today"
38
+ ],
39
+ "Semantic Relations": [
40
+ "Paris is the capital of France",
41
+ "Berlin is the capital of Germany",
42
+ "London is the capital of England"
43
+ ]
44
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ def load_example_sentences():
47
+ selected_template = st.session_state.get('template_selection')
48
+ st.session_state.sentences = EXAMPLE_TEMPLATES[selected_template]
49
+ # Force update of text inputs
50
+ for i, sentence in enumerate(st.session_state.sentences):
51
+ st.session_state[f'sentence_{i}'] = sentence
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  def main():
54
  st.title("🎯 Interactive Sentence Transformer Explorer")
 
63
  4. **Interactive Examples**: Try different sentences and see the results
64
  """)
65
 
66
+ # Example selection
 
 
67
  col1, col2 = st.columns(2)
68
 
69
  with col1:
70
+ st.selectbox(
71
+ "Choose an example template:",
72
+ options=list(EXAMPLE_TEMPLATES.keys()),
73
+ key='template_selection'
74
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  with col2:
77
+ st.button(
78
+ "Load Example",
79
+ on_click=load_example_sentences,
80
+ type="primary"
81
+ )
 
82
 
83
  # Dynamic sentence input
84
  num_sentences = st.slider("Number of sentences:", 2, 5, 3)
85
  sentences = []
86
 
87
+ # Create text inputs with keys
88
  for i in range(num_sentences):
89
+ default_value = st.session_state.sentences[i] if i < len(st.session_state.sentences) else ""
90
+ sentence = st.text_input(
91
+ f"Sentence {i+1}",
92
+ value=default_value,
93
+ key=f'sentence_{i}'
94
+ )
95
  sentences.append(sentence)
96
 
97
  if st.button("Analyze Sentences", type="primary"):
98
  if all(sentences):
99
+ # Your existing analysis code here...
100
+ embeddings = model.encode(sentences)
101
+ similarity_matrix = util.cos_sim(embeddings, embeddings).numpy()
102
 
103
  st.subheader("📊 Analysis Results")
104
 
105
+ tab1, tab2 = st.tabs(["Sentence Similarity", "Embedding Analysis"])
 
106
 
107
  with tab1:
108
+ # Create similarity heatmap
109
+ fig = go.Figure(data=go.Heatmap(
110
+ z=similarity_matrix,
111
+ x=sentences,
112
+ y=sentences,
113
+ colorscale='RdBu',
114
+ text=np.round(similarity_matrix, 3),
115
+ texttemplate='%{text}',
116
+ textfont={"size": 10},
117
+ hoverongaps=False
118
+ ))
119
+
120
+ fig.update_layout(
121
+ title="Sentence Similarity Matrix",
122
+ height=400
123
+ )
124
  st.plotly_chart(fig, use_container_width=True)
125
 
126
  # Add similarity interpretation
 
133
  else "Moderately similar" if similarity > 0.5
134
  else "Different"
135
  )
136
+ st.write(f"Sentences {i+1} & {j+1}: {interpretation} ({similarity:.3f})")
137
 
138
+ with tab2:
 
 
139
  # Create embedding statistics
140
  embedding_stats = pd.DataFrame({
141
  'Sentence': sentences,