Spaces:

DexterSptizu
/

sentence-transformer-visualization

Sleeping

App Files Files Community

DexterSptizu committed on Nov 4, 2024

Commit

1262b42

verified ·

1 Parent(s): 2df7566

Update app.py

Browse files

Files changed (1) hide show

app.py +199 -82

app.py CHANGED Viewed

@@ -1,8 +1,42 @@
 import streamlit as st
 import numpy as np
 from sentence_transformers import SentenceTransformer, util
-# Initialize sentence transformer model
 @st.cache_resource
 def load_model():
     return SentenceTransformer('all-MiniLM-L6-v2')
@@ -10,102 +44,185 @@ def load_model():
 model = load_model()
 def get_embedding_and_similarity(text1, text2):
-    # Get embeddings
     embedding1 = model.encode(text1, convert_to_tensor=True)
     embedding2 = model.encode(text2, convert_to_tensor=True)
-    # Calculate cosine similarity
     similarity = util.pytorch_cos_sim(embedding1, embedding2).item()
-    return similarity
 st.title("🤗 Interactive Sentence Embeddings Explorer")
 st.markdown("""
-This demo helps you understand how sentence transformers work by comparing text similarities.
-Try different sentences to see how the model captures semantic meaning!
-""")
-# Main comparison section
-st.header("Compare Two Texts")
-col1, col2 = st.columns(2)
-with col1:
-    st.markdown("**First Text**")
-    text1 = st.text_area("Enter first text", height=100,
-                         value="I love programming in Python")
-with col2:
-    st.markdown("**Second Text**")
-    text2 = st.text_area("Enter second text", height=100,
-                         value="Python is my favorite programming language")
-if st.button("Calculate Similarity"):
-    similarity = get_embedding_and_similarity(text1, text2)
-    st.markdown("### Similarity Score")
-    st.progress(similarity)
-    st.write(f"Cosine Similarity: {similarity:.4f}")
-    if similarity > 0.8:
-        st.success("These texts are very similar!")
-    elif similarity > 0.5:
-        st.info("These texts are somewhat similar")
-    else:
-        st.warning("These texts are quite different")
-# Interactive examples section
-st.header("Try These Examples")
-st.markdown("Click on any example to see how similar sentences are handled by the model")
-examples = {
-    "Similar Meaning, Different Words": {
-        "text1": "The cat is sleeping on the couch",
-        "text2": "A feline is resting on the sofa"
-    },
-    "Similar Words, Different Meaning": {
-        "text1": "The bank is by the river",
-        "text2": "I need to go to the bank for money"
-    },
-    "Technical Similarity": {
-        "text1": "Python is a programming language",
-        "text2": "Java is used for coding software"
-    },
-    "Opposite Meanings": {
-        "text1": "The stock market is going up",
-        "text2": "The stock market is going down"
     }
-}
-selected_example = st.selectbox("Choose an example", list(examples.keys()))
-if st.button("Try this example"):
-    example = examples[selected_example]
-    similarity = get_embedding_and_similarity(example["text1"], example["text2"])
-    st.markdown("### Example Texts")
-    st.write("Text 1:", example["text1"])
-    st.write("Text 2:", example["text2"])
-    st.markdown("### Similarity Score")
-    st.progress(similarity)
-    st.write(f"Cosine Similarity: {similarity:.4f}")
-# Educational section
-st.header("📚 How It Works")
-st.markdown("""
-1. **Text to Embeddings**: The model converts each text into a high-dimensional vector (embedding)
-2. **Similarity Calculation**: Cosine similarity between vectors is calculated
-3. **Score Interpretation**:
-   - 1.0 = Identical meaning
-   - >0.8 = Very similar
-   - >0.5 = Somewhat similar
-   - <0.5 = Different meanings
-""")
-# Advanced settings
-with st.expander("🔧 Advanced Settings"):
-    st.markdown("""
-    **Current Model**: all-MiniLM-L6-v2
-    - Embedding Size: 384 dimensions
-    - Optimized for semantic similarity tasks
-    - Fast and efficient for real-time applications
-    """)

 import streamlit as st
 import numpy as np
 from sentence_transformers import SentenceTransformer, util
+import plotly.graph_objects as go
+# Page configuration: sets the page title, the page icon and the "wide" layout.
+# NOTE(review): Streamlit presumably requires this to run before any other
+# st.* call - confirm against the st.set_page_config documentation.
+st.set_page_config(
+    page_title="Sentence Embeddings Explorer",
+    page_icon="🤗",
+    layout="wide"
+)
+# Custom CSS, injected as a raw <style> element (hence unsafe_allow_html=True):
+#   - the two .stTabs rules set the gap between tab headers and their size/padding
+#   - .medium-font styles the introduction paragraph below the title
+#   - .highlight styles the boxes that show the example texts
+#   - .big-font is not referenced anywhere in the visible part of this file
+st.markdown("""
+    <style>
+    .stTabs [data-baseweb="tab-list"] {
+        gap: 24px;
+    }
+    .stTabs [data-baseweb="tab"] {
+        height: 50px;
+        padding-left: 20px;
+        padding-right: 20px;
+    }
+    .big-font {
+        font-size:20px !important;
+        font-weight: bold;
+    }
+    .medium-font {
+        font-size:16px !important;
+    }
+    .highlight {
+        padding: 10px;
+        border-radius: 5px;
+        margin: 10px 0;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+# Initialize model: build it through a cached loader and keep one module-level
+# instance (`model`) that the helper functions below use.
 @st.cache_resource
 def load_model():
+    # Returns the 'all-MiniLM-L6-v2' SentenceTransformer.
+    # NOTE(review): st.cache_resource is presumably what keeps the model from
+    # being rebuilt on every script rerun - confirm against the Streamlit docs.
     return SentenceTransformer('all-MiniLM-L6-v2')
 model = load_model()
 def get_embedding_and_similarity(text1, text2):
+    # Encode both texts with the module-level `model` and compare them.
+    # Returns a 3-tuple: (similarity, embedding of text1, embedding of text2),
+    # where similarity is the scalar taken from util.pytorch_cos_sim and both
+    # embeddings have been converted to NumPy arrays.
+    # The similarity is returned as computed; it is not clamped or rescaled here.
     embedding1 = model.encode(text1, convert_to_tensor=True)
     embedding2 = model.encode(text2, convert_to_tensor=True)
+    # .item() pulls the single value out of the result as a plain Python number.
     similarity = util.pytorch_cos_sim(embedding1, embedding2).item()
+    # .cpu() is called before .numpy() so the conversion works on CPU-side tensors.
+    return similarity, embedding1.cpu().numpy(), embedding2.cpu().numpy()
+def create_radar_chart(embedding1, embedding2, num_dimensions=10):
+    # Select first few dimensions for visualization
+    categories = [f"Dim {i+1}" for i in range(num_dimensions)]
+    fig = go.Figure()
+    fig.add_trace(go.Scatterpolar(
+        r=embedding1[:num_dimensions],
+        theta=categories,
+        fill='toself',
+        name='Text 1'
+    ))
+    fig.add_trace(go.Scatterpolar(
+        r=embedding2[:num_dimensions],
+        theta=categories,
+        fill='toself',
+        name='Text 2'
+    ))
+    fig.update_layout(
+        polar=dict(radialaxis=dict(visible=True, range=[-1, 1])),
+        showlegend=True
+    )
+    return fig
+# Title and introduction. The intro paragraph is raw HTML so it can use the
+# "medium-font" CSS class, which is why unsafe_allow_html=True is passed.
 st.title("🤗 Interactive Sentence Embeddings Explorer")
 st.markdown("""
+<p class="medium-font">
+Explore the fascinating world of sentence embeddings! This interactive tool helps you understand
+how AI models capture the meaning of text and measure similarity between sentences.
+</p>
+""", unsafe_allow_html=True)
+# Create the three tabs: free-form comparison, canned examples, and explanation.
+tab1, tab2, tab3 = st.tabs(["🔍 Compare Texts", "📚 Learn by Examples", "ℹ️ How It Works"])
+with tab1:
+    st.markdown("### Compare Any Two Texts")
+    col1, col2 = st.columns(2)
+    with col1:
+        text1 = st.text_area("First Text",
+                            height=150,
+                            value="I love programming in Python",
+                            help="Enter your first text here")
+    with col2:
+        text2 = st.text_area("Second Text",
+                            height=150,
+                            value="Python is my favorite programming language",
+                            help="Enter your second text here")
+    if st.button("Calculate Similarity", type="primary"):
+        similarity, emb1, emb2 = get_embedding_and_similarity(text1, text2)
+        # Create three columns for results
+        col1, col2, col3 = st.columns([2,1,2])
+        with col2:
+            st.markdown("### Similarity Score")
+            st.markdown(f"<h1 style='text-align: center;'>{similarity:.2f}</h1>",
+                       unsafe_allow_html=True)
+        # Progress bar and interpretation
+        st.progress(similarity)
+        if similarity > 0.8:
+            st.success("🎯 These texts are very similar!")
+        elif similarity > 0.5:
+            st.info("🤔 These texts are somewhat similar")
+        else:
+            st.warning("📊 These texts are quite different")
+        # Visualization
+        st.markdown("### Embedding Visualization")
+        st.plotly_chart(create_radar_chart(emb1, emb2), use_container_width=True)
+with tab2:
+    st.markdown("### Learn Through Examples")
+    examples = {
+        "Similar Meaning, Different Words": {
+            "text1": "The cat is sleeping on the couch",
+            "text2": "A feline is resting on the sofa",
+            "explanation": "These sentences use different words but convey the same meaning."
+        },
+        "Similar Words, Different Context": {
+            "text1": "The bank is by the river",
+            "text2": "I need to go to the bank for money",
+            "explanation": "These sentences use 'bank' in different contexts."
+        },
+        "Technical Similarity": {
+            "text1": "Python is a programming language",
+            "text2": "Java is used for coding software",
+            "explanation": "These sentences are related to programming but discuss different languages."
+        },
+        "Opposite Meanings": {
+            "text1": "The stock market is going up",
+            "text2": "The stock market is going down",
+            "explanation": "These sentences use similar words but have opposite meanings."
+        }
     }
+    selected_example = st.selectbox("Choose an example to explore",
+                                  list(examples.keys()))
+    if st.button("Analyze Example", type="primary"):
+        example = examples[selected_example]
+        similarity, emb1, emb2 = get_embedding_and_similarity(
+            example["text1"],
+            example["text2"]
+        )
+        col1, col2 = st.columns(2)
+        with col1:
+            st.markdown("**Text 1:**")
+            st.markdown(f"<div class='highlight' style='background-color: #f0f2f6'>{example['text1']}</div>",
+                       unsafe_allow_html=True)
+        with col2:
+            st.markdown("**Text 2:**")
+            st.markdown(f"<div class='highlight' style='background-color: #f0f2f6'>{example['text2']}</div>",
+                       unsafe_allow_html=True)
+        st.markdown("**Explanation:**")
+        st.info(example["explanation"])
+        st.markdown("**Similarity Score:**")
+        st.progress(similarity)
+        st.write(f"Cosine Similarity: {similarity:.4f}")
+        st.plotly_chart(create_radar_chart(emb1, emb2), use_container_width=True)
+# Tab 3: static explanatory content only - no model calls and no user input.
+# Left column: what embeddings are and how the score is read.
+# Right column: model details and use cases. Below both: a collapsible
+# expander listing the processing steps.
+with tab3:
+    st.markdown("### Understanding Sentence Embeddings")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.markdown("""
+        #### What are Sentence Embeddings?
+        Sentence embeddings are numerical representations of text that capture semantic meaning.
+        Each sentence is converted into a vector of numbers, where similar meanings result in
+        similar vectors.
+        #### How is Similarity Calculated?
+        The similarity between two sentences is measured using cosine similarity between their
+        embedding vectors. The score ranges from -1 to 1:
+        - 1.0 = Identical meaning
+        - >0.8 = Very similar
+        - >0.5 = Somewhat similar
+        - <0.5 = Different meanings
+        """)
+    with col2:
+        st.markdown("""
+        #### Current Model Details
+        This demo uses the `all-MiniLM-L6-v2` model:
+        - Embedding Size: 384 dimensions
+        - Optimized for semantic similarity
+        - Fast and efficient
+        - Good balance of performance and speed
+        #### Use Cases
+        - Semantic search
+        - Document similarity
+        - Text clustering
+        - Information retrieval
+        """)
+    with st.expander("🔬 Technical Details"):
+        st.markdown("""
+        The model processes text through these steps:
+        1. Tokenization: Breaks text into tokens
+        2. Encoding: Converts tokens to embeddings
+        3. Pooling: Combines token embeddings into sentence embedding
+        4. Similarity: Computes cosine similarity between embeddings
+        """)