DexterSptizu committed on
Commit
1ffc0e8
·
verified ·
1 Parent(s): 3fe05b6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ from sentence_transformers import SentenceTransformer, util
4
+
5
# Sentence-transformer model setup
@st.cache_resource
def load_model():
    """Construct the SentenceTransformer used for every embedding in this app.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned model object instead of rebuilding it on each script rerun.

    Returns:
        A ``SentenceTransformer`` loaded from the 'all-MiniLM-L6-v2' checkpoint.
    """
    checkpoint = 'all-MiniLM-L6-v2'
    encoder = SentenceTransformer(checkpoint)
    return encoder


# Module-level model handle read by get_embedding_and_similarity.
model = load_model()
11
+
12
def get_embedding_and_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        text1: First text to encode.
        text2: Second text to encode.

    Returns:
        The cosine similarity of the two embeddings as a plain Python number.
    """
    # Encode each text on its own (first text, then second) as a tensor,
    # using the module-level model.
    first_vec, second_vec = [
        model.encode(text, convert_to_tensor=True) for text in (text1, text2)
    ]

    score_tensor = util.pytorch_cos_sim(first_vec, second_vec)
    # .item() unwraps the single-element tensor into a Python scalar.
    return score_tensor.item()
20
+
21
# Page title and short introduction shown at the top of the app.
# The emoji in the title is written as a real Unicode character; the
# previous text was a mis-decoded UTF-8 byte sequence.
st.title("🤗 Interactive Sentence Embeddings Explorer")
st.markdown("""
This demo helps you understand how sentence transformers work by comparing text similarities.
Try different sentences to see how the model captures semantic meaning!
""")
26
+
27
# Main comparison section: two side-by-side text areas and a button that
# scores how similar the entered texts are.
st.header("Compare Two Texts")

col1, col2 = st.columns(2)
with col1:
    st.markdown("**First Text**")
    text1 = st.text_area("Enter first text", height=100,
                         value="I love programming in Python")

with col2:
    st.markdown("**Second Text**")
    text2 = st.text_area("Enter second text", height=100,
                         value="Python is my favorite programming language")

if st.button("Calculate Similarity"):
    similarity = get_embedding_and_similarity(text1, text2)

    st.markdown("### Similarity Score")
    # st.progress only accepts floats within [0.0, 1.0]. Cosine similarity
    # can be negative for dissimilar texts and can overshoot 1.0 by a
    # rounding error for identical ones, so clamp the value used for the bar.
    # The exact, unclamped score is still reported below.
    st.progress(min(max(similarity, 0.0), 1.0))
    st.write(f"Cosine Similarity: {similarity:.4f}")

    # Qualitative verdict; thresholds match the "How It Works" legend.
    if similarity > 0.8:
        st.success("These texts are very similar!")
    elif similarity > 0.5:
        st.info("These texts are somewhat similar")
    else:
        st.warning("These texts are quite different")
54
+
55
# Interactive examples section: canned sentence pairs that illustrate
# different kinds of (dis)similarity.
st.header("Try These Examples")
st.markdown("Click on any example to see how similar sentences are handled by the model")

# Maps the label shown in the select box to the pair of texts to compare.
examples = {
    "Similar Meaning, Different Words": {
        "text1": "The cat is sleeping on the couch",
        "text2": "A feline is resting on the sofa",
    },
    "Similar Words, Different Meaning": {
        "text1": "The bank is by the river",
        "text2": "I need to go to the bank for money",
    },
    "Technical Similarity": {
        "text1": "Python is a programming language",
        "text2": "Java is used for coding software",
    },
    "Opposite Meanings": {
        "text1": "The stock market is going up",
        "text2": "The stock market is going down",
    },
}

# Iterating a dict yields its keys, so list(examples) gives the labels
# in insertion order.
selected_example = st.selectbox("Choose an example", list(examples))

if st.button("Try this example"):
    example = examples[selected_example]
    similarity = get_embedding_and_similarity(example["text1"], example["text2"])

    st.markdown("### Example Texts")
    st.write("Text 1:", example["text1"])
    st.write("Text 2:", example["text2"])

    st.markdown("### Similarity Score")
    # st.progress only accepts floats within [0.0, 1.0]; cosine similarity
    # may fall outside that range (negative, or a hair above 1.0), so clamp
    # the bar value while still printing the exact score.
    st.progress(min(max(similarity, 0.0), 1.0))
    st.write(f"Cosine Similarity: {similarity:.4f}")
91
+
92
# Educational section: static explanation of the pipeline and of how to
# read the similarity score. The header emoji is written as a real Unicode
# character; the previous text was a mis-decoded UTF-8 byte sequence.
st.header("📚 How It Works")
st.markdown("""
1. **Text to Embeddings**: The model converts each text into a high-dimensional vector (embedding)
2. **Similarity Calculation**: Cosine similarity between vectors is calculated
3. **Score Interpretation**:
    - 1.0 = Identical meaning
    - >0.8 = Very similar
    - >0.5 = Somewhat similar
    - <0.5 = Different meanings
""")
103
+
104
# Advanced settings: collapsible panel with read-only facts about the model.
# The label emoji is written as a real Unicode character; the previous text
# was a mis-decoded UTF-8 byte sequence.
with st.expander("🔧 Advanced Settings"):
    st.markdown("""
    **Current Model**: all-MiniLM-L6-v2
    - Embedding Size: 384 dimensions
    - Optimized for semantic similarity tasks
    - Fast and efficient for real-time applications
    """)