blazingbunny committed on
Commit
f3d628e
·
verified ·
1 Parent(s): f9a6969

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import BertModel, BertTokenizer
4
+
5
+ # Load pre-trained BERT model and tokenizer (do this outside the main loop for efficiency)
6
@st.cache_resource  # Cached so later script runs reuse the already-loaded objects
def load_bert():
    """Load the pre-trained ``bert-base-uncased`` tokenizer and model.

    Returns:
        A ``(tokenizer, model)`` tuple, in that order.
    """
    checkpoint = 'bert-base-uncased'
    # Tokenizer is loaded first, then the model, both from the same checkpoint.
    return (
        BertTokenizer.from_pretrained(checkpoint),
        BertModel.from_pretrained(checkpoint),
    )
11
+
12
# Module-level tokenizer/model pair, obtained once through the cached loader.
(tokenizer, model) = load_bert()
13
+
14
def calculate_similarity(word1, word2):
    """Return the cosine similarity between the BERT vectors of two texts.

    Each input is encoded on its own with the module-level ``tokenizer``
    (special tokens included), passed through the module-level ``model`` with
    gradient tracking disabled, and represented by the first-position vector
    of the model's first output.

    Args:
        word1: First word or phrase to compare.
        word2: Second word or phrase to compare.

    Returns:
        The cosine similarity of the two vectors as a Python float.
    """
    first_token_vectors = []
    with torch.no_grad():
        for text in (word1, word2):
            # truncation=True caps the sequence at the tokenizer's configured
            # maximum length, so an over-long input cannot overrun the
            # model's position embeddings and crash the forward pass.
            token_ids = tokenizer.encode(
                text, add_special_tokens=True, truncation=True
            )
            batch = torch.tensor([token_ids])  # batch of one sequence
            # Output [0] is indexed [0, 0, :]: first sequence, first token
            # position (the leading special token), all hidden dimensions.
            first_token_vectors.append(model(batch)[0][0, 0, :])

    cos_sim = torch.nn.functional.cosine_similarity(
        first_token_vectors[0], first_token_vectors[1], dim=0
    )
    return cos_sim.item()  # Convert the 0-dim tensor to a float
25
+
26
# Streamlit interface
st.title("Word Similarity Checker")

# Strip surrounding whitespace so input that only looks non-empty (spaces,
# tabs) is treated as missing and triggers the warning below.
word1 = st.text_input("Enter the first word:").strip()
word2 = st.text_input("Enter the second word:").strip()

if st.button("Check Similarity"):
    if word1 and word2:
        similarity = calculate_similarity(word1, word2)
        st.write(f"Similarity between '{word1}' and '{word2}': {similarity:.4f}")
    else:
        # At least one field is empty after stripping.
        st.warning("Please enter both words.")