Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,6 @@ import streamlit as st
|
|
2 |
import torch
|
3 |
from transformers import BertModel, BertTokenizer
|
4 |
|
5 |
-
# Load pre-trained BERT model and tokenizer (do this outside the main loop for efficiency)
|
6 |
# Load pre-trained BERT model and tokenizer
|
7 |
@st.cache_resource
|
8 |
def load_bert():
|
@@ -12,18 +11,17 @@ def load_bert():
|
|
12 |
|
13 |
tokenizer, model = load_bert()
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
return cos_sim.item() # Convert tensor to a float
|
27 |
|
28 |
# Streamlit interface
|
29 |
st.title("Word Similarity Checker")
|
|
|
2 |
import torch
|
3 |
from transformers import BertModel, BertTokenizer
|
4 |
|
|
|
5 |
# Load pre-trained BERT model and tokenizer
|
6 |
@st.cache_resource
|
7 |
def load_bert():
|
|
|
11 |
|
12 |
tokenizer, model = load_bert()
|
13 |
|
14 |
+
def calculate_similarity(word1, word2):
    """Return the cosine similarity between the BERT embeddings of two words.

    Each word is independently tokenized and run through the module-level
    ``model``; the hidden state of the first token (the [CLS] marker added by
    ``add_special_tokens=True``) is used as the word's embedding vector.

    Parameters
    ----------
    word1, word2 : str
        The words (or short phrases) to compare.

    Returns
    -------
    float
        Cosine similarity of the two embeddings, in [-1.0, 1.0].
    """

    def _cls_embedding(text):
        # Encode with special tokens so position 0 is [CLS]; shape (1, seq_len).
        input_ids = torch.tensor([tokenizer.encode(text, add_special_tokens=True)])
        # Inference only — no gradients needed.
        with torch.no_grad():
            # model(...)[0] is the last hidden state, shape (1, seq_len, hidden);
            # slice out the [CLS] token vector at position 0.
            return model(input_ids)[0][0, 0, :]

    embedding1 = _cls_embedding(word1)
    embedding2 = _cls_embedding(word2)

    cos_sim = torch.nn.functional.cosine_similarity(embedding1, embedding2, dim=0)
    return cos_sim.item()  # Convert 0-d tensor to a plain Python float
|
|
|
25 |
|
26 |
# Streamlit interface
|
27 |
st.title("Word Similarity Checker")
|