blazingbunny committed on
Commit
9d4445b
·
verified ·
1 Parent(s): cb42075

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -13
app.py CHANGED
@@ -2,7 +2,6 @@ import streamlit as st
2
  import torch
3
  from transformers import BertModel, BertTokenizer
4
 
5
- # Load pre-trained BERT model and tokenizer (do this outside the main loop for efficiency)
6
  # Load pre-trained BERT model and tokenizer
7
  @st.cache_resource
8
  def load_bert():
@@ -12,18 +11,17 @@ def load_bert():
12
 
13
  tokenizer, model = load_bert()
14
 
15
- def calculate_similarity(word1, word2):
16
- def calculate_similarity(word1, word2):
17
- # Tokenize and get embeddings
18
- input_ids1 = torch.tensor([tokenizer.encode(word1, add_special_tokens=True)])
19
- input_ids2 = torch.tensor([tokenizer.encode(word2, add_special_tokens=True)])
20
-
21
- with torch.no_grad():
22
- embeddings1 = model(input_ids1)[0][0, 0, :]
23
- embeddings2 = model(input_ids2)[0][0, 0, :]
24
-
25
- cos_sim = torch.nn.functional.cosine_similarity(embeddings1, embeddings2, dim=0)
26
- return cos_sim.item() # Convert tensor to a float
27
 
28
  # Streamlit interface
29
  st.title("Word Similarity Checker")
 
2
  import torch
3
  from transformers import BertModel, BertTokenizer
4
 
 
5
  # Load pre-trained BERT model and tokenizer
6
  @st.cache_resource
7
  def load_bert():
 
11
 
12
  tokenizer, model = load_bert()
13
 
14
def calculate_similarity(word1, word2):
    """Return the cosine similarity between the BERT [CLS] embeddings of two words.

    Uses the module-level ``tokenizer`` and ``model`` produced by ``load_bert()``.
    Returns a plain Python float suitable for display in the Streamlit UI.
    """
    # Encode each word with special tokens ([CLS]/[SEP]) and a batch dim of 1.
    batches = [
        torch.tensor([tokenizer.encode(w, add_special_tokens=True)])
        for w in (word1, word2)
    ]

    # Inference only — disable autograd, then take the [CLS] (first token) vector
    # of the last hidden state for each word.
    with torch.no_grad():
        vec_a = model(batches[0])[0][0, 0, :]
        vec_b = model(batches[1])[0][0, 0, :]

    similarity = torch.nn.functional.cosine_similarity(vec_a, vec_b, dim=0)
    return similarity.item()  # tensor -> float
 
25
 
26
  # Streamlit interface
27
  st.title("Word Similarity Checker")