Spaces:

brisklyapp
/

strings-similarity

Runtime error

emiliosheinz committed on Feb 25, 2023

Commit

66f1e76

1 Parent(s): 4c9ba47

use fine tuned model

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,9 +1,7 @@
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-# load the pre-trained model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
-model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-multilingual-cased")
 # set the app title
 st.title("Sentence Similarity Checker")
@@ -14,15 +12,12 @@ sentence2 = st.text_input("Enter the second sentence:")
 # check if both sentences are not empty
 if sentence1 and sentence2:
-    # tokenize the sentences and get the output logits for the sentence pair classification task
-    inputs = tokenizer(sentence1, sentence2, padding=True, truncation=True, max_length=250, return_tensors="pt")
-    outputs = model(**inputs).logits
-    # calculate the softmax probabilities for the two classes (similar or dissimilar)
-    probs = outputs.softmax(dim=1)
-    # the probability of the sentences being similar is the second element of the output array
-    similarity_score = probs[0][1].item()
     # display the similarity score to the user
     st.write("Similarity score:", similarity_score)

 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+model = AutoModelForSequenceClassification.from_pretrained("sentence-transformers/all-distilroberta-v1")
 # set the app title
 st.title("Sentence Similarity Checker")
 # check if both sentences are not empty
 if sentence1 and sentence2:
+    # encode the sentences into embeddings
+    embeddings1 = model.encode(sentence1, convert_to_tensor=True)
+    embeddings2 = model.encode(sentence2, convert_to_tensor=True)
+    # calculate the cosine similarity between the embeddings
+    similarity_score = float(embeddings1 @ embeddings2.T)
     # display the similarity score to the user
     st.write("Similarity score:", similarity_score)