Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from scipy.spatial.distance import cosine
|
3 |
+
from transformers import AutoModel, AutoTokenizer
|
4 |
+
from thefuzz import fuzz
|
5 |
+
import gradio as gr
|
6 |
+
|
7 |
+
# Single source of truth for the Hugging Face checkpoint, so the tokenizer and
# the encoder are always loaded from the same identifier.
_SIMCSE_CHECKPOINT = "princeton-nlp/sup-simcse-bert-base-uncased"

# Loaded once at import time and shared by every request handled by `simcse`.
tokenizer = AutoTokenizer.from_pretrained(_SIMCSE_CHECKPOINT)
model = AutoModel.from_pretrained(_SIMCSE_CHECKPOINT)
|
9 |
+
|
10 |
+
|
11 |
+
def thefuzz(text1, text2):
    """Score two texts with thefuzz's token-sort ratio.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        A single-entry dict mapping the label ``'token sort ratio'`` to the
        value returned by ``fuzz.token_sort_ratio`` divided by 100.
    """
    ratio = fuzz.token_sort_ratio(text1, text2) / 100
    return {'token sort ratio': ratio}
|
14 |
+
|
15 |
+
|
16 |
+
def simcse(text1, text2):
    """Score the similarity of two texts using SimCSE sentence embeddings.

    Both texts are tokenized together as one padded, truncated batch, passed
    through the module-level ``model``, and compared via the cosine
    similarity of their ``pooler_output`` vectors.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        A single-entry dict mapping the label
        ``"cosine similarity of simcse embeddings"`` to the similarity as a
        built-in ``float``.
    """
    # Named `encoded` so it does not shadow the module-level `inputs` list.
    encoded = tokenizer(
        [text1, text2], padding=True, truncation=True, return_tensors="pt"
    )

    # Inference only, so gradient tracking is disabled. Only the pooled output
    # is read, so per-layer hidden states are not requested.
    with torch.no_grad():
        embeddings = model(**encoded, return_dict=True).pooler_output

    # Give SciPy plain NumPy vectors instead of relying on implicit
    # tensor-to-array conversion.
    vectors = embeddings.cpu().numpy()
    similarity = 1 - cosine(vectors[0], vectors[1])

    # SciPy returns a NumPy scalar here; cast to a built-in float so the value
    # handed to the UI layer does not depend on NumPy scalar types.
    return {"cosine similarity of simcse embeddings": float(similarity)}
|
25 |
+
|
26 |
+
def get_scores(text1, text2):
    """Compute both similarity scores for a pair of texts.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        A 2-tuple ``(simcse_result, thefuzz_result)``: the dict returned by
        ``simcse`` followed by the dict returned by ``thefuzz``.
    """
    # The token-sort score is computed first, the embedding score second;
    # the returned order is the reverse (embedding score first).
    lexical = thefuzz(text1, text2)
    semantic = simcse(text1, text2)
    return semantic, lexical
|
30 |
+
|
31 |
+
# Input and output components for the interface.
#
# Gradio 3 exposes components at the top level (`gr.Textbox`, `gr.Label`) and
# deprecated the `gr.inputs` / `gr.outputs` modules, which later releases
# removed. Prefer the top-level components when they exist and fall back to
# the legacy modules so older Gradio installs keep working.
if hasattr(gr, "Textbox") and hasattr(gr, "Label"):
    inputs = [
        gr.Textbox(lines=5, label="Input Text One"),
        gr.Textbox(lines=5, label="Input Text Two"),
    ]
    # The top-level Label takes a {label: confidence} dict directly, so the
    # legacy `type="confidences"` argument is not needed.
    outputs = [
        gr.Label(label="Cosine similarity based on SimCSE embeddings"),
        gr.Label(label="Token sort ratio using Levenshtein distance"),
    ]
else:
    inputs = [
        gr.inputs.Textbox(lines=5, label="Input Text One"),
        gr.inputs.Textbox(lines=5, label="Input Text Two"),
    ]
    outputs = [
        gr.outputs.Label(
            type="confidences",
            label="Cosine similarity based on SimCSE embeddings",
        ),
        gr.outputs.Label(
            type="confidences",
            label="Token sort ratio using Levenshtein distance",
        ),
    ]
|
39 |
+
# Static text shown around the interface: heading, short blurb, and an HTML
# footer linking to the two projects being compared.
title = "SimCSE vs thefuzz"

description = (
    "Simple app for comparing text similarity scores using "
    "Princeton-NLP SimCSE and thefuzz from SeatGeek. "
    "Interface by Troy Yang."
)

article = (
    "<p style='text-align: center'>"
    "<a href='https://github.com/princeton-nlp/SimCSE'>"
    "SimCSE: Simple Contrastive Learning of Sentence Embeddings</a>"
    " | "
    "<a href='https://github.com/seatgeek/thefuzz'>"
    "thefuzz: Fuzzy String Matching in Python</a>"
    "</p>"
)
|
42 |
+
# Preset sentence pairs offered as one-click examples in the UI. Each entry is
# (first text, second text); order is preserved when building `examples`.
_EXAMPLE_PAIRS = (
    (
        "There's a kid on a skateboard.",
        "A kid is skateboarding.",
    ),
    (
        "There is no boy standing in front of the blue building in the space reserved for handicapped people",
        "A boy is standing in front of the blue building in the space reserved for handicapped people",
    ),
    (
        "People wearing costumes are gathering in a forest and are looking in the same direction",
        "Masked people are looking in the same direction in a forest",
    ),
    (
        "Two large persons are sitting on a park bench and they have a bottle of soda between them",
        "Two large persons are standing near a park bench and they have nothing between them",
    ),
    (
        "A young man with brown hair and shades is sitting in front of some cans of soda",
        "A young man with brown hair and sunglasses is sitting in front of some cans of soda",
    ),
    (
        "A young lady with light brown hair is wearing a red necklace, a sweatshirt and earrings and is smiling",
        "There is no young lady with light brown hair wearing a red necklace, a sweatshirt and earrings and smiling",
    ),
    (
        "A woman wearing a blue and white uniform with a white and blue hat is keeping her mouth open and is near others dressed in the same fashion",
        "A woman wearing casual clothing is keeping her mouth closed and is near other people dressed differently",
    ),
    (
        "The man with brown hair is wearing sunglasses and is sitting listlessly at a table with cans of soda and other drinks",
        "The man with brown hair is wearing sunglasses and is sitting at a table with cans of soda and other drinks",
    ),
    (
        "There is no man wearing clothes that are covered with paint or is sitting outside in a busy area writing something",
        "A man is wearing clothes that are covered with paint and is sitting outside in a busy area writing something",
    ),
    (
        "The shirtless man in striped shorts and sunglasses is not standing near a man in a white shirt and sunglasses",
        "The shirtless man in striped shorts and sunglasses is standing near a man in a white shirt and sunglasses",
    ),
    (
        "The shirtless man in striped shorts and sunglasses is standing near a man in a white shirt and sunglasses",
        "The shirtless man in striped shorts and sunglasses is standing near a person in a white shirt and sunglasses",
    ),
    (
        "A young boy is wearing a blue patterned swim suit, a black and yellow swim cap and has blue swim goggles on her head",
        "A young girl is wearing a blue patterned swim suit, a black and yellow swim cap and has blue swimming goggles on her head",
    ),
)

# The interface receives a list of two-item lists, one per example row.
examples = [[first, second] for first, second in _EXAMPLE_PAIRS]
|
64 |
+
# Assemble the interface from the pieces defined above and start serving it.
demo = gr.Interface(
    fn=get_scores,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
)
# share=True requests a public share link in addition to the local server.
demo.launch(share=True)
|