Spaces:
Runtime error
Runtime error
from statistics import mean | |
import random | |
import torch | |
from transformers import BertModel, BertTokenizerFast | |
import numpy as np | |
import torch.nn.functional as F | |
import gradio as gr | |
# Load the LaBSE tokenizer and encoder once at import time; the functions
# below reuse these module-level objects on every call.
tokenizer = BertTokenizerFast.from_pretrained("setu4993/LaBSE")
# Put the encoder in evaluation mode as soon as it is loaded.
model = BertModel.from_pretrained("setu4993/LaBSE").eval()
def embed(text, tokenizer, model):
    """Encode ``text`` and return the model's pooled output.

    Args:
        text: Input handed straight to ``tokenizer`` (a string, or whatever
            batch form the tokenizer accepts).
        tokenizer: Callable that converts ``text`` into a mapping of keyword
            arguments for ``model``. It is asked for PyTorch tensors with
            padding and truncation enabled.
        model: Callable invoked as ``model(**inputs)`` whose result exposes
            a ``pooler_output`` attribute.

    Returns:
        The ``pooler_output`` attribute of the model's forward-pass result.
    """
    # truncation=True caps over-long input at the tokenizer's maximum length.
    # Without it, arbitrarily long user text is passed to the encoder as-is
    # and can exceed the sequence length the model supports.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.pooler_output
def similarity(embeddings_1, embeddings_2):
    """Return the cosine-similarity matrix between two embedding batches.

    Both inputs are L2-normalised along dimension 1 and the first is then
    matrix-multiplied with the second after swapping its first two
    dimensions. For 2-D ``(rows, features)`` inputs, entry ``[i, j]`` of the
    result is the cosine similarity between row ``i`` of ``embeddings_1``
    and row ``j`` of ``embeddings_2``.
    """
    unit_1 = F.normalize(embeddings_1, p=2, dim=1)
    unit_2 = F.normalize(embeddings_2, p=2, dim=1)
    return unit_1 @ torch.transpose(unit_2, 0, 1)
# Labels indexed by similarity bucket, from 0 (lowest) to 5 (highest).
_SIMILARITY_LABELS = (
    "Completely dissimilar",
    "Same general topic, but not equivalent",
    "Not equivalent, but share some details",
    "Roughly equivalent, important details differ or are missing",
    "Mostly equivalent, unimportant details differ",
    "Equivalent",
)


def _similarity_bucket(score):
    """Map a cosine similarity to a valid index into ``_SIMILARITY_LABELS``."""
    top = len(_SIMILARITY_LABELS) - 1
    # Rounding to 4 places absorbs floating-point noise, so a similarity of
    # 0.9999999 still lands in the top bucket instead of truncating to 4.
    scaled = round(score * top, 4)
    # Cosine similarity can be negative; clamp so every score gets a label
    # rather than falling through and producing an empty result.
    return min(top, max(0, int(scaled)))


def semantic_sim(sentence1, sentence2):
    """Describe how semantically similar two sentences are.

    Both sentences are embedded with the module-level ``tokenizer`` and
    ``model``, their cosine similarity is scaled to a 0-5 bucket, and the
    label for that bucket is returned.

    Args:
        sentence1: First sentence to compare.
        sentence2: Second sentence to compare.

    Returns:
        One of the six strings in ``_SIMILARITY_LABELS``. Negative
        similarities are reported as "Completely dissimilar".
    """
    em1 = embed(sentence1, tokenizer, model)
    em2 = embed(sentence2, tokenizer, model)
    # similarity() yields a single-element tensor for one sentence per side;
    # float() extracts that scalar.
    score = float(similarity(em1, em2))
    return _SIMILARITY_LABELS[_similarity_bucket(score)]
# Build the two-text-in, one-text-out UI around semantic_sim and start
# serving it right away. Note that `iface` is bound to whatever launch()
# returns, not to the Interface object itself.
iface = gr.Interface(
    fn=semantic_sim,
    inputs=["text", "text"],
    outputs=["text"],
).launch()