# Hugging Face Spaces status captured with this file: Sleeping
import gradio as gr | |
import torch | |
from transformers import DistilBertTokenizer, DistilBertModel | |
class SimilarityPredictor:
    """Score a pair of phrases with DistilBERT followed by a small linear head.

    The encoder and tokenizer are the pretrained ``distilbert-base-uncased``
    checkpoints. The scoring head is a ``Linear(768, 1)`` layer followed by a
    sigmoid, so every score lies between 0 and 1.

    NOTE(review): the head is constructed with PyTorch's default random
    initialisation and no trained weights are loaded anywhere in this class,
    so scores are uncalibrated and will differ between process starts.
    Load fine-tuned head weights here if meaningful scores are required.
    """

    MODEL_NAME = 'distilbert-base-uncased'
    HIDDEN_SIZE = 768  # input width of the scoring head
    MAX_LENGTH = 64    # token budget for the encoded phrase pair

    def __init__(self):
        """Load the encoder, tokenizer and scoring head onto the best device."""
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Use the base model instead of custom model
        self.model = DistilBertModel.from_pretrained(self.MODEL_NAME).to(self.device)
        self.tokenizer = DistilBertTokenizer.from_pretrained(self.MODEL_NAME)
        self.head = torch.nn.Sequential(
            torch.nn.Linear(self.HIDDEN_SIZE, 1),
            torch.nn.Sigmoid(),
        ).to(self.device)
        # This object is inference-only: switch to eval mode once here rather
        # than on every predict() call.
        self.model.eval()
        self.head.eval()

    def predict(self, anchor: str, target: str) -> float:
        """Return the head's score for the (anchor, target) phrase pair.

        Args:
            anchor: First phrase. ``None`` is treated as an empty string.
            target: Second phrase. ``None`` is treated as an empty string.

        Returns:
            The sigmoid output of the scoring head as a Python float.
        """
        # A cleared text field may arrive as None; the tokenizer only accepts
        # strings, so normalise before encoding.
        anchor = '' if anchor is None else anchor
        target = '' if target is None else target

        with torch.no_grad():
            # Encode the two phrases together as a single sentence pair.
            encoded = self.tokenizer(
                [anchor],
                [target],
                padding=True,
                truncation=True,
                max_length=self.MAX_LENGTH,
                return_tensors='pt',
            ).to(self.device)
            # First model output, sliced at sequence position 0
            # (presumably the [CLS] token embedding -- confirm against the
            # DistilBertModel output documentation).
            hidden_states = self.model(**encoded)[0]
            first_token = hidden_states[:, 0, :]
            score = self.head(first_token)
        # Batch size is 1, so the head yields exactly one element.
        return score.item()
# Single shared predictor: the model is loaded once when the module starts.
predictor = SimilarityPredictor()

# Sample (anchor, target) phrase pairs passed to the interface as examples.
example_pairs = [
    ["mobile phone", "cellphone"],
    ["artificial intelligence", "machine learning"],
    ["electric vehicle", "battery powered car"],
    ["wireless communication", "radio transmission"],
    ["solar panel", "photovoltaic cell"],
]
def predict_similarity(anchor, target):
    """Return the shared predictor's score for the pair, rounded to 3 decimals."""
    return round(predictor.predict(anchor, target), 3)
# Build the UI components first (same creation order as the inline form),
# then wire them into the interface.
anchor_input = gr.Textbox(label="Anchor Phrase", placeholder="Enter first phrase...")
target_input = gr.Textbox(label="Target Phrase", placeholder="Enter second phrase...")
score_output = gr.Number(label="Similarity Score (0-1)")

iface = gr.Interface(
    fn=predict_similarity,
    inputs=[anchor_input, target_input],
    outputs=score_output,
    title="Patent Phrase Similarity Checker",
    description="Compare the similarity between two patent phrases (0: Different, 1: Identical)",
    examples=example_pairs,
    theme="huggingface",
)

# Start the web app as soon as the module is executed.
iface.launch()