Spaces:
Sleeping
Sleeping
File size: 1,189 Bytes
c6cd033 52773ee c6cd033 52773ee c6cd033 52773ee c6cd033 52773ee c6cd033 52773ee c6cd033 52773ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
from transformers import AutoTokenizer, AutoModel
import torch
import gradio as gr
# Both the tokenizer and the encoder weights come from the same pre-trained
# paraphrase-mpnet-base-v2 checkpoint, so the identifier is spelled out once.
_CHECKPOINT = 'sentence-transformers/paraphrase-mpnet-base-v2'
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModel.from_pretrained(_CHECKPOINT)
def get_mpnet_embeddings(sentences):
    """Return one mean-pooled embedding per input sentence.

    Args:
        sentences: Either a single string, which is split into one sentence
            per line (this is what the Gradio textbox delivers), or an
            iterable of sentence strings. Blank entries are ignored.

    Returns:
        A list with one embedding (a list of floats) per non-blank sentence,
        in input order. An empty list when there is no non-blank sentence.
    """
    # The UI asks for "one per line", so a raw string is a newline-separated
    # batch rather than a single sentence.
    if isinstance(sentences, str):
        sentences = sentences.splitlines()
    sentences = [s.strip() for s in sentences if s and s.strip()]
    if not sentences:
        # Nothing to embed; avoid calling the tokenizer with an empty batch.
        return []

    # Tokenize input sentences (padded to the longest one in the batch).
    inputs = tokenizer(sentences, return_tensors='pt', padding=True, truncation=True, max_length=512)

    # Get embeddings
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling over the sequence, weighted by the attention mask so that
    # padding positions do not dilute the embeddings of shorter sentences.
    token_embeddings = outputs.last_hidden_state
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    token_counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against division by zero
    embeddings = summed / token_counts
    return embeddings.numpy().tolist()
# Assemble the Gradio UI piece by piece: a two-line textbox feeds
# get_mpnet_embeddings and the resulting embeddings are rendered as JSON.
sentence_box = gr.Textbox(lines=2, placeholder="Enter sentences here, one per line")
embedding_view = gr.JSON()
interface_options = {
    "fn": get_mpnet_embeddings,
    "inputs": sentence_box,
    "outputs": embedding_view,
    "title": "Sentence Embeddings with MPNet",
    "description": "Enter sentences to get their embeddings with paraphrase-mpnet-base-v2 (up to 512 tokens).",
}
interface = gr.Interface(**interface_options)

# Start serving the app as soon as the script runs.
interface.launch()
|