|
import gradio as gr |
|
from transformers import AutoModel, AutoTokenizer |
|
import numpy as np |
|
|
|
|
|
# Hugging Face Hub id of the checkpoint; the tokenizer and the encoder are
# both resolved from this single id so they always match.
model_name = "sentence-transformers/all-mpnet-base-v2"

# Loaded once at import time and reused by text_to_vector on every call.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
|
|
|
def text_to_vector(texts):
    """Embed a batch of sentences and pair each sentence with its vector.

    Args:
        texts: Either a JSON-encoded array of strings (what the Gradio
            textbox delivers) or an already-decoded list/tuple of strings.

    Returns:
        A list of ``{"sentence": ..., "vector": ...}`` dicts in input order,
        where ``vector`` is the embedding rendered as a comma-separated
        string. An empty array yields an empty list.

    Raises:
        gr.Error: If ``texts`` is not valid JSON, or does not decode to an
            array of strings.
    """
    import json  # local import so this block does not depend on file-level edits

    # The textbox hands us raw text; without decoding it, the tokenizer would
    # treat the whole JSON document as one sentence and zip() would pair
    # single characters with vectors.
    if isinstance(texts, str):
        try:
            sentences = json.loads(texts)
        except json.JSONDecodeError as err:
            raise gr.Error(f"Input is not valid JSON: {err}") from err
    else:
        sentences = texts

    if not isinstance(sentences, (list, tuple)) or not all(
        isinstance(item, str) for item in sentences
    ):
        raise gr.Error("Input must be a JSON array of strings.")

    sentences = list(sentences)
    if not sentences:
        # Nothing to embed; avoid calling the tokenizer with an empty batch.
        return []

    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True)
    outputs = model(**inputs)

    # Sentence embedding = attention-masked mean of the token embeddings,
    # then L2-normalised. This is the pooling the checkpoint's model card
    # prescribes; `pooler_output` is not the sentence embedding for it.
    token_embeddings = outputs.last_hidden_state.detach().numpy()
    mask = inputs["attention_mask"].numpy()[..., np.newaxis].astype(
        token_embeddings.dtype
    )
    summed = (token_embeddings * mask).sum(axis=1)
    token_counts = np.clip(mask.sum(axis=1), 1e-9, None)  # guard against /0
    vectors = summed / token_counts
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    vectors = vectors / np.clip(norms, 1e-12, None)

    return [
        {"sentence": sentence, "vector": ", ".join(map(str, vector))}
        for sentence, vector in zip(sentences, vectors)
    ]
|
|
|
# Gradio UI: one textbox that takes a JSON array of sentences, and a JSON
# viewer that shows the sentence/vector pairs returned by text_to_vector.
demo = gr.Interface(
    fn=text_to_vector,
    inputs=gr.Textbox(
        label="Enter JSON array",
        placeholder="Enter an array of sentences as a JSON string",
    ),
    outputs=gr.JSON(label="Sentence and Vector Pairs"),
    # all-mpnet-base-v2 embeddings are 768-dimensional; the title previously
    # advertised 769.
    title="Batch Text to Vector 768 dim",
    description="This demo converts an array of sentences to vectors and returns objects with sentence and vector.",
)

# Start the web server as soon as the script is executed.
demo.launch()
|
|