File size: 1,167 Bytes
0c07570 8d7a379 0c07570 226cae8 0c07570 56518e1 885a800 56518e1 0c07570 56518e1 885a800 0c07570 d118cf4 885a800 0c07570 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
import json

import gradio as gr
import numpy as np
from transformers import AutoModel, AutoTokenizer
# Checkpoint used to turn text into vectors.
model_name = "sentence-transformers/all-mpnet-base-v2"

# Tokenizer and encoder are loaded once at import time and shared by every
# call to text_to_vector.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
def text_to_vector(texts):
    """Embed a batch of sentences and pair each sentence with its vector.

    Args:
        texts: Either a JSON string encoding an array of sentences (what the
            Gradio textbox delivers) or an already-decoded list/tuple of
            strings. A JSON string holding a single sentence is treated as a
            one-element batch; a blank string is treated as an empty batch.

    Returns:
        A list of ``{"sentence": ..., "vector": ...}`` dicts, one per input
        sentence and in input order. ``vector`` is the embedding rendered as
        a comma-separated string. An empty batch yields ``[]``.

    Raises:
        ValueError: If a string input is not valid JSON, or the decoded value
            is not a string or an array of strings.
    """
    if isinstance(texts, str):
        if not texts.strip():
            return []
        # The textbox hands over raw text. Without decoding it, the tokenizer
        # sees one long sentence and the zip() below pairs the single vector
        # with the first *character* of the input.
        try:
            texts = json.loads(texts)
        except json.JSONDecodeError as err:
            raise ValueError(
                'Input must be a JSON array of sentences, e.g. ["first", "second"]'
            ) from err
        if isinstance(texts, str):
            texts = [texts]

    if not isinstance(texts, (list, tuple)) or not all(
        isinstance(item, str) for item in texts
    ):
        raise ValueError("Input must be a JSON array of strings")

    sentences = list(texts)
    if not sentences:
        # Nothing to embed; skip the tokenizer and model entirely.
        return []

    # Tokenize the whole batch at once, padding to the longest sentence.
    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True)
    outputs = model(**inputs)
    # detach() drops autograd tracking so the tensor can be converted to NumPy.
    # NOTE(review): the sentence-transformers model card derives sentence
    # embeddings by mean-pooling token embeddings; pooler_output is a
    # different vector. Kept as-is so existing vectors stay comparable --
    # confirm this is intended.
    vectors = outputs.pooler_output.detach().numpy()

    # Render each vector as a comma-separated string next to its sentence.
    return [
        {"sentence": sentence, "vector": ", ".join(map(str, vector))}
        for sentence, vector in zip(sentences, vectors)
    ]
# Gradio UI: a single textbox in, a JSON view out. The textbox value is passed
# to text_to_vector as a raw string.
demo = gr.Interface(
    fn=text_to_vector,
    inputs=gr.Textbox(label="Enter JSON array", placeholder="Enter an array of sentences as a JSON string"),
    outputs=gr.JSON(label="Sentence and Vector Pairs"),
    # all-mpnet-base-v2 produces 768-dimensional vectors; the title previously
    # advertised 769.
    title="Batch Text to Vector 768 dim",
    description="This demo converts an array of sentences to vectors and returns objects with sentence and vector.",
)

# Start serving the interface.
demo.launch()
|