"""Gradio demo that turns a JSON array of sentences into embedding vectors."""

import json

import gradio as gr
import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer

# Load a small CPU model for text to vector processing
model_name = "sentence-transformers/all-mpnet-base-v2"
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


def _parse_sentences(texts):
    """Normalize caller/UI input into a list of sentence strings.

    Args:
        texts: Either a JSON document as a string (what the Textbox sends),
            or an already-built list/tuple of strings.

    Returns:
        A list of sentence strings (possibly empty).

    Raises:
        gr.Error: If the string is not valid JSON, or the value is not an
            array of strings.
    """
    if isinstance(texts, str):
        try:
            parsed = json.loads(texts)
        except json.JSONDecodeError as err:
            raise gr.Error(f"Input is not valid JSON: {err}") from err
        # A bare JSON string is accepted as a single-sentence batch.
        if isinstance(parsed, str):
            parsed = [parsed]
    elif isinstance(texts, (list, tuple)):
        parsed = list(texts)
    else:
        parsed = texts

    if not isinstance(parsed, list) or not all(
        isinstance(item, str) for item in parsed
    ):
        raise gr.Error(
            'Expected a JSON array of strings, e.g. ["first", "second"].'
        )
    return parsed


def text_to_vector(texts):
    """Embed each sentence and pair it with its vector.

    Args:
        texts: A JSON-encoded array of sentences (string), or a list/tuple
            of sentence strings.

    Returns:
        A list of ``{"sentence": str, "vector": str}`` dicts, one per input
        sentence, where ``vector`` is the comma-separated embedding. An empty
        input array yields an empty list.

    Raises:
        gr.Error: If the input cannot be interpreted as an array of strings.
    """
    sentences = _parse_sentences(texts)
    if not sentences:
        # Nothing to embed; avoid calling the tokenizer with an empty batch.
        return []

    # Tokenize the input array of sentences
    inputs = tokenizer(
        sentences, return_tensors="pt", padding=True, truncation=True
    )

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean-pool token embeddings, ignoring padding positions, then
    # L2-normalize. This follows the usage shown on the model card for this
    # checkpoint rather than relying on `pooler_output`.
    token_embeddings = outputs.last_hidden_state
    mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against divide-by-zero
    embeddings = torch.nn.functional.normalize(summed / counts, p=2, dim=1)
    vectors = embeddings.numpy()

    # Convert each vector to a string representation and create an object
    return [
        {"sentence": sentence, "vector": ", ".join(map(str, vector))}
        for sentence, vector in zip(sentences, vectors)
    ]


demo = gr.Interface(
    fn=text_to_vector,
    inputs=gr.Textbox(
        label="Enter JSON array",
        placeholder="Enter an array of sentences as a JSON string",
    ),
    outputs=gr.JSON(label="Sentence and Vector Pairs"),
    # Take the dimension from the loaded model instead of hard-coding it.
    title=f"Batch Text to Vector {model.config.hidden_size} dim",
    description="This demo converts an array of sentences to vectors and returns objects with sentence and vector.",
)

if __name__ == "__main__":
    demo.launch()