import json

import gradio as gr
import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer

# Small CPU-friendly sentence-embedding model (BERT-style encoder).
model_name = "Supabase/gte-small"
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


def _to_postgres_array(vectors):
    """Format a 2-D float array as a nested PostgreSQL array literal.

    E.g. [[0.1, 0.2], [0.3, 0.4]] -> "{{0.1,0.2},{0.3,0.4}}".
    """
    rows = ("{" + ",".join(map(str, row)) + "}" for row in vectors)
    return "{" + ",".join(rows) + "}"


def text_to_vector(texts_json):
    """Embed a JSON array of sentences and return a PostgreSQL array literal.

    Parameters
    ----------
    texts_json : str
        JSON-encoded array of strings, e.g. '["hello", "world"]'.

    Returns
    -------
    str
        Nested PostgreSQL-friendly array string, one inner array per sentence.

    Raises
    ------
    ValueError
        If the input is not valid JSON, is not a list, is empty, or contains
        non-string elements.
    """
    try:
        texts = json.loads(texts_json)
    except json.JSONDecodeError:
        raise ValueError("Invalid JSON format.") from None
    if not isinstance(texts, list):
        raise ValueError("Input must be a JSON array of strings.")
    if not texts:
        raise ValueError("Input array must not be empty.")
    if not all(isinstance(t, str) for t in texts):
        raise ValueError("Input must be a JSON array of strings.")

    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
    # Inference only: disable autograd to avoid tracking gradients on CPU.
    with torch.no_grad():
        outputs = model(**inputs)
    # NOTE(review): uses the CLS pooler output; GTE-family models are usually
    # mean-pooled over last_hidden_state — confirm this matches the intended
    # embedding before relying on vector quality.
    vectors = outputs.pooler_output.numpy()

    return _to_postgres_array(vectors)


demo = gr.Interface(
    fn=text_to_vector,
    inputs=gr.Textbox(
        label="Enter JSON array",
        placeholder="Enter an array of sentences as a JSON string",
    ),
    outputs=gr.Textbox(label="Text Vectors (PostgreSQL Array)", lines=10),
    title="Batch Text to Vector",
    description="This demo converts an array of sentences to vectors and returns them as a PostgreSQL-friendly array.",
)

if __name__ == "__main__":
    demo.launch()