File size: 1,308 Bytes
0c07570 87c5008 1136738 0c07570 4ebac2c 0c07570 1136738 87c5008 23b27f7 0c07570 d118cf4 23b27f7 0e41473 23b27f7 0c07570 23b27f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import gradio as gr
from transformers import AutoModel, AutoTokenizer
import numpy as np
import json
# Embedding backbone: a small checkpoint intended to run on CPU for
# text-to-vector conversion.
model_name = "Supabase/gte-small"
# Tokenizer and weights are both resolved from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
def text_to_vector(texts_json: str) -> str:
    """Embed a JSON array of strings and format the result for PostgreSQL.

    Args:
        texts_json: JSON text that decodes to a list of strings, e.g.
            ``'["first sentence", "second sentence"]'``.

    Returns:
        A PostgreSQL array literal with one inner ``{...}`` group per input
        string, e.g. ``"{{0.1,0.2},{0.3,0.4}}"``. An empty input array
        yields ``"{}"``.

    Raises:
        ValueError: If the input is not valid JSON, or does not decode to a
            list whose items are all strings.
    """
    # Keep the try body to the single call that can fail while decoding.
    try:
        texts = json.loads(texts_json)
    except (json.JSONDecodeError, TypeError) as err:
        # TypeError covers a non-text payload such as None.
        raise ValueError("Invalid JSON format.") from err

    is_string_list = isinstance(texts, list) and all(
        isinstance(item, str) for item in texts
    )
    if not is_string_list:
        raise ValueError("Input must be a JSON array of strings.")

    # Nothing to embed: return an empty PostgreSQL array instead of handing
    # an empty batch to the tokenizer.
    if not texts:
        return "{}"

    # Imported here so the block is self-contained; torch is already needed
    # at runtime because the tokenizer is asked for "pt" tensors.
    import torch

    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    # NOTE(review): sentence-embedding checkpoints are often documented with
    # mean pooling over last_hidden_state; confirm pooler_output is the
    # intended embedding for this model.
    vectors = outputs.pooler_output.detach().numpy()

    # Convert to PostgreSQL-friendly array format: one {..} group per text.
    rows = ("{" + ",".join(map(str, vector)) + "}" for vector in vectors)
    return "{" + ",".join(rows) + "}"
# Build the two text boxes first so the Interface call stays short.
_json_input = gr.Textbox(
    label="Enter JSON array",
    placeholder="Enter an array of sentences as a JSON string",
)
_vector_output = gr.Textbox(label="Text Vectors (PostgreSQL Array)", lines=10)

# Single-function UI: JSON text in, PostgreSQL array literal out.
demo = gr.Interface(
    fn=text_to_vector,
    inputs=_json_input,
    outputs=_vector_output,
    title="Batch Text to Vector",
    description=(
        "This demo converts an array of sentences to vectors and returns them as a PostgreSQL-friendly array."
    ),
)
# Start the web app as soon as the module is executed.
demo.launch()