File size: 1,167 Bytes
0c07570
 
8d7a379
0c07570
 
226cae8
0c07570
 
 
56518e1
885a800
56518e1
0c07570
56518e1
 
885a800
 
 
 
 
 
 
0c07570
 
 
d118cf4
885a800
 
 
0c07570
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import json

import gradio as gr
import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer

# Checkpoint used for text-to-vector processing; the tokenizer and the
# encoder are both loaded once at import time from the same model id.
model_name = "sentence-transformers/all-mpnet-base-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

def _parse_sentences(texts):
    """Normalize textbox text or an in-memory sequence into a list of sentences."""
    if texts is None:
        return []

    if isinstance(texts, str):
        if not texts.strip():
            return []
        try:
            texts = json.loads(texts)
        except json.JSONDecodeError as err:
            raise ValueError(
                'Input must be a JSON array of sentences, e.g. ["first", "second"]'
            ) from err
        # A bare JSON string is accepted as a one-sentence batch.
        if isinstance(texts, str):
            return [texts]

    if not isinstance(texts, (list, tuple)):
        raise ValueError("Input must decode to an array of sentences")

    sentences = list(texts)
    if not all(isinstance(sentence, str) for sentence in sentences):
        raise ValueError("Every element of the input array must be a string")
    return sentences


def text_to_vector(texts):
    """Embed a batch of sentences and pair each sentence with its vector.

    Args:
        texts: Either a JSON string encoding an array of sentences (this is
            what the Gradio textbox delivers) or an already-parsed list/tuple
            of strings. A JSON string holding one sentence is treated as a
            single-element batch.

    Returns:
        A list of ``{"sentence": ..., "vector": ...}`` dicts, one per input
        sentence and in input order. ``vector`` is the comma-separated string
        form of that sentence's ``pooler_output`` row. Empty input yields
        ``[]`` without invoking the model.

    Raises:
        ValueError: If the text is not valid JSON, or does not decode to a
            string or an array of strings.
    """
    sentences = _parse_sentences(texts)
    if not sentences:
        return []

    # Tokenize the whole batch at once; padding aligns sentence lengths.
    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # NOTE(review): the sentence-transformers model card documents mean
    # pooling over token embeddings (plus normalization) for this checkpoint
    # rather than pooler_output; kept as-is so existing vectors do not
    # change -- confirm which embedding is intended.
    vectors = outputs.pooler_output.detach().cpu().numpy()

    # Serialize each vector as a comma-separated string next to its sentence.
    return [
        {"sentence": sentence, "vector": ", ".join(map(str, vector))}
        for sentence, vector in zip(sentences, vectors)
    ]

# Gradio UI: one textbox carrying a JSON array of sentences in, a JSON list
# of sentence/vector objects out.
demo = gr.Interface(
    fn=text_to_vector,
    inputs=gr.Textbox(label="Enter JSON array", placeholder="Enter an array of sentences as a JSON string"),
    outputs=gr.JSON(label="Sentence and Vector Pairs"),
    # The encoder's hidden size is 768, so the title advertises 768 (was "769").
    title="Batch Text to Vector 768 dim",
    description="This demo converts an array of sentences to vectors and returns objects with sentence and vector."
)

# Start the web server as soon as the script runs.
demo.launch()