|
import tensorflow as tf |
|
import tensorflow_hub as hub |
|
from tensorflow_text import SentencepieceTokenizer |
|
import gradio as gr |
|
import math |
|
|
|
# TF Hub handle for the multilingual Universal Sentence Encoder (large, v3).
MODEL_URL = "https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3"

# Loading happens once at import time; the first run downloads and caches the model.
model = hub.load(MODEL_URL)
|
|
|
def embed_text(text: str) -> list:
    """Embed a single sentence with the Universal Sentence Encoder.

    Args:
        text: The input sentence to embed.

    Returns:
        A nested Python list of floats — shape (1, embedding_dim) — suitable
        for JSON serialization by the Gradio interface.
    """
    # The USE SavedModel signature expects a rank-1 batch of strings;
    # passing a bare scalar string can fail the signature match, so wrap it.
    embeddings = model([text])
    # Convert the TF tensor to plain Python lists so gr.JSON can render it.
    return embeddings.numpy().tolist()
|
|
|
# Tab 1: free-text in, raw embedding vector out (rendered as JSON).
embed_text_inter = gr.Interface(
    fn=embed_text,
    inputs="text",
    outputs=gr.JSON(),
    title="Universal Sentence Encoder 3 Large",
)
|
|
|
def distance(text_1: str, text_2: str) -> float:
    """Euclidean distance between the embeddings of two sentences.

    Args:
        text_1: First sentence.
        text_2: Second sentence.

    Returns:
        The L2 distance between the two embedding vectors as a plain float.
    """
    # Batch each input: the USE signature expects a rank-1 batch of strings.
    embeddings_1 = model([text_1])
    embeddings_2 = model([text_2])
    # tf.norm reduces over every axis of the (1, dim) difference tensor.
    # Note: the original `math.sqrt(sum((e1 - e2)**2))` raises, because
    # Python's sum() over a 2-D tensor yields a tensor, not a scalar.
    dist = tf.norm(embeddings_1 - embeddings_2)
    # Convert from a TF scalar tensor to a native float for the "number" output.
    return float(dist)
|
|
|
# Tab 2: two sentences in, a single scalar distance out.
distance_inter = gr.Interface(
    fn=distance,
    inputs=["text", "text"],
    outputs="number",
    title="Universal Sentence Encoder 3 Large",
)
|
|
|
|
|
# Combine both demos into a single tabbed app and start the local server.
iface = gr.TabbedInterface(
    [embed_text_inter, distance_inter],
    title="Universal Sentence Encoder 3 Large",
)

iface.launch()