Spaces:
Sleeping
Sleeping
File size: 1,534 Bytes
d82542d 45d6b11 d82542d 2014880 d82542d 2014880 45d6b11 2014880 9a26bab 45d6b11 9a26bab 45d6b11 2014880 9a26bab 45d6b11 2014880 d69bc63 45d6b11 2014880 45d6b11 d82542d 45d6b11 2014880 9a26bab d82542d 9a26bab d82542d 9a26bab d82542d 9a26bab d82542d 2014880 d82542d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import argparse
import logging
import datasets
import gradio as gr
# Suppress every log record of severity CRITICAL and below for the whole process.
logging.disable(logging.CRITICAL)

# Identifier of the embedding model (presumably a Hugging Face Hub model id).
model_name = "dangvantuan/sentence-camembert-large"

# The package is imported as `gr`; the bare name `gradio` is never bound in this
# module, so calling `gradio.load(...)` raised NameError at import time.
# NOTE(review): search() calls `model.encode(query)`. Confirm that the object
# returned by `gr.load` exposes `encode`; if it does not, the model has to be
# loaded with the library that provides it. `gr.load` may also require a
# "models/" prefix or a `src=` argument depending on the installed Gradio
# version -- verify before deploying.
model = gr.load(model_name)

# Read the records from the local JSON file as a single "train" split, then
# attach the prebuilt FAISS index stored in "index.faiss" under the index name
# "embeddings" so nearest-neighbour lookups can be run against it.
dataset = datasets.load_dataset("json", data_files=["./data/dataset.json"], split="train")
dataset.load_faiss_index("embeddings", "index.faiss")
def search(query, k):
    """Return the dataset entries nearest to ``query``.

    The query is encoded with the module-level ``model`` and looked up in the
    "embeddings" index of the module-level ``dataset``.

    Args:
        query: Value to search for; passed unchanged to ``model.encode``.
        k: Number of neighbours to request; coerced with ``int`` before the
            lookup.

    Returns:
        A list with one dict per retrieved example, holding the keys
        "title", "transcript" (the example text prefixed with its
        "[start ====> end]" span) and "link".
    """
    embedding = model.encode(query)
    # Only the retrieved examples are used; the first returned value is discarded.
    _, hits = dataset.get_nearest_examples("embeddings", embedding, k=int(k))
    rows = zip(hits["text"], hits["start"], hits["end"], hits["title"], hits["url"])
    return [
        {
            "title": title,
            "transcript": f"[{begin!s} ====> {finish!s}] {text}",
            "link": url,
        }
        for text, begin, finish, title, url in rows
    ]
# Web UI around search(): a free-text query plus the number of results to fetch.
iface = gr.Interface(
    search,
    inputs=[
        gr.inputs.Textbox(label="Query"),
        # The legacy Number input takes no min/max arguments (passing
        # `min_value`/`max_value` raises TypeError). A Slider expresses the
        # intended 1..10 range; step=1 keeps K a whole number.
        gr.inputs.Slider(minimum=1, maximum=10, step=1, default=3, label="K"),
    ],
    # search() returns a list of K dicts. A single JSON output displays that
    # list for any K, whereas three fixed Textbox outputs only matched K == 3.
    outputs=gr.outputs.JSON(label="Results"),
    title="Camembert and Faiss-powered Search Engine",
    description="Search through a dataset using Camembert and Faiss",
    theme="light",
    layout="vertical",
)

# Start the Gradio server for the interface defined above.
iface.launch()
|