import logging

import datasets
import gradio as gr
import sentence_transformers

# Silence library logging so only Gradio's own output is printed.
logging.disable(logging.CRITICAL)

# French sentence-embedding model used to encode both the indexed
# transcript chunks and incoming queries.
model = sentence_transformers.SentenceTransformer(
    "dangvantuan/sentence-camembert-large", device="cpu"
)

# Load the transcript dataset and attach the prebuilt FAISS index over
# its "embeddings" column.
dataset = datasets.load_dataset("json", data_files=["./dataset.json"], split="train")
dataset.load_faiss_index("embeddings", "index.faiss")

def search(query, k):
    """Return the k transcript chunks whose embeddings are closest to the query."""
    query_embedding = model.encode(query)
    _, retrieved_examples = dataset.get_nearest_examples(
        "embeddings",
        query_embedding,
        k=int(k),
    )
    titles, transcripts, links = [], [], []
    for text, start, end, title, url in zip(
        retrieved_examples["text"],
        retrieved_examples["start"],
        retrieved_examples["end"],
        retrieved_examples["title"],
        retrieved_examples["url"],
    ):
        titles.append(title)
        transcripts.append(f"[{start} ====> {end}] {text}")
        links.append(url)
    # The interface below declares three Textbox outputs, so return one
    # string per output instead of a list of dicts.
    return "\n\n".join(titles), "\n\n".join(transcripts), "\n\n".join(links)

# Legacy Gradio Interface API (gr.inputs / gr.outputs), targeting an older
# gradio release: a query box and a "K" count go in, and the three text
# boxes show the matching titles, transcripts and links.
iface = gr.Interface(
    search,
    inputs=[
        gr.inputs.Textbox(label="Query"),
        gr.inputs.Number(label="K", default=3),
    ],
    outputs=[
        gr.outputs.Textbox(label="Title"),
        gr.outputs.Textbox(label="Transcript"),
        gr.outputs.Textbox(label="Link"),
    ],
    title="Camembert and Faiss-powered Search Engine",
    description="Search through a dataset using Camembert and Faiss",
    theme="light",
    layout="vertical",
)

iface.launch()
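
The app above only loads index.faiss; it does not build it. Below is a minimal sketch of an offline step that could produce that index with the same libraries. The model name, file paths and the text/start/end/title/url fields are taken from the code above; the script itself is an illustrative assumption, not the Space's actual build step.

# build_index.py -- hypothetical offline step that creates index.faiss.
import datasets
import sentence_transformers

model = sentence_transformers.SentenceTransformer(
    "dangvantuan/sentence-camembert-large", device="cpu"
)

# dataset.json is expected to hold records with "text", "start", "end",
# "title" and "url" fields, as read by the app above.
dataset = datasets.load_dataset("json", data_files=["./dataset.json"], split="train")

# Embed every transcript chunk into a new "embeddings" column.
dataset = dataset.map(
    lambda batch: {"embeddings": model.encode(batch["text"])},
    batched=True,
)

# Build a FAISS index over that column and save it so the app can attach it
# with dataset.load_faiss_index("embeddings", "index.faiss").
dataset.add_faiss_index(column="embeddings")
dataset.save_faiss_index("embeddings", "index.faiss")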