File size: 1,992 Bytes
d82542d
45d6b11
d82542d
2014880
d82542d
3a29d4a
2014880
45d6b11
2014880
3a29d4a
 
45d6b11
4829b97
45d6b11
2014880
704ef0e
45d6b11
2014880
 
 
d69bc63
45d6b11
 
2014880
 
 
 
 
 
 
 
 
45d6b11
 
d82542d
 
45d6b11
 
 
2014880
 
704ef0e
d82542d
704ef0e
 
 
 
 
 
 
 
d82542d
 
704ef0e
 
 
 
 
 
 
 
 
d82542d
 
 
704ef0e
d82542d
704ef0e
 
 
bea0c91
2014880
 
d82542d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import argparse
import logging

import datasets
import gradio as gr
import sentence_transformers

# Suppress every log record at CRITICAL severity and below, process-wide.
logging.disable(logging.CRITICAL)

# Sentence-embedding model used to encode search queries; pinned to the CPU.
_MODEL_NAME = "dangvantuan/sentence-camembert-large"
model = sentence_transformers.SentenceTransformer(_MODEL_NAME, device="cpu")

# Corpus read from a local JSON file, with a previously saved FAISS index
# attached to it under the "embeddings" index name.
_DATA_FILES = ["./dataset.json"]
_INDEX_FILE = "index.faiss"
dataset = datasets.load_dataset(
    "json",
    data_files=_DATA_FILES,
    split="train",
)
dataset.load_faiss_index("embeddings", _INDEX_FILE)

def search(query, k=3):
    """Return the ``k`` dataset entries nearest to ``query``.

    The query is encoded with the module-level ``model`` and looked up in the
    "embeddings" index of the module-level ``dataset``.

    Args:
        query: Text to search for.
        k: Number of neighbours to fetch; coerced with ``int`` before the
            lookup, so a float value is accepted.

    Returns:
        A list of dicts, one per retrieved entry, with the keys ``"title"``,
        ``"transcript"`` (the entry text prefixed by ``[start ====> end]``)
        and ``"link"``.
    """
    _, hits = dataset.get_nearest_examples(
        "embeddings", model.encode(query), k=int(k)
    )
    # Pull the five parallel columns out of the result dict, then walk them
    # in lockstep to build one record per hit.
    columns = tuple(hits[name] for name in ("text", "start", "end", "title", "url"))
    return [
        {
            "title": title,
            "transcript": f"[{start!s} ====> {end!s}] {text}",
            "link": url,
        }
        for text, start, end, title, url in zip(*columns)
    ]

# Gradio UI: a free-text query plus the number of hits to return, rendered as
# a single JSON document.
#
# `search` returns ONE list of result dicts whose length follows K, so it is
# bound to one JSON output instead of a fixed set of per-result widgets (a
# multi-output interface expects exactly one return value per output).
iface = gr.Interface(
    fn=search,
    inputs=[
        gr.inputs.Textbox(
            label="Query", placeholder="Type in a search query...", lines=3
        ),
        # The Number input takes no `description` argument, so the hint is
        # carried by the label instead.
        gr.inputs.Number(
            label="K (number of results to return)",
            default=3,
        ),
    ],
    outputs=gr.outputs.JSON(label="Results"),
    title="Camembert and Faiss-powered Search Engine",
    description="Search through a dataset using Camembert and Faiss",
    theme="default",
    layout="vertical",
    allow_flagging=False,
    allow_screenshot=False,
    # NOTE(review): the former `allow_share` / `allow_download` keywords are
    # not Interface parameters and were dropped. If a public share link is
    # wanted, that is requested via `launch(share=True)` -- confirm intent.
)

iface.launch()