File size: 1,990 Bytes
d59a442
5e17fcf
 
 
d59a442
 
 
 
 
5e17fcf
 
d59a442
 
 
 
 
 
5e17fcf
 
d59a442
5e17fcf
d59a442
 
 
 
 
 
 
5e17fcf
d59a442
5e17fcf
 
 
 
d59a442
5e17fcf
 
 
 
 
 
 
 
 
 
 
 
 
 
d59a442
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
import ir_datasets
import pandas as pd

from autogluon.multimodal import MultiModalPredictor


# Hugging Face checkpoint used to embed queries.
_EMBEDDING_CHECKPOINT = "sentence-transformers/all-MiniLM-L6-v2"

# Predictors that have already been built, keyed by checkpoint name, so the
# model is constructed once per process instead of once per call.
_PREDICTORS = {}


def _get_predictor(checkpoint_name: str):
    """Return the feature-extraction predictor for *checkpoint_name*, building it on first use."""
    predictor = _PREDICTORS.get(checkpoint_name)
    if predictor is None:
        # A race on first use would at worst build the predictor twice; the
        # last one stored wins and both are equivalent.
        predictor = MultiModalPredictor(
            pipeline="feature_extraction",
            hyperparameters={"model.hf_text.checkpoint_name": checkpoint_name},
        )
        _PREDICTORS[checkpoint_name] = predictor
    return predictor


def text_embedding(query: str):
    """Compute the embedding of a single text query.

    The query is wrapped in a one-element list and passed to
    ``MultiModalPredictor.extract_embedding``. The predictor itself is
    created lazily and reused across calls, so only the first call pays the
    model construction cost.

    Args:
        query: The text to embed.

    Returns:
        The entry stored under the key ``"0"`` in the result of
        ``extract_embedding``.
        NOTE(review): presumably the embedding matrix for the single
        unnamed input column -- confirm against the installed AutoGluon
        version.
    """
    embeddings = _get_predictor(_EMBEDDING_CHECKPOINT).extract_embedding([query])
    return embeddings["0"]


def main():
    """Build the Gradio demo page and start serving it.

    The page has three sections, created top to bottom: a single-query
    section (text box, button, result table), a sample-dataset section
    (two buttons and a result table), and a file-upload section (file
    widget and button). Only the two "Generate Embedding" buttons have
    click handlers attached.
    """
    with gr.Blocks(title="OpenSearch Demo") as app:
        # --- Single query ---------------------------------------------------
        gr.Markdown("# Text Embedding for Search Queries")
        gr.Markdown("Ask an open question!")
        with gr.Row():
            query_box = gr.Textbox(show_label=False)
        with gr.Row():
            single_button = gr.Button("Generate Embedding")
        with gr.Row():
            embedding_table = gr.DataFrame(label="Embedding", show_label=True)

        # --- Sample datasets (no handlers attached to these widgets) --------
        gr.Markdown("You can select one of the sample datasets for batch inference")
        with gr.Row():
            with gr.Column():
                gr.Button("fiqa")
            with gr.Column():
                gr.Button("faiss")
        with gr.Row():
            gr.DataFrame(label="Embedding", show_label=True)

        # --- File upload ----------------------------------------------------
        gr.Markdown("You can also try out our batch inference by uploading a file")
        with gr.Row():
            gr.File(interactive=True)
        with gr.Row():
            file_button = gr.Button("Generate Embedding")

        # NOTE(review): the file button is wired exactly like the single-query
        # button (it reads the text box, not the uploaded file) -- confirm
        # whether that is intended.
        for trigger in (single_button, file_button):
            trigger.click(fn=text_embedding, inputs=query_box, outputs=embedding_table)

    app.launch()


# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()