# opensearchspace / app.py
# File-viewer header captured with this source:
#   suzhoum's picture | wip | 5e17fcf | raw | history blame | 1.99 kB
import gradio as gr
import ir_datasets
import pandas as pd
from autogluon.multimodal import MultiModalPredictor
# Hugging Face checkpoint used for every embedding request.
_TEXT_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Lazily-built predictor shared by all calls; populated by _get_predictor().
_predictor = None


def _get_predictor():
    """Return the shared feature-extraction predictor, building it on first use."""
    global _predictor
    if _predictor is None:
        # Constructing the predictor is the expensive step, so do it once
        # instead of on every button click.
        _predictor = MultiModalPredictor(
            pipeline="feature_extraction",
            hyperparameters={
                "model.hf_text.checkpoint_name": _TEXT_MODEL_NAME,
            },
        )
    return _predictor


def text_embedding(query: str):
    """Compute the embedding of a single query string.

    Args:
        query: The text to embed.

    Returns:
        The value stored under key ``"0"`` in the result of
        ``predictor.extract_embedding([query])``.
    """
    # TODO: batch embedding over a sampled ir_datasets corpus ("beir/fiqa/dev")
    # was sketched here but is not wired up yet.
    query_embedding = _get_predictor().extract_embedding([query])
    # NOTE(review): "0" is presumably the auto-generated column name for a
    # plain list input -- confirm against the AutoGluon version in use.
    return query_embedding["0"]
def main():
    """Build the OpenSearch demo page with Gradio and launch it.

    The page has three sections: a single-query box with its embedding
    table, two sample-dataset buttons with a batch embedding table, and a
    file upload with its own button. Only the two "Generate Embedding"
    buttons have click handlers; the dataset buttons are not wired yet.
    """
    with gr.Blocks(title="OpenSearch Demo") as demo:
        # --- Single query -------------------------------------------------
        gr.Markdown("# Text Embedding for Search Queries")
        gr.Markdown("Ask an open question!")
        with gr.Row():
            inp_single = gr.Textbox(show_label=False)
        with gr.Row():
            btn_single = gr.Button("Generate Embedding")
        with gr.Row():
            out_single = gr.DataFrame(label="Embedding", show_label=True)

        # --- Sample datasets (batch) --------------------------------------
        gr.Markdown("You can select one of the sample datasets for batch inference")
        with gr.Row():
            with gr.Column():
                btn_fiqa = gr.Button("fiqa")  # TODO: no click handler yet
            with gr.Column():
                btn_faiss = gr.Button("faiss")  # TODO: no click handler yet
        with gr.Row():
            out_batch = gr.DataFrame(label="Embedding", show_label=True)

        # --- File upload (batch) ------------------------------------------
        gr.Markdown("You can also try out our batch inference by uploading a file")
        with gr.Row():
            # Previously also bound to `out_batch`, which overwrote the handle
            # to the batch results table above; it is an input component.
            inp_file = gr.File(interactive=True)
        with gr.Row():
            btn_file = gr.Button("Generate Embedding")

        btn_single.click(fn=text_embedding, inputs=inp_single, outputs=out_single)
        # NOTE(review): the file button still embeds the single-query textbox
        # and writes to the single-query table. Rewire it to `inp_file` /
        # `out_batch` once a batch handler exists.
        btn_file.click(fn=text_embedding, inputs=inp_single, outputs=out_single)

    demo.launch()
if __name__ == "__main__":
    # Start the demo only when executed as a script, not when imported.
    main()