import os
import re

import gradio as gr
import numpy as np
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# from dotenv import load_dotenv, find_dotenv
# load_dotenv(find_dotenv(r"LLMs\.env"))

# Hugging Face token, read from the environment (e.g. a Space secret named "token").
HUGGINGFACEHUB_API_TOKEN = os.environ["token"]


def clean_(doc):
    """Extract the value part of a retrieved document formatted as 'key = value'."""
    s = doc.page_content
    s = s.replace("\n", "=")
    return re.split("=", s, maxsplit=1)[-1].strip()


def similarity_search2(vectordb, query, k, unique="True"):
    """Retrieve the top-k matches for `query` and return their cleaned values."""
    print(f"\nQuery Key: {query}\nRows requested: {k}\nUnique values: {unique}")
    # Cast k to int in case the Gradio slider delivers a float.
    docs = vectordb.similarity_search(query, int(k))
    values = [clean_(d) for d in docs]
    # The Radio component passes "True"/"False" as strings.
    if unique == "True":
        return str(np.unique(np.array(values)))[1:-1]  # strip the surrounding brackets
    return str(np.array(values))[1:-1]
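
# Illustration only (hypothetical data, not executed): clean_ assumes each retrieved
# Document's page_content looks like "key = value" or "key\nvalue" and keeps the value:
#
#   from langchain_core.documents import Document
#   clean_(Document(page_content="Author = Jane Doe"))   # -> "Jane Doe"
#   clean_(Document(page_content="Author\nJane Doe"))    # -> "Jane Doe"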

with gr.Blocks() as demo:
    gr.Markdown(
        """
        Query Retrieval
        """
    )
    query = gr.Textbox(placeholder="your query", label="Query")
    k = gr.Slider(10, 1000, value=5, label="number of samples to check")
    unique = gr.Radio(["True", "False"], label="Return Unique values")
    with gr.Row():
        btn = gr.Button("Submit")

    def mmt_query(query, k, unique):
        """Load the persisted Chroma index and run the similarity search."""
        model_id = "BAAI/bge-large-en-v1.5"
        model_kwargs = {"device": "cpu"}
        embedding = HuggingFaceBgeEmbeddings(
            model_name=model_id,
            model_kwargs=model_kwargs,
            encode_kwargs={"normalize_embeddings": True},
        )
        persist_directory = r"VectorDB\db_book_mmt"
        vectordb = Chroma(
            persist_directory=persist_directory,
            embedding_function=embedding,
        )
        return similarity_search2(vectordb, query, k, unique)

    output = gr.Textbox()
    btn.click(mmt_query, [query, k, unique], output)

# interface = gr.Interface(fn=auto_eda, inputs="dataframe", outputs="json")
# demo.queue()
demo.launch()