Spaces:
Sleeping
Sleeping
import gradio as gr | |
import re | |
import os | |
import numpy as np | |
from langchain_community.vectorstores import Chroma | |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings | |
# from dotenv import load_dotenv, find_dotenv | |
# load_dotenv(find_dotenv(r"LLMs\.env")) | |
# Hugging Face Hub access token, read from the "token" environment variable.
# The lookup stays strict (KeyError when the variable is unset) so a
# misconfigured deployment fails at startup, but the error now names the
# missing variable instead of the bare KeyError('token').
try:
    HUGGINGFACEHUB_API_TOKEN = os.environ["token"]
except KeyError:
    raise KeyError(
        'environment variable "token" (Hugging Face Hub API token) is not set'
    ) from None
def clean_(l):
    """Return the text that follows the first newline or "=" in a document.

    Args:
        l: A retrieved document. Either an object exposing a ``page_content``
            attribute, or any iterable of ``(name, value)`` pairs whose first
            pair carries the text (the shape the original code relied on).

    Returns:
        The text with every newline replaced by "=", cut after the first "="
        and stripped of surrounding whitespace. When the text contains neither
        a newline nor "=", the whole stripped text is returned.
    """
    # Prefer the explicit attribute: the order in which a document yields its
    # fields when iterated is not guaranteed to put the text first.
    text = getattr(l, "page_content", None)
    if text is None:
        # Fallback: value of the first (name, value) pair.
        text = list(l)[0][1]

    # Newlines and "=" are treated as the same separator; later newlines in
    # the kept part therefore also appear as "=".
    text = text.replace("\n", "=")
    head, sep, tail = text.partition("=")
    return (tail if sep else head).strip()
def similarity_search2(vectordb, query, k, unique="True"):
    """Run a similarity search and return the cleaned hits as one string.

    Args:
        vectordb: Object exposing ``similarity_search(query, k)`` that returns
            an iterable of documents accepted by ``clean_``.
        query: Query text passed to the vector store.
        k: Number of results to request. Coerced with ``int()`` because the
            value may arrive from a UI widget as a float.
        unique: ``"True"`` (the string) or ``True`` to de-duplicate the
            cleaned texts; any other value keeps duplicates and the order
            returned by the store.

    Returns:
        ``str()`` of a NumPy array of the cleaned texts with the enclosing
        square brackets removed. With de-duplication the entries are the
        sorted unique values (``np.unique``). An empty result yields "".
    """
    print(f"\nQuery Key: {query}, \nrows requested:{k}\nUnique values:{unique}")

    # The store needs an integer result count; sliders can deliver floats.
    hits = vectordb.similarity_search(query, int(k))
    cleaned = np.array([clean_(doc) for doc in hits])

    # Accept both the radio-button string and a real boolean.
    if unique is True or unique == "True":
        cleaned = np.unique(cleaned)

    # Strip the leading "[" and trailing "]" of the array's string form.
    return str(cleaned)[1:-1]
# Embedding model used to encode queries for the persisted collection.
_MODEL_ID = "BAAI/bge-large-en-v1.5"
# Built with os.path.join so the path resolves on both Windows and POSIX;
# a literal backslash is just part of a file name on Linux.
_PERSIST_DIRECTORY = os.path.join("VectorDB", "db_book_mmt")
# Holds the vector store once it has been built (key "db").
_vectordb_cache = {}


def _get_vectordb():
    """Return the shared Chroma store, building it on first use.

    Loading the embedding model is expensive, so it is done once and reused
    by every later query instead of on each button click.
    """
    if "db" not in _vectordb_cache:
        embedding = HuggingFaceBgeEmbeddings(
            model_name=_MODEL_ID,
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )
        _vectordb_cache["db"] = Chroma(
            persist_directory=_PERSIST_DIRECTORY,
            embedding_function=embedding,
        )
    return _vectordb_cache["db"]


def mmt_query(query, k, unique):
    """Button callback: search the persisted collection for ``query``.

    Args:
        query: Text entered in the query box.
        k: Number of samples requested with the slider.
        unique: Radio selection ("True" / "False", or None when unselected).

    Returns:
        The string produced by ``similarity_search2`` for display.
    """
    return similarity_search2(_get_vectordb(), query, k, unique)


with gr.Blocks() as demo:
    gr.Markdown(
        """
<h2> <center> Query Retrieval </center> </h2>
""")
    query = gr.Textbox(placeholder="your query", label="Query")
    # NOTE(review): positional arguments are (minimum, maximum, value), so the
    # initial value 5 lies below the minimum 10 - confirm whether 5 was meant
    # as the step or the minimum.
    k = gr.Slider(10, 1000, 5, label="number of samples to check")
    unique = gr.Radio(["True", "False"], label="Return Unique values")
    with gr.Row():
        btn = gr.Button("Submit")
    output = gr.Textbox()
    # Wire the button: the three inputs are passed to mmt_query in order and
    # its return value is shown in the output box.
    btn.click(mmt_query, [query, k, unique], output)

demo.launch()