import os
import shutil

import gradio as gr

from rag_tool import RAGTool

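# Build a default RAG tool over the bundled ./documents folder at startup.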
rag_tool = RAGTool(
    documents_path="./documents",
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    vector_store_type="faiss",
    chunk_size=1000,
    chunk_overlap=200,
    persist_directory="./vector_store"
)


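# Rebuild the vector store from freshly uploaded files with the chosen settings.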
def upload_documents(files, chunk_size, chunk_overlap, embedding_model, vector_store_type):
    os.makedirs("./uploaded_docs", exist_ok=True)

    for file in files:
        # gr.File may hand back temp-file objects or plain file paths depending
        # on the Gradio version; resolve a source path either way and copy it
        # into the documents folder.
        src = file.name if hasattr(file, "name") else file
        file_path = os.path.join("./uploaded_docs", os.path.basename(src))
        shutil.copy(src, file_path)

    # Swap in a new module-level tool so later queries hit the fresh index.
    global rag_tool
    rag_tool = RAGTool(
        documents_path="./uploaded_docs",
        embedding_model=embedding_model,
        vector_store_type=vector_store_type,
        chunk_size=int(chunk_size),
        chunk_overlap=int(chunk_overlap),
        persist_directory="./uploaded_vector_store"
    )

    return f"Documents uploaded and processed. Vector store created with the {embedding_model} model."


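# Run a semantic search against the current vector store.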
def query_documents(query, top_k):
    return rag_tool(query, top_k=int(top_k))


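# Two-tab Gradio UI: one tab to upload and index documents, one to query them.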
with gr.Blocks(title="Advanced RAG Tool") as demo:
    gr.Markdown("# Advanced RAG Tool")
    gr.Markdown("Upload documents and query them using semantic search")

    with gr.Tab("Upload & Configure"):
        with gr.Row():
            with gr.Column():
                files = gr.File(file_count="multiple", label="Upload Documents")
                chunk_size = gr.Slider(200, 2000, value=1000, step=100, label="Chunk Size")
                chunk_overlap = gr.Slider(0, 500, value=200, step=50, label="Chunk Overlap")

            with gr.Column():
                embedding_models = [
                    "sentence-transformers/all-MiniLM-L6-v2",
                    "BAAI/bge-small-en-v1.5",
                    "BAAI/bge-base-en-v1.5",
                    "thenlper/gte-small",
                    "thenlper/gte-base"
                ]
                embedding_model = gr.Dropdown(
                    choices=embedding_models,
                    value="sentence-transformers/all-MiniLM-L6-v2",
                    label="Embedding Model"
                )
                vector_store_type = gr.Radio(
                    choices=["faiss", "chroma"],
                    value="faiss",
                    label="Vector Store Type"
                )

        upload_button = gr.Button("Upload and Process Documents")
        upload_result = gr.Textbox(label="Upload Result")

        upload_button.click(
            upload_documents,
            inputs=[files, chunk_size, chunk_overlap, embedding_model, vector_store_type],
            outputs=upload_result
        )

    with gr.Tab("Query Documents"):
        query = gr.Textbox(label="Your Question", placeholder="What information are you looking for?")
        top_k = gr.Slider(1, 10, value=3, step=1, label="Number of Results")
        query_button = gr.Button("Search")
        answer = gr.Textbox(label="Results")

        query_button.click(
            query_documents,
            inputs=[query, top_k],
            outputs=answer
        )


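# Launch the Gradio app when the script is run directly.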
if __name__ == "__main__":
    demo.launch()