Chris4K committed on
Commit
6184bc1
·
verified ·
1 Parent(s): fbe6a2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -83
app.py CHANGED
@@ -1,97 +1,80 @@
1
  import os
2
  import gradio as gr
3
- from rag_tool import RAGTool
 
 
4
 
5
- # Initialize the RAG Tool with default settings
6
- rag_tool = RAGTool(
7
- documents_path="./documents",
8
- embedding_model="sentence-transformers/all-MiniLM-L6-v2",
9
- vector_store_type="faiss",
10
- chunk_size=1000,
11
- chunk_overlap=200,
12
- persist_directory="./vector_store"
13
- )
14
 
15
- # Function to handle document uploads
16
- def upload_documents(files, chunk_size, chunk_overlap, embedding_model, vector_store_type):
17
- # Create a temporary directory for uploaded files
18
- os.makedirs("./uploaded_docs", exist_ok=True)
19
-
20
- # Save uploaded files
21
- for file in files:
22
- file_path = os.path.join("./uploaded_docs", os.path.basename(file.name))
23
- with open(file_path, "wb") as f:
24
- f.write(file.read())
25
-
26
- # Initialize a new RAG Tool with the uploaded documents
27
- global rag_tool
28
- rag_tool = RAGTool(
29
- documents_path="./uploaded_docs",
30
- embedding_model=embedding_model,
31
- vector_store_type=vector_store_type,
32
- chunk_size=int(chunk_size),
33
- chunk_overlap=int(chunk_overlap),
34
- persist_directory="./uploaded_vector_store"
35
- )
36
-
37
- return f"Documents uploaded and processed. Vector store created with {embedding_model} model."
38
 
39
- # Function to handle queries
40
- def query_documents(query, top_k):
41
- global rag_tool
42
- return rag_tool(query, top_k=int(top_k))
43
 
44
- # Gradio interface
45
- with gr.Blocks(title="Advanced RAG Tool") as demo:
46
- gr.Markdown("# Advanced RAG Tool")
47
- gr.Markdown("Upload documents and query them using semantic search")
48
-
49
- with gr.Tab("Upload & Configure"):
50
- with gr.Row():
51
- with gr.Column():
52
- files = gr.File(file_count="multiple", label="Upload Documents")
53
- chunk_size = gr.Slider(200, 2000, value=1000, step=100, label="Chunk Size")
54
- chunk_overlap = gr.Slider(0, 500, value=200, step=50, label="Chunk Overlap")
55
-
56
- with gr.Column():
57
- embedding_models = [
58
- "sentence-transformers/all-MiniLM-L6-v2",
59
- "BAAI/bge-small-en-v1.5",
60
- "BAAI/bge-base-en-v1.5",
61
- "thenlper/gte-small",
62
- "thenlper/gte-base"
63
- ]
64
- embedding_model = gr.Dropdown(
65
- choices=embedding_models,
66
- value="sentence-transformers/all-MiniLM-L6-v2",
67
- label="Embedding Model"
68
- )
69
- vector_store_type = gr.Radio(
70
- choices=["faiss", "chroma"],
71
- value="faiss",
72
- label="Vector Store Type"
73
- )
74
 
75
- upload_button = gr.Button("Upload and Process Documents")
76
- upload_result = gr.Textbox(label="Upload Result")
77
 
78
- upload_button.click(
79
- upload_documents,
80
- inputs=[files, chunk_size, chunk_overlap, embedding_model, vector_store_type],
81
- outputs=upload_result
 
82
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- with gr.Tab("Query Documents"):
85
- query = gr.Textbox(label="Your Question", placeholder="What information are you looking for?")
86
- top_k = gr.Slider(1, 10, value=3, step=1, label="Number of Results")
87
- query_button = gr.Button("Search")
88
- answer = gr.Textbox(label="Results")
89
 
90
- query_button.click(
91
- query_documents,
92
- inputs=[query, top_k],
93
- outputs=answer
94
- )
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  # Launch the app
97
  if __name__ == "__main__":
 
1
  import os
2
  import gradio as gr
3
+ import warnings
4
+ from pathlib import Path
5
+ import shutil
6
 
7
+ # Suppress LangChain deprecation warnings
8
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
 
 
 
 
 
 
 
9
 
10
+ from rag_tool import RAGTool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ # Initialize the RAG Tool
13
+ rag_tool = RAGTool()
 
 
14
 
15
+ # Function to handle document uploads
16
+ def upload_file(file):
17
+ try:
18
+ # Create documents directory if it doesn't exist
19
+ os.makedirs("./documents", exist_ok=True)
20
+
21
+ # Get the file path and name
22
+ file_path = Path(file.name)
23
+ destination = Path("./documents") / file_path.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ # Copy the file to documents directory
26
+ shutil.copy(file_path, destination)
27
 
28
+ # Configure RAG tool
29
+ rag_tool.configure(
30
+ documents_path=str(destination),
31
+ embedding_model="sentence-transformers/all-MiniLM-L6-v2",
32
+ persist_directory="./vector_store"
33
  )
34
+
35
+ return f"File uploaded and processed: {file_path.name}"
36
+ except Exception as e:
37
+ return f"Error processing file: {str(e)}"
38
+
39
+ # Function to query the documents
40
+ def query_document(question):
41
+ try:
42
+ if not hasattr(rag_tool, 'vector_store') or rag_tool.vector_store is None:
43
+ return "Please upload a document first."
44
+
45
+ response = rag_tool(question)
46
+ return response
47
+ except Exception as e:
48
+ return f"Error querying document: {str(e)}"
49
+
50
+ # Create a simple Gradio interface
51
+ with gr.Blocks(title="RAG Tool") as demo:
52
+ gr.Markdown("# Document Question Answering System")
53
+ gr.Markdown("Upload a document (PDF, TXT) and ask questions about it")
54
 
55
+ with gr.Row():
56
+ with gr.Column():
57
+ file_input = gr.File(label="Upload Document")
58
+ upload_button = gr.Button("Process Document")
59
+ upload_result = gr.Textbox(label="Upload Status")
60
 
61
+ with gr.Column():
62
+ query_input = gr.Textbox(label="Ask a Question", placeholder="What would you like to know?")
63
+ query_button = gr.Button("Get Answer")
64
+ response_output = gr.Textbox(label="Answer")
65
+
66
+ # Set up the button click events
67
+ upload_button.click(
68
+ upload_file,
69
+ inputs=file_input,
70
+ outputs=upload_result
71
+ )
72
+
73
+ query_button.click(
74
+ query_document,
75
+ inputs=query_input,
76
+ outputs=response_output
77
+ )
78
 
79
  # Launch the app
80
  if __name__ == "__main__":