# multimodal_rag / app.py
# ej68okap: "new code added" (9832882)
# raw | history | blame | 8.31 kB
import gradio as gr
import tempfile
import os
import fitz # PyMuPDF for working with PDF files
import uuid
# Importing middleware and RAG (Retrieval-Augmented Generation) components
from middleware import Middleware
from rag import Rag
# Single module-level Rag instance, created at import time.
# PDFSearchApp.search_documents calls its get_answer_from_gemini() with the
# query and the retrieved page image path.
rag = Rag()
# Function to generate a unique UUID for each user session
def generate_uuid(state):
    """Return the session's user UUID, creating and storing one if absent.

    Args:
        state: Mutable session dict. The UUID is kept under the
            ``"user_uuid"`` key; the dict is updated in place the first
            time a UUID is generated.

    Returns:
        str: The UUID string stored in ``state["user_uuid"]``.
    """
    # .get() treats a missing key the same as an explicit None, so a state
    # dict that was never seeded with "user_uuid" does not raise KeyError.
    if state.get("user_uuid") is None:
        state["user_uuid"] = str(uuid.uuid4())
    return state["user_uuid"]
class PDFSearchApp:
    """Manage PDF upload, indexing, and querying for the UI callbacks."""

    def __init__(self):
        # Maps a session's user UUID to True once that session has
        # successfully indexed a PDF.
        self.indexed_docs = {}
        # `name` of the most recently uploaded file (None until an upload).
        self.current_pdf = None

    def upload_and_convert(self, state, file, max_pages):
        """Index an uploaded PDF for the current session.

        Args:
            state: Session state dict; passed to ``generate_uuid`` to obtain
                (or lazily create) the session's user UUID.
            file: Uploaded file object exposing a ``name`` attribute, or
                ``None`` when nothing was uploaded.
            max_pages: Page limit forwarded unchanged to ``Middleware.index``.

        Returns:
            str: A status message: the number of pages indexed on success,
            otherwise the reason indexing did not happen.
        """
        user_id = generate_uuid(state)

        if file is None:
            return "No file uploaded"

        print(f"Uploading file: {file.name}, id: {user_id}")

        try:
            self.current_pdf = file.name
            # create_collection=True: this is the indexing path.
            middleware = Middleware(user_id, create_collection=True)
            pages = middleware.index(
                pdf_path=file.name,
                id=user_id,
                max_pages=max_pages,
            )
            # Only mark the session as indexed after indexing succeeded.
            self.indexed_docs[user_id] = True
            return f"Uploaded and extracted {len(pages)} pages"
        except Exception as e:
            # UI boundary: report the failure as status text instead of
            # letting the callback raise.
            return f"Error processing PDF: {str(e)}"

    def search_documents(self, state, query, num_results=1):
        """Find the best-matching page for a query and ask the RAG model.

        Args:
            state: Session state dict; passed to ``generate_uuid``.
            query: The user's search text.
            num_results: Accepted for interface compatibility. Only the top
                match is used at present.

        Returns:
            tuple: ``(image_path, text)``. On success ``image_path`` is the
            path of the matched page image and ``text`` is the RAG answer.
            When there is nothing to show (nothing indexed, empty query, no
            match, or an error) ``image_path`` is ``None`` and ``text``
            carries the message. The first slot is bound to an image output
            in the UI, so it must never hold a status string.
        """
        print(f"Searching for query: {query}")
        user_id = generate_uuid(state)

        if not self.indexed_docs.get(user_id, False):
            print("Please index documents first")
            return None, "Please index documents first"

        if not query:
            print("Please enter a search query")
            return None, "Please enter a search query"

        try:
            # create_collection=False: reuse what the upload step created.
            middleware = Middleware(user_id, create_collection=False)
            search_results = middleware.search([query])

            # One result list per query is expected; report an empty result
            # clearly instead of surfacing an IndexError message.
            if not search_results or not search_results[0]:
                print("No relevant matches found in the PDF")
                return None, "No relevant matches found in the PDF"

            top_match = search_results[0][0]
            # The second field of a match is used as the page index; page
            # image files are numbered one higher (presumably the index is
            # zero-based -- confirm against Middleware.search).
            page_num = top_match[1] + 1
            print(f"Retrieved page number: {page_num}")

            img_path = f"pages/{user_id}/page_{page_num}.png"
            print(f"Retrieved image path: {img_path}")

            rag_response = rag.get_answer_from_gemini(query, [img_path])
            return img_path, rag_response
        except Exception as e:
            # UI boundary: log and report rather than raise.
            print(f"Error during search: {e}")
            return None, f"Error during search: {str(e)}"
# Function to create the Gradio user interface
def create_ui():
    """Build the Gradio Blocks interface and wire it to a PDFSearchApp.

    The layout has two tabs: "Upload PDF" (file input, page-limit slider,
    indexing status) and "Query" (query box, button, RAG answer, matched
    page image). Changing the file input triggers indexing; clicking the
    button triggers a search.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    app = PDFSearchApp()

    with gr.Blocks() as demo:
        # Per-session state; "user_uuid" is filled in on first use.
        state = gr.State(value={"user_uuid": None})

        gr.Markdown("# Colpali Milvus Multimodal RAG Demo")
        gr.Markdown(
            "This demo showcases how to use [Colpali](https://github.com/illuin-tech/colpali) embeddings with [Milvus](https://milvus.io/) and utilizing Gemini/OpenAI multimodal RAG for pdf search and Q&A."
        )

        with gr.Tab("Upload PDF"):
            with gr.Column():
                file_input = gr.File(label="Upload PDF")
                # step=1 so that every value in [1, 50] is selectable. With
                # minimum=1 a step of 10 only allows 1, 11, 21, 31, 41, which
                # excludes both the default (20) and the maximum (50).
                max_pages_input = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=20,
                    step=1,
                    label="Max pages to extract and index",
                )
                status = gr.Textbox(label="Indexing Status", interactive=False)

        with gr.Tab("Query"):
            with gr.Column():
                query_input = gr.Textbox(label="Enter query")
                search_btn = gr.Button("Query")
                llm_answer = gr.Textbox(label="RAG Response", interactive=False)
                images = gr.Image(label="Top page matching query")

        # Indexing starts as soon as the selected file changes.
        file_input.change(
            fn=app.upload_and_convert,
            inputs=[state, file_input, max_pages_input],
            outputs=[status],
        )

        # search_documents returns two values: the first goes to the image,
        # the second to the answer textbox.
        search_btn.click(
            fn=app.search_documents,
            inputs=[state, query_input],
            outputs=[images, llm_answer],
        )

    return demo
# Entry point to launch the application
if __name__ == "__main__":
    # Script entry: build the Blocks app, keep it bound to the module-level
    # name `demo`, then start serving it.
    demo = create_ui()
    demo.launch()