"""Gradio demo: upload a PDF, index its pages, and query them with multimodal RAG.

The app keeps one ``PDFSearchApp`` instance for the whole process and tells
browser sessions apart by a UUID stored in the per-session Gradio state.
"""

import os
import tempfile
import uuid

import fitz  # PyMuPDF for working with PDF files
import gradio as gr

# Importing middleware and RAG (Retrieval-Augmented Generation) components
from middleware import Middleware
from rag import Rag

rag = Rag()  # Initializing RAG for question-answering functionality


def generate_uuid(state):
    """Return the session's user UUID, creating and storing one if missing.

    Args:
        state: Mutable per-session dict; the UUID is kept under the
            ``"user_uuid"`` key.

    Returns:
        The UUID string associated with this session.
    """
    # ``.get`` tolerates a state dict that was created without the key.
    if state.get("user_uuid") is None:
        state["user_uuid"] = str(uuid.uuid4())
    return state["user_uuid"]


class PDFSearchApp:
    """Class to manage PDF upload, indexing, and querying."""

    def __init__(self):
        # Maps user UUID -> True once that user has indexed a document.
        self.indexed_docs = {}
        # Path of the most recently uploaded PDF (shared across sessions).
        self.current_pdf = None

    def upload_and_convert(self, state, file, max_pages):
        """Index an uploaded PDF for the current session.

        Args:
            state: Per-session state dict (see ``generate_uuid``).
            file: Uploaded file object exposing a ``.name`` path, or ``None``
                when nothing was uploaded.
            max_pages: Maximum number of pages to hand to the indexer.

        Returns:
            A human-readable status string for the "Indexing Status" textbox.
        """
        user_id = generate_uuid(state)

        if file is None:
            return "No file uploaded"

        print(f"Uploading file: {file.name}, id: {user_id}")

        try:
            self.current_pdf = file.name

            # A fresh collection is created for every upload by this user.
            middleware = Middleware(user_id, create_collection=True)
            pages = middleware.index(
                pdf_path=file.name, id=user_id, max_pages=max_pages
            )

            # Only mark as indexed after indexing succeeded.
            self.indexed_docs[user_id] = True
            return f"Uploaded and extracted {len(pages)} pages"
        except Exception as e:
            # UI boundary: report the failure instead of crashing the handler.
            return f"Error processing PDF: {str(e)}"

    def search_documents(self, state, query, num_results=1):
        """Search the session's indexed PDF and answer the query with RAG.

        Args:
            state: Per-session state dict (see ``generate_uuid``).
            query: The user's search query.
            num_results: How many top-ranked pages to pass to the RAG model
                (values below 1 are treated as 1).

        Returns:
            A ``(image_path, text)`` tuple matching the UI outputs
            ``[images, llm_answer]``. On success ``image_path`` is the
            top-matching page image and ``text`` is the RAG answer. On any
            failure ``image_path`` is ``None`` and ``text`` carries the
            message, so that status text is never handed to the image
            component as if it were a file path.
        """
        print(f"Searching for query: {query}")
        user_id = generate_uuid(state)

        if not self.indexed_docs.get(user_id, False):
            print("Please index documents first")
            return None, "Please index documents first"

        if not query:
            print("Please enter a search query")
            return None, "Please enter a search query"

        try:
            middleware = Middleware(user_id, create_collection=False)
            search_results = middleware.search([query])

            # One result list per query is expected; guard against no hits
            # rather than relying on an IndexError further down.
            if not search_results or not search_results[0]:
                print("No relevant matches found in the PDF")
                return None, "No relevant matches found in the PDF"

            hits = search_results[0]
            limit = min(len(hits), max(1, num_results))

            # The second element of each hit is treated as a zero-based page
            # index; page images are stored with one-based numbering.
            image_paths = [
                f"pages/{user_id}/page_{hits[i][1] + 1}.png" for i in range(limit)
            ]

            img_path = image_paths[0]
            print(f"Retrieved page number: {hits[0][1] + 1}")
            print(f"Retrieved image path: {img_path}")

            rag_response = rag.get_answer_from_gemini(query, image_paths)

            # The UI shows a single image, so only the best match is returned.
            return img_path, rag_response
        except Exception as e:
            # UI boundary: surface the error in the text output.
            print(f"Error during search: {e}")
            return None, f"Error during search: {str(e)}"


def create_ui():
    """Build the Gradio Blocks interface and wire it to a ``PDFSearchApp``.

    Returns:
        The constructed ``gr.Blocks`` demo (not yet launched).
    """
    app = PDFSearchApp()

    with gr.Blocks() as demo:
        # Per-session state; the UUID is filled in lazily by generate_uuid.
        state = gr.State(value={"user_uuid": None})

        gr.Markdown("# Colpali Milvus Multimodal RAG Demo")
        gr.Markdown(
            "This demo showcases how to use [Colpali](https://github.com/illuin-tech/colpali) embeddings with [Milvus](https://milvus.io/) and utilizing Gemini/OpenAI multimodal RAG for pdf search and Q&A."
        )

        with gr.Tab("Upload PDF"):
            with gr.Column():
                file_input = gr.File(label="Upload PDF")

                # NOTE(review): with minimum=1 and step=10 the default of 20
                # may not sit on a slider stop -- confirm intended step.
                max_pages_input = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=20,
                    step=10,
                    label="Max pages to extract and index",
                )

                status = gr.Textbox(label="Indexing Status", interactive=False)

        with gr.Tab("Query"):
            with gr.Column():
                query_input = gr.Textbox(label="Enter query")
                search_btn = gr.Button("Query")
                llm_answer = gr.Textbox(label="RAG Response", interactive=False)
                images = gr.Image(label="Top page matching query")

        # Indexing starts as soon as the selected file changes.
        file_input.change(
            fn=app.upload_and_convert,
            inputs=[state, file_input, max_pages_input],
            outputs=[status],
        )

        search_btn.click(
            fn=app.search_documents,
            inputs=[state, query_input],
            outputs=[images, llm_answer],
        )

    return demo


# Entry point to launch the application
if __name__ == "__main__":
    demo = create_ui()
    demo.launch()