# Hugging Face Space: ultron1996/multimodal_rag
# Space status: Runtime error
# File size: 8,308 bytes

import gradio as gr
import tempfile
import os
import fitz  # PyMuPDF for working with PDF files
import uuid

# Importing middleware and RAG (Retrieval-Augmented Generation) components
from middleware import Middleware
from rag import Rag

# Module-level Rag instance, constructed once at import time and used by
# PDFSearchApp.search_documents to answer queries.
rag = Rag()  # Initializing RAG for question-answering functionality

# Function to generate a unique UUID for each user session
def generate_uuid(state):
    """Return the session's user UUID, creating and storing one if absent.

    Args:
        state: Mutable session state dict; the UUID is kept under the
            "user_uuid" key and written back in place.

    Returns:
        The UUID string stored in ``state["user_uuid"]``.
    """
    # .get() treats a missing key the same as an unset (None) one, so a
    # state dict created without "user_uuid" no longer raises KeyError.
    if state.get("user_uuid") is None:
        state["user_uuid"] = str(uuid.uuid4())
    return state["user_uuid"]


class PDFSearchApp:
    """Manage PDF upload, indexing, and querying.

    Per-user bookkeeping is keyed by the UUID that ``generate_uuid`` stores
    in the session state dict.
    """

    def __init__(self):
        self.indexed_docs = {}  # Maps user ID -> True once that user has indexed a PDF
        self.current_pdf = None  # Name/path of the most recently uploaded PDF

    @staticmethod
    def _page_image_path(user_id, page_index):
        """Return the page-image path for a page index reported by the search.

        The index is incremented by one to form the file name (presumably the
        search reports zero-based pages while images are numbered from 1 --
        confirm against Middleware).
        """
        return f"pages/{user_id}/page_{page_index + 1}.png"

    @staticmethod
    def _top_page_index(search_results):
        """Return the page index of the best hit, or None if there is no hit.

        ``search_results`` is expected to hold one list of hits per query,
        each hit being indexable with the page index at position 1.
        """
        if not search_results or not search_results[0]:
            return None
        return search_results[0][0][1]

    def upload_and_convert(self, state, file, max_pages):
        """Index an uploaded PDF for the current session's user.

        Args:
            state: Session state dict holding the "user_uuid" entry.
            file: Uploaded file object exposing a ``name`` attribute, or
                None when nothing was uploaded.
            max_pages: Page limit forwarded to ``Middleware.index``.

        Returns:
            A status message string (success, "No file uploaded", or an
            error description).
        """
        user_id = generate_uuid(state)

        if file is None:
            return "No file uploaded"

        print(f"Uploading file: {file.name}, id: {user_id}")

        try:
            self.current_pdf = file.name

            # A fresh collection is requested for every upload.
            middleware = Middleware(user_id, create_collection=True)
            pages = middleware.index(
                pdf_path=file.name, id=user_id, max_pages=max_pages
            )

            # Only mark the user as searchable after indexing succeeded.
            self.indexed_docs[user_id] = True

            return f"Uploaded and extracted {len(pages)} pages"
        except Exception as e:  # UI boundary: report the failure as a status string
            return f"Error processing PDF: {str(e)}"

    def search_documents(self, state, query, num_results=1):
        """Find the best-matching page for a query and ask the RAG model about it.

        Args:
            state: Session state dict holding the "user_uuid" entry.
            query: The user's search text.
            num_results: Currently unused; kept for interface compatibility.

        Returns:
            A ``(image_path, text)`` tuple. On success ``image_path`` is the
            path of the top-matching page image and ``text`` is the RAG
            answer. On any failure ``image_path`` is None and ``text`` is the
            message to show, so that an error string is never handed to an
            image output as if it were a file path.
        """
        print(f"Searching for query: {query}")
        user_id = generate_uuid(state)

        if not self.indexed_docs.get(user_id, False):
            print("Please index documents first")
            return None, "Please index documents first"

        if not query:
            print("Please enter a search query")
            return None, "Please enter a search query"

        try:
            middleware = Middleware(user_id, create_collection=False)

            page_index = self._top_page_index(middleware.search([query]))
            if page_index is None:
                # Report "no hits" explicitly instead of surfacing an IndexError.
                print("No relevant matches found in the PDF")
                return None, "No relevant matches found in the PDF"

            print(f"Retrieved page number: {page_index + 1}")

            img_path = self._page_image_path(user_id, page_index)
            print(f"Retrieved image path: {img_path}")

            rag_response = rag.get_answer_from_gemini(query, [img_path])

            return img_path, rag_response
        except Exception as e:  # UI boundary: log and report instead of crashing
            print(f"Error during search: {e}")
            return None, f"Error during search: {str(e)}"
    

# Function to create the Gradio user interface
def create_ui():
    """Build the Gradio Blocks interface and wire it to a PDFSearchApp.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    app = PDFSearchApp()  # Backend object whose methods serve the UI events

    with gr.Blocks() as demo:
        # Session state dict; "user_uuid" is filled in lazily by the handlers.
        state = gr.State(value={"user_uuid": None})

        # Header and introduction markdown
        gr.Markdown("# Colpali Milvus Multimodal RAG Demo")
        gr.Markdown(
            "This demo showcases how to use [Colpali](https://github.com/illuin-tech/colpali) embeddings with [Milvus](https://milvus.io/) and utilizing Gemini/OpenAI multimodal RAG for pdf search and Q&A."
        )

        # Upload PDF tab
        with gr.Tab("Upload PDF"):
            with gr.Column():
                # Input for uploading files
                file_input = gr.File(label="Upload PDF")

                # Slider to select the maximum number of pages to index.
                # step=1 keeps the default (20) and the maximum (50) on the
                # slider's grid; a step of 10 starting from 1 would only
                # allow 1, 11, 21, 31, 41.
                max_pages_input = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=20,
                    step=1,
                    label="Max pages to extract and index"
                )

                # Textbox to display indexing status
                status = gr.Textbox(label="Indexing Status", interactive=False)

        # Query tab for searching documents
        with gr.Tab("Query"):
            with gr.Column():
                # Textbox for entering search queries
                query_input = gr.Textbox(label="Enter query")

                # Button to trigger the search
                search_btn = gr.Button("Query")

                # Textbox to display the response from RAG
                llm_answer = gr.Textbox(label="RAG Response", interactive=False)

                # Image display for the top-matching page
                images = gr.Image(label="Top page matching query")

        # Indexing starts whenever the file input changes.
        file_input.change(
            fn=app.upload_and_convert,
            inputs=[state, file_input, max_pages_input],
            outputs=[status]
        )

        # search_documents returns a 2-tuple mapped onto (image, answer text).
        search_btn.click(
            fn=app.search_documents,
            inputs=[state, query_input],
            outputs=[images, llm_answer]
        )

    return demo  # Return the constructed UI

# Entry point to launch the application
if __name__ == "__main__":
    # Build and serve the UI only when this file is executed as a script,
    # not when it is imported as a module.
    demo = create_ui()  # Create the Gradio interface
    demo.launch()  # Launch the app