File size: 8,308 Bytes
273089c
 
 
241c492
273089c
 
241c492
273089c
 
 
241c492
273089c
241c492
273089c
 
 
 
 
 
 
 
 
241c492
 
273089c
241c492
 
 
 
273089c
241c492
273089c
241c492
273089c
 
 
241c492
273089c
241c492
273089c
241c492
273089c
 
241c492
273089c
 
241c492
273089c
241c492
273089c
241c492
273089c
9832882
 
 
 
 
 
 
 
 
 
 
 
 
 
241c492
9832882
 
 
 
241c492
9832882
 
 
 
273089c
9832882
 
 
273089c
9832882
 
273089c
9832882
 
 
 
273089c
9832882
 
 
 
 
 
273089c
9832882
273089c
9832882
 
241c492
9832882
273089c
9832882
 
 
 
273089c
241c492
9832882
 
 
 
241c492
9832882
 
 
 
241c492
9832882
 
 
 
241c492
9832882
 
 
241c492
9832882
 
241c492
9832882
 
241c492
9832882
241c492
9832882
 
 
241c492
9832882
 
241c492
9832882
 
 
273089c
241c492
 
 
 
 
273089c
241c492
273089c
241c492
273089c
241c492
 
 
 
 
273089c
 
241c492
273089c
241c492
 
273089c
 
 
 
 
 
 
241c492
 
273089c
241c492
 
273089c
 
241c492
273089c
241c492
 
273089c
241c492
 
273089c
241c492
 
273089c
241c492
 
273089c
 
 
 
 
241c492
273089c
 
 
 
 
 
241c492
 
 
273089c
241c492
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
import gradio as gr
import tempfile
import os
import fitz  # PyMuPDF for working with PDF files
import uuid

# Importing middleware and RAG (Retrieval-Augmented Generation) components
from middleware import Middleware
from rag import Rag

# Single module-level Rag instance, created when this module is imported and
# used by PDFSearchApp.search_documents to answer queries.
rag = Rag()  # Initializing RAG for question-answering functionality

# Function to generate a unique UUID for each user session
def generate_uuid(state):
    """Return the session's user UUID, creating and storing one on first use.

    Args:
        state: Mutable per-session mapping. The identifier lives under the
            ``"user_uuid"`` key; a missing key is treated the same as ``None``.

    Returns:
        The value stored in ``state["user_uuid"]`` (a freshly generated
        UUID4 string when none was set before this call).
    """
    # .get() tolerates a state dict that was created without the key,
    # where plain indexing would raise KeyError.
    if state.get("user_uuid") is None:
        state["user_uuid"] = str(uuid.uuid4())
    return state["user_uuid"]


class PDFSearchApp:
    """Manage PDF upload, indexing, and querying for the demo UI."""

    def __init__(self):
        # Maps a session's user UUID -> True once that session has indexed a PDF
        self.indexed_docs = {}
        # ``file.name`` of the most recently uploaded PDF
        self.current_pdf = None

    def upload_and_convert(self, state, file, max_pages):
        """Index an uploaded PDF for the current session.

        Args:
            state: Session state dict; passed to ``generate_uuid`` to obtain
                the user's identifier.
            file: Uploaded file object exposing ``.name``, or ``None`` when
                nothing was uploaded.
            max_pages: Page limit forwarded to ``Middleware.index``.

        Returns:
            str: A status message describing success or the failure reason.
        """
        user_id = generate_uuid(state)

        if file is None:
            return "No file uploaded"

        print(f"Uploading file: {file.name}, id: {user_id}")

        try:
            self.current_pdf = file.name

            # create_collection=True: this is the indexing (write) path
            middleware = Middleware(user_id, create_collection=True)
            pages = middleware.index(
                pdf_path=file.name, id=user_id, max_pages=max_pages
            )

            # Only mark the session as indexed after indexing succeeded
            self.indexed_docs[user_id] = True

            return f"Uploaded and extracted {len(pages)} pages"
        except Exception as e:
            # UI boundary: report the failure as a status string instead of
            # letting the exception propagate into the event handler.
            return f"Error processing PDF: {str(e)}"

    def search_documents(self, state, query, num_results=1):
        """Answer a query using the best-matching page of the indexed PDF.

        Args:
            state: Session state dict; passed to ``generate_uuid`` to obtain
                the user's identifier.
            query: The user's search text.
            num_results: Currently unused (only the top match is used); kept
                so existing callers that pass it keep working.

        Returns:
            tuple: ``(image_path, answer)`` on success. On any failure the
            first element is ``None`` and the second element is the message,
            so a consumer expecting an image in the first slot is never
            handed a human-readable message in place of a file path.
        """
        print(f"Searching for query: {query}")
        user_id = generate_uuid(state)

        if not self.indexed_docs.get(user_id, False):
            print("Please index documents first")
            return None, "Please index documents first"

        if not query:
            print("Please enter a search query")
            return None, "Please enter a search query"

        try:
            # create_collection=False: this is the search (read) path
            middleware = Middleware(user_id, create_collection=False)

            search_results = middleware.search([query])

            # One result list per query is expected; guard against no hits
            # so the user sees a clear message rather than an IndexError.
            if not search_results or not search_results[0]:
                print("No relevant matches found in the PDF")
                return None, "No relevant matches found in the PDF"

            # Top hit's element [1] is used as the page index; +1 because the
            # page image files are named starting from 1 (presumably the
            # index is zero-based - confirm against Middleware.search).
            page_num = search_results[0][0][1] + 1
            print(f"Retrieved page number: {page_num}")

            img_path = f"pages/{user_id}/page_{page_num}.png"
            print(f"Retrieved image path: {img_path}")

            rag_response = rag.get_answer_from_gemini(query, [img_path])

            return img_path, rag_response
        except Exception as e:
            # UI boundary: surface the failure as text, leave the image empty
            print(f"Error during search: {e}")
            return None, f"Error during search: {str(e)}"
    

# Function to create the Gradio user interface
def create_ui():
    """Build the Gradio Blocks interface for the PDF search demo.

    The interface has an "Upload PDF" tab (file input, page-limit slider,
    status box) and a "Query" tab (query box, button, answer box, page image),
    wired to a single ``PDFSearchApp`` instance.

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """
    app = PDFSearchApp()

    with gr.Blocks() as demo:
        # Session state; generate_uuid fills in "user_uuid" on first use
        state = gr.State(value={"user_uuid": None})

        # Header and introduction markdown
        gr.Markdown("# Colpali Milvus Multimodal RAG Demo")
        gr.Markdown(
            "This demo showcases how to use [Colpali](https://github.com/illuin-tech/colpali) embeddings with [Milvus](https://milvus.io/) and utilizing Gemini/OpenAI multimodal RAG for pdf search and Q&A."
        )

        # Upload PDF tab
        with gr.Tab("Upload PDF"):
            with gr.Column():
                file_input = gr.File(label="Upload PDF")

                # step=1 keeps the default (20) and the maximum (50)
                # selectable; a step of 10 counted from minimum=1 only lands
                # on 1, 11, 21, 31, 41.
                max_pages_input = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=20,
                    step=1,
                    label="Max pages to extract and index",
                )

                # Read-only box that shows the result of upload_and_convert
                status = gr.Textbox(label="Indexing Status", interactive=False)

        # Query tab for searching documents
        with gr.Tab("Query"):
            with gr.Column():
                query_input = gr.Textbox(label="Enter query")
                search_btn = gr.Button("Query")

                # Read-only box that shows the RAG answer (or an error message)
                llm_answer = gr.Textbox(label="RAG Response", interactive=False)

                # Image display for the top-matching page
                images = gr.Image(label="Top page matching query")

        # Indexing starts as soon as the selected file changes
        file_input.change(
            fn=app.upload_and_convert,
            inputs=[state, file_input, max_pages_input],
            outputs=[status],
        )

        # search_documents returns (image, answer) in this output order
        search_btn.click(
            fn=app.search_documents,
            inputs=[state, query_input],
            outputs=[images, llm_answer],
        )

    return demo

# Entry point: build and launch the UI only when this file is run as a script;
# importing the module does not start the app.
if __name__ == "__main__":
    demo = create_ui()  # Create the Gradio interface
    demo.launch()  # Launch the app