Spaces:

ultron1996
/

multimodal_rag

Runtime error

multimodal_rag / app.py

ej68okap

new code added

241c492 5 months ago

8.28 kB

	import gradio as gr
	import tempfile
	import os
	import fitz # PyMuPDF for working with PDF files
	import uuid

	# Importing middleware and RAG (Retrieval-Augmented Generation) components
	from middleware import Middleware
	from rag import Rag

	rag = Rag() # Module-level Rag instance; search_documents calls its get_answer_from_gemini()

	def generate_uuid(state):
	    """Return the session's user UUID, creating and storing one if needed.

	    Args:
	        state (dict): Mutable session state; the ID is kept under the
	            "user_uuid" key and written back into this dict when created.

	    Returns:
	        str: The existing UUID, or a freshly generated uuid4 string.
	    """
	    # .get() tolerates a state dict that was created without the key,
	    # instead of failing with KeyError.
	    if state.get("user_uuid") is None:
	        state["user_uuid"] = str(uuid.uuid4())
	    return state["user_uuid"]


	class PDFSearchApp:
	    """Manage PDF upload, indexing, and querying for the Gradio UI.

	    Indexing status is tracked per user ID (the UUID kept in the session
	    state), so a single instance keeps separate status for each session.
	    """

	    def __init__(self):
	        # Maps user ID -> True once a PDF has been indexed for that user.
	        self.indexed_docs = {}
	        # file.name of the most recently uploaded PDF (None before any upload).
	        self.current_pdf = None

	    def upload_and_convert(self, state, file, max_pages):
	        """Index an uploaded PDF for the current session.

	        Args:
	            state (dict): Session state holding the "user_uuid" entry.
	            file: Uploaded file object whose ``name`` is handed to the
	                indexer as the PDF path, or None when nothing was uploaded.
	            max_pages: Page limit passed through to ``Middleware.index``.

	        Returns:
	            str: Status message describing success or the failure reason.
	        """
	        user_id = generate_uuid(state)

	        if file is None:
	            return "No file uploaded"

	        print(f"Uploading file: {file.name}, id: {user_id}")

	        try:
	            self.current_pdf = file.name

	            # A fresh collection is created for every upload.
	            middleware = Middleware(user_id, create_collection=True)
	            pages = middleware.index(
	                pdf_path=file.name,
	                id=user_id,
	                max_pages=max_pages,
	            )

	            # Only mark the user as indexed once indexing has succeeded.
	            self.indexed_docs[user_id] = True

	            return f"Uploaded and extracted {len(pages)} pages"
	        except Exception as e:
	            # UI boundary: report the failure in the status box instead of
	            # raising, and log it like search_documents does.
	            print(f"Error processing PDF: {e}")
	            return f"Error processing PDF: {str(e)}"

	    @staticmethod
	    def _search_error(message):
	        """Log *message* and return it in the text slot of the result tuple."""
	        print(message)
	        return None, message

	    def search_documents(self, state, query, num_results=3):
	        """Search the session's indexed PDF and answer the query from the hits.

	        Args:
	            state (dict): Session state holding the "user_uuid" entry.
	            query (str): The user's search query.
	            num_results (int): Maximum number of matching pages to return
	                (default is 3).

	        Returns:
	            tuple: ``(image_paths, rag_response)`` on success, where
	            ``image_paths`` is a list of page image paths. On any failure,
	            ``(None, message)`` so the message reaches the text output and
	            the image output is simply cleared.
	        """
	        print(f"Searching for query: {query}")
	        user_id = generate_uuid(state)

	        if not self.indexed_docs.get(user_id, False):
	            return self._search_error("Please index documents first")

	        if not query:
	            return self._search_error("Please enter a search query")

	        try:
	            middleware = Middleware(user_id, create_collection=False)

	            # One query is submitted, so the hits of interest are in slot 0.
	            search_results = middleware.search([query])

	            if not search_results or not search_results[0]:
	                return self._search_error("No relevant matches found in the PDF")

	            # Clamp so a negative num_results yields no pages rather than
	            # slicing from the end.
	            top_hits = search_results[0][:max(num_results, 0)]

	            # hit[1] is the zero-based page index; image files are one-based.
	            image_paths = [
	                f"pages/{user_id}/page_{hit[1] + 1}.png" for hit in top_hits
	            ]

	            print(f"Retrieved image paths: {image_paths}")

	            rag_response = rag.get_answer_from_gemini(query, image_paths)

	            return image_paths, rag_response

	        except Exception as e:
	            # UI boundary: surface the failure as text instead of raising.
	            return self._search_error(f"Error during search: {str(e)}")


	# # Function to handle search queries within indexed PDFs
	# def search_documents(self, state, query, num_results=1):
	# print(f"Searching for query: {query}")
	# id = generate_uuid(state) # Get unique user ID

	# # Check if the document has been indexed
	# if not self.indexed_docs.get(id, False):
	# print("Please index documents first")
	# return "Please index documents first", "--"

	# # Check if a query was provided
	# if not query:
	# print("Please enter a search query")
	# return "Please enter a search query", "--"

	# try:
	# # Initialize Middleware for searching
	# middleware = Middleware(id, create_collection=False)

	# # Perform the search and retrieve the top result
	# search_results = middleware.search([query])[0]

	# # Extract the page number from the search results
	# page_num = search_results[0][1] + 1

	# print(f"Retrieved page number: {page_num}")

	# # Construct the image path for the retrieved page
	# img_path = f"pages/{id}/page_{page_num}.png"
	# print(f"Retrieved image path: {img_path}")

	# # Get an answer from the RAG model using the query and associated image
	# rag_response = rag.get_answer_from_gemini(query, [img_path])

	# return img_path, rag_response
	# except Exception as e: # Handle errors during the search process
	# return f"Error during search: {str(e)}", "--"


	def create_ui():
	    """Build the Gradio Blocks interface for PDF upload and querying.

	    Returns:
	        gr.Blocks: The assembled demo; the caller is responsible for
	        launching it.
	    """
	    app = PDFSearchApp()

	    with gr.Blocks() as demo:
	        # Session state; generate_uuid fills in "user_uuid" on first use.
	        state = gr.State(value={"user_uuid": None})

	        gr.Markdown("# Colpali Milvus Multimodal RAG Demo")
	        gr.Markdown(
	            "This demo showcases how to use [Colpali](https://github.com/illuin-tech/colpali) embeddings with [Milvus](https://milvus.io/) and utilizing Gemini/OpenAI multimodal RAG for pdf search and Q&A."
	        )

	        with gr.Tab("Upload PDF"):
	            with gr.Column():
	                file_input = gr.File(label="Upload PDF")

	                # step=1 keeps the default (20) and the maximum (50) on the
	                # slider grid; with minimum=1 a step of 10 only allowed
	                # 1, 11, 21, 31, 41.
	                max_pages_input = gr.Slider(
	                    minimum=1,
	                    maximum=50,
	                    value=20,
	                    step=1,
	                    label="Max pages to extract and index",
	                )

	                status = gr.Textbox(label="Indexing Status", interactive=False)

	        with gr.Tab("Query"):
	            with gr.Column():
	                query_input = gr.Textbox(label="Enter query")
	                search_btn = gr.Button("Query")
	                llm_answer = gr.Textbox(label="RAG Response", interactive=False)

	                # Gallery rather than Image: search_documents returns a
	                # list of page image paths, not a single image.
	                images = gr.Gallery(label="Top pages matching query")

	        # Indexing runs whenever the uploaded file changes.
	        file_input.change(
	            fn=app.upload_and_convert,
	            inputs=[state, file_input, max_pages_input],
	            outputs=[status],
	        )

	        # Output order must match the handler's return tuple.
	        search_btn.click(
	            fn=app.search_documents,
	            inputs=[state, query_input],
	            outputs=[images, llm_answer],
	        )

	    return demo

	# Script entry point: build the Blocks UI, then start the Gradio server.
	# `demo` is kept as a module-level name rather than chaining the calls.
	if __name__ == "__main__":
	    demo = create_ui()
	    demo.launch()