Spaces:
Runtime error
Runtime error
ej68okap
committed on
Commit
·
9832882
1
Parent(s):
b61a6a6
new code added
Browse files
app.py
CHANGED
@@ -50,100 +50,100 @@ class PDFSearchApp:
|
|
50 |
return f"Uploaded and extracted {len(pages)} pages"
|
51 |
except Exception as e: # Handle errors during processing
|
52 |
return f"Error processing PDF: {str(e)}"
|
53 |
-
def search_documents(self, state, query, num_results=3): # Set num_results to return more pages
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
|
82 |
-
|
83 |
-
|
84 |
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
|
97 |
-
|
98 |
|
99 |
-
|
100 |
-
|
101 |
|
102 |
-
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
|
109 |
|
110 |
-
#
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
|
129 |
-
|
130 |
-
|
131 |
|
132 |
-
|
133 |
-
|
134 |
|
135 |
-
|
136 |
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
|
141 |
-
|
142 |
-
|
143 |
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
|
148 |
|
149 |
# Function to create the Gradio user interface
|
|
|
50 |
return f"Uploaded and extracted {len(pages)} pages"
|
51 |
except Exception as e: # Handle errors during processing
|
52 |
return f"Error processing PDF: {str(e)}"
|
53 |
+
# def search_documents(self, state, query, num_results=3): # Set num_results to return more pages
|
54 |
+
# """
|
55 |
+
# Search for a query within indexed PDF documents and return multiple matching pages.
|
56 |
+
|
57 |
+
# Args:
|
58 |
+
# state (dict): Session state containing user-specific data.
|
59 |
+
# query (str): The user's search query.
|
60 |
+
# num_results (int): Number of top results to return (default is 3).
|
61 |
+
|
62 |
+
# Returns:
|
63 |
+
# tuple: (list of image paths, RAG response) or an error message if no match is found.
|
64 |
+
# """
|
65 |
+
# print(f"Searching for query: {query}")
|
66 |
+
# id = generate_uuid(state) # Get unique user ID
|
67 |
|
68 |
+
# # Check if the document has been indexed
|
69 |
+
# if not self.indexed_docs.get(id, False):
|
70 |
+
# print("Please index documents first")
|
71 |
+
# return "Please index documents first", None
|
72 |
|
73 |
+
# # Check if a query was provided
|
74 |
+
# if not query:
|
75 |
+
# print("Please enter a search query")
|
76 |
+
# return "Please enter a search query", None
|
77 |
|
78 |
+
# try:
|
79 |
+
# # Initialize Middleware for searching
|
80 |
+
# middleware = Middleware(id, create_collection=False)
|
81 |
|
82 |
+
# # Perform the search and retrieve the top results
|
83 |
+
# search_results = middleware.search([query]) # Returns multiple matches
|
84 |
|
85 |
+
# # Check if there are valid search results
|
86 |
+
# if not search_results or not search_results[0]:
|
87 |
+
# print("No relevant matches found in the PDF")
|
88 |
+
# return "No relevant matches found in the PDF", None
|
89 |
|
90 |
+
# # Extract multiple matching pages (up to num_results)
|
91 |
+
# image_paths = []
|
92 |
+
# for i in range(min(len(search_results[0]), num_results)): # Limit to num_results
|
93 |
+
# page_num = search_results[0][i][1] + 1 # Convert zero-based index to one-based
|
94 |
+
# img_path = f"pages/{id}/page_{page_num}.png"
|
95 |
+
# image_paths.append(img_path)
|
96 |
|
97 |
+
# print(f"Retrieved image paths: {image_paths}")
|
98 |
|
99 |
+
# # Get an answer from the RAG model using multiple images
|
100 |
+
# rag_response = rag.get_answer_from_gemini(query, image_paths)
|
101 |
|
102 |
+
# return image_paths, rag_response # Return multiple image paths and RAG response
|
103 |
|
104 |
+
# except Exception as e:
|
105 |
+
# # Handle and log any errors that occur
|
106 |
+
# print(f"Error during search: {e}")
|
107 |
+
# return f"Error during search: {str(e)}", None
|
108 |
|
109 |
|
110 |
+
# Function to handle search queries within indexed PDFs
|
111 |
+
def search_documents(self, state, query, num_results=1):
|
112 |
+
print(f"Searching for query: {query}")
|
113 |
+
id = generate_uuid(state) # Get unique user ID
|
114 |
|
115 |
+
# Check if the document has been indexed
|
116 |
+
if not self.indexed_docs.get(id, False):
|
117 |
+
print("Please index documents first")
|
118 |
+
return "Please index documents first", "--"
|
119 |
|
120 |
+
# Check if a query was provided
|
121 |
+
if not query:
|
122 |
+
print("Please enter a search query")
|
123 |
+
return "Please enter a search query", "--"
|
124 |
|
125 |
+
try:
|
126 |
+
# Initialize Middleware for searching
|
127 |
+
middleware = Middleware(id, create_collection=False)
|
128 |
|
129 |
+
# Perform the search and retrieve the top result
|
130 |
+
search_results = middleware.search([query])[0]
|
131 |
|
132 |
+
# Extract the page number from the search results
|
133 |
+
page_num = search_results[0][1] + 1
|
134 |
|
135 |
+
print(f"Retrieved page number: {page_num}")
|
136 |
|
137 |
+
# Construct the image path for the retrieved page
|
138 |
+
img_path = f"pages/{id}/page_{page_num}.png"
|
139 |
+
print(f"Retrieved image path: {img_path}")
|
140 |
|
141 |
+
# Get an answer from the RAG model using the query and associated image
|
142 |
+
rag_response = rag.get_answer_from_gemini(query, [img_path])
|
143 |
|
144 |
+
return img_path, rag_response
|
145 |
+
except Exception as e: # Handle errors during the search process
|
146 |
+
return f"Error during search: {str(e)}", "--"
|
147 |
|
148 |
|
149 |
# Function to create the Gradio user interface
|
rag.py
CHANGED
@@ -12,46 +12,46 @@ class Rag:
|
|
12 |
based on user queries and associated images.
|
13 |
"""
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
|
32 |
-
|
33 |
-
|
34 |
|
35 |
-
|
36 |
-
|
37 |
|
38 |
-
|
39 |
-
|
40 |
|
41 |
-
|
42 |
-
|
43 |
|
44 |
-
|
45 |
-
|
46 |
|
47 |
-
|
48 |
|
49 |
-
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
|
56 |
def get_answer_from_openai(self, query: str, imagesPaths: List[str]) -> str:
|
57 |
"""
|
@@ -95,54 +95,54 @@ class Rag:
|
|
95 |
# Handle and log any errors that occur
|
96 |
print(f"An error occurred while querying OpenAI: {e}")
|
97 |
return None
|
98 |
-
def get_answer_from_gemini(self, query: str, imagePaths: List[str]) -> str:
|
99 |
-
|
100 |
-
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
|
115 |
-
|
116 |
-
|
117 |
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
|
126 |
-
|
127 |
-
|
128 |
|
129 |
-
|
130 |
-
|
131 |
|
132 |
-
|
133 |
-
|
134 |
|
135 |
-
|
136 |
-
|
137 |
|
138 |
-
|
139 |
|
140 |
-
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
|
147 |
def __get_openai_api_payload(self, query: str, imagesPaths: List[str]) -> dict:
|
148 |
"""
|
|
|
12 |
based on user queries and associated images.
|
13 |
"""
|
14 |
|
15 |
+
def get_answer_from_gemini(self, query: str, imagePaths: List[str]) -> str:
|
16 |
+
"""
|
17 |
+
Query the Gemini model with a text query and associated images.
|
18 |
|
19 |
+
Args:
|
20 |
+
query (str): The user's query.
|
21 |
+
imagePaths (List[str]): List of file paths to images.
|
22 |
|
23 |
+
Returns:
|
24 |
+
str: The response text from the Gemini model.
|
25 |
+
"""
|
26 |
+
print(f"Querying Gemini for query={query}, imagePaths={imagePaths}")
|
27 |
|
28 |
+
try:
|
29 |
+
# Configure the Gemini API client using the API key from environment variables
|
30 |
+
genai.configure(api_key=os.environ['GEMINI_API_KEY'])
|
31 |
|
32 |
+
# Initialize the Gemini generative model
|
33 |
+
model = genai.GenerativeModel('gemini-1.5-flash')
|
34 |
|
35 |
+
# Load images from the given paths
|
36 |
+
images = [Image.open(path) for path in imagePaths]
|
37 |
|
38 |
+
# Start a new chat session
|
39 |
+
chat = model.start_chat()
|
40 |
|
41 |
+
# Send the query and images to the model
|
42 |
+
response = chat.send_message([*images, query])
|
43 |
|
44 |
+
# Extract the response text
|
45 |
+
answer = response.text
|
46 |
|
47 |
+
print(answer) # Log the answer
|
48 |
|
49 |
+
return answer
|
50 |
|
51 |
+
except Exception as e:
|
52 |
+
# Handle and log any errors that occur
|
53 |
+
print(f"An error occurred while querying Gemini: {e}")
|
54 |
+
return f"Error: {str(e)}"
|
55 |
|
56 |
def get_answer_from_openai(self, query: str, imagesPaths: List[str]) -> str:
|
57 |
"""
|
|
|
95 |
# Handle and log any errors that occur
|
96 |
print(f"An error occurred while querying OpenAI: {e}")
|
97 |
return None
|
98 |
+
# def get_answer_from_gemini(self, query: str, imagePaths: List[str]) -> str:
|
99 |
+
# """
|
100 |
+
# Query the Gemini model with a text query and associated images.
|
101 |
|
102 |
+
# Args:
|
103 |
+
# query (str): The user's query.
|
104 |
+
# imagePaths (List[str]): List of file paths to images.
|
105 |
|
106 |
+
# Returns:
|
107 |
+
# str: The response text from the Gemini model.
|
108 |
+
# """
|
109 |
+
# print(f"Querying Gemini for query={query}, imagePaths={imagePaths}")
|
110 |
|
111 |
+
# try:
|
112 |
+
# # Configure the Gemini API client using the API key from environment variables
|
113 |
+
# genai.configure(api_key=os.environ['GEMINI_API_KEY'])
|
114 |
|
115 |
+
# # Initialize the Gemini generative model
|
116 |
+
# model = genai.GenerativeModel('gemini-1.5-flash')
|
117 |
|
118 |
+
# # Load images from the given paths (skip missing files)
|
119 |
+
# images = []
|
120 |
+
# for path in imagePaths:
|
121 |
+
# if os.path.exists(path):
|
122 |
+
# images.append(Image.open(path))
|
123 |
+
# else:
|
124 |
+
# print(f"Warning: Image not found {path}, skipping.")
|
125 |
|
126 |
+
# # Start a new chat session
|
127 |
+
# chat = model.start_chat()
|
128 |
|
129 |
+
# # Construct the input for the model (handle cases with and without images)
|
130 |
+
# input_data = [query] if not images else [*images, query]
|
131 |
|
132 |
+
# # Send the query (and images, if any) to the model
|
133 |
+
# response = chat.send_message(input_data)
|
134 |
|
135 |
+
# # Extract the response text
|
136 |
+
# answer = response.text
|
137 |
|
138 |
+
# print(answer) # Log the answer
|
139 |
|
140 |
+
# return answer
|
141 |
|
142 |
+
# except Exception as e:
|
143 |
+
# # Handle and log any errors that occur
|
144 |
+
# print(f"An error occurred while querying Gemini: {e}")
|
145 |
+
# return f"Error: {str(e)}"
|
146 |
|
147 |
def __get_openai_api_payload(self, query: str, imagesPaths: List[str]) -> dict:
|
148 |
"""
|