Spaces:

saritha
/

RAG_with_page_index_gemini

Sleeping

saritha committed on Aug 11, 2024

Commit

ea1c14e

verified ·

1 Parent(s): b177750

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -32,7 +32,7 @@ async def initialize(file_path, question):
         pages = pdf_loader.load_and_split()
         # Extract content from each page and store along with page number
-        page_contexts = [f"Page {i+1}: {page.page_content}" for i, page in enumerate(pages)]
         context = "\n".join(page_contexts[:30])  # Using the first 30 pages for context
         # Load the question-answering chain
@@ -52,17 +52,13 @@ async def initialize(file_path, question):
                 if phrase.lower() in page.page_content.lower():
                     page_scores[i] += 1
-        # Determine the maximum score and get top pages
-        max_score = max(page_scores)
-        top_pages = [i+1 for i, score in enumerate(page_scores) if score == max_score]
-        # Limit to the top 2 pages
-        num_top_pages = 2
-        top_pages = sorted(top_pages)[:num_top_pages]
         # Generate links for each top page
         file_name = os.path.basename(file_path)
-        # Use a general link with instructions for manual navigation if automatic links are not supported
         page_links = [f"[Page {p}](file://{os.path.abspath(file_path)})" for p in top_pages]
         page_links_str = ', '.join(page_links)

         pages = pdf_loader.load_and_split()
         # Extract content from each page and store along with page number
+        page_contexts = [page.page_content for i, page in enumerate(pages)]
         context = "\n".join(page_contexts[:30])  # Using the first 30 pages for context
         # Load the question-answering chain
                 if phrase.lower() in page.page_content.lower():
                     page_scores[i] += 1
+        # Determine the top pages based on highest scores
+        top_pages_with_scores = sorted(enumerate(page_scores), key=lambda x: x[1], reverse=True)
+        top_pages = [i + 1 for i, score in top_pages_with_scores if score > 0][:2]  # Get top 2 pages
         # Generate links for each top page
         file_name = os.path.basename(file_path)
+        # Use a general link format with instructions for manual navigation if automatic links are not supported
         page_links = [f"[Page {p}](file://{os.path.abspath(file_path)})" for p in top_pages]
         page_links_str = ', '.join(page_links)