Spaces:

saritha
/

RAG_with_page_index_gemini

Sleeping

saritha committed on Aug 11, 2024

Commit

fd9a79e

verified ·

1 Parent(s): 84df10e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,7 +8,6 @@ from langchain_google_genai import ChatGoogleGenerativeAI
 import google.generativeai as genai
 from langchain.chains.question_answering import load_qa_chain  # Import load_qa_chain
 async def initialize(file_path, question):
     genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
     model = genai.GenerativeModel('gemini-pro')
@@ -31,7 +30,8 @@ async def initialize(file_path, question):
     if os.path.exists(file_path):
         pdf_loader = PyPDFLoader(file_path)
         pages = pdf_loader.load_and_split()
-        context = "\n".join(f"Page {i+1}: {page.page_content}" for i, page in enumerate(pages[:30]))
         stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
         # Use ainvoke to get the result
@@ -40,14 +40,14 @@ async def initialize(file_path, question):
         # Access the correct key for the answer
         answer = stuff_answer.get('output_text', '').strip()
-        # Extract the page number where the context was found
-        sources = []
         for i, page in enumerate(pages):
-            if question.lower() in page.page_content.lower():
-                sources.append(f"Page {i+1}")
-        if sources:
-            source_str = f" (Source: {', '.join(sources)})"
         else:
             source_str = " (Source: Not found in specific page)"

 import google.generativeai as genai
 from langchain.chains.question_answering import load_qa_chain  # Import load_qa_chain
 async def initialize(file_path, question):
     genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
     model = genai.GenerativeModel('gemini-pro')
     if os.path.exists(file_path):
         pdf_loader = PyPDFLoader(file_path)
         pages = pdf_loader.load_and_split()
+        page_contexts = [f"Page {i+1}: {page.page_content}" for i, page in enumerate(pages[:30])]
+        context = "\n".join(page_contexts)
         stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
         # Use ainvoke to get the result
         # Access the correct key for the answer
         answer = stuff_answer.get('output_text', '').strip()
+        # Find the most relevant pages by searching for content overlap with the answer
+        relevant_pages = []
         for i, page in enumerate(pages):
+            if any(phrase in page.page_content for phrase in answer.split()):
+                relevant_pages.append(f"Page {i+1}")
+        if relevant_pages:
+            source_str = f" (Source: {', '.join(relevant_pages)})"
         else:
             source_str = " (Source: Not found in specific page)"