Spaces:

saritha
/

RAG_with_page_index_gemini

Sleeping

App Files Community

saritha committed on Aug 11, 2024

Commit

f32ba7f

verified ·

1 Parent(s): fd9a79e

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -10

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ async def initialize(file_path, question):
     model = genai.GenerativeModel('gemini-pro')
     model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
-    # Refined prompt template to encourage precise and concise answers
     prompt_template = """Answer the question precisely and concisely using the provided context. Avoid any additional commentary or system messages.
                           If the answer is not contained in the context, respond with "answer not available in context".
@@ -30,20 +30,22 @@ async def initialize(file_path, question):
     if os.path.exists(file_path):
         pdf_loader = PyPDFLoader(file_path)
         pages = pdf_loader.load_and_split()
-        page_contexts = [f"Page {i+1}: {page.page_content}" for i, page in enumerate(pages[:30])]
-        context = "\n".join(page_contexts)
         stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
-        # Use ainvoke to get the result
         stuff_answer = await stuff_chain.ainvoke({"input_documents": pages, "question": question, "context": context})
-        # Access the correct key for the answer
         answer = stuff_answer.get('output_text', '').strip()
-        # Find the most relevant pages by searching for content overlap with the answer
         relevant_pages = []
         for i, page in enumerate(pages):
-            if any(phrase in page.page_content for phrase in answer.split()):
                 relevant_pages.append(f"Page {i+1}")
         if relevant_pages:
@@ -51,7 +53,7 @@ async def initialize(file_path, question):
         else:
             source_str = " (Source: Not found in specific page)"
-        # Add the clickable link to the source
         file_name = os.path.basename(file_path)
         source_link = f"[{file_name}](file://{os.path.abspath(file_path)})"
         return f"{answer} {source_str} - [Document: {source_link}]"

     model = genai.GenerativeModel('gemini-pro')
     model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
+    # Prompt template for precise answers
     prompt_template = """Answer the question precisely and concisely using the provided context. Avoid any additional commentary or system messages.
                           If the answer is not contained in the context, respond with "answer not available in context".
     if os.path.exists(file_path):
         pdf_loader = PyPDFLoader(file_path)
         pages = pdf_loader.load_and_split()
+        # Extract content from each page and store along with page number
+        page_contexts = [f"Page {i+1}: {page.page_content}" for i, page in enumerate(pages)]
+        context = "\n".join(page_contexts[:30])  # Using the first 30 pages for context
+        # Load the question-answering chain
         stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
+        # Get the answer from the model
         stuff_answer = await stuff_chain.ainvoke({"input_documents": pages, "question": question, "context": context})
         answer = stuff_answer.get('output_text', '').strip()
+        # Identify the pages that contain the answer
         relevant_pages = []
         for i, page in enumerate(pages):
+            if answer.lower() in page.page_content.lower():
                 relevant_pages.append(f"Page {i+1}")
         if relevant_pages:
         else:
             source_str = " (Source: Not found in specific page)"
+        # Create a clickable link for the document
         file_name = os.path.basename(file_path)
         source_link = f"[{file_name}](file://{os.path.abspath(file_path)})"
         return f"{answer} {source_str} - [Document: {source_link}]"