import os
import asyncio

import gradio as gr
from langchain_core.prompts import PromptTemplate
from langchain_community.output_parsers.rail_parser import GuardrailsOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai
from langchain.chains.question_answering import load_qa_chain

# Number of leading pages included in the explicit context string.
_MAX_CONTEXT_PAGES = 30

# Prompt that asks for precise, concise answers drawn only from the context.
_PROMPT_TEMPLATE = (
    "Answer the question precisely and concisely using the provided context. "
    "Avoid any additional commentary or system messages. "
    'If the answer is not contained in the context, respond with "answer not available in context". '
    "Context: {context} Question: {question} Answer: "
)


def _find_source_pages(pages, question):
    """Return "Page N" labels for pages whose text contains the question.

    The match is a case-insensitive substring test of the whole question
    against each page's ``page_content``; page numbers are 1-based.
    """
    needle = question.lower()
    return [
        f"Page {number}"
        for number, page in enumerate(pages, start=1)
        if needle in page.page_content.lower()
    ]


async def initialize(file_path, question):
    """Answer ``question`` about the PDF at ``file_path`` using Gemini.

    Args:
        file_path: Path to the PDF file on disk.
        question: The question to ask about the document.

    Returns:
        A string made of the model's answer, a "(Source: ...)" note listing
        the pages that contain the question text, and a markdown-style link
        to the file. If ``file_path`` does not exist, an error message string
        is returned instead.
    """
    # Check the file first so a missing document never touches the API client.
    if not os.path.exists(file_path):
        return "Error: Unable to process the document. Please ensure the PDF file is valid."

    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    prompt = PromptTemplate(
        template=_PROMPT_TEMPLATE, input_variables=["context", "question"]
    )

    pages = PyPDFLoader(file_path).load_and_split()
    context = "\n".join(
        f"Page {number}: {page.page_content}"
        for number, page in enumerate(pages[:_MAX_CONTEXT_PAGES], start=1)
    )

    stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    # NOTE(review): the "stuff" chain appears to fill {context} itself from
    # input_documents, which would make this explicit context redundant and
    # mean every page (not just the first 30) reaches the model -- confirm
    # against the LangChain version in use before removing or relying on it.
    stuff_answer = await stuff_chain.ainvoke(
        {"input_documents": pages, "question": question, "context": context}
    )
    answer = stuff_answer.get("output_text", "").strip()

    sources = _find_source_pages(pages, question)
    if sources:
        source_str = f" (Source: {', '.join(sources)})"
    else:
        source_str = " (Source: Not found in specific page)"

    # Append a markdown-style link pointing at the local file.
    file_name = os.path.basename(file_path)
    source_link = f"[{file_name}](file://{os.path.abspath(file_path)})"
    return f"{answer} {source_str} - [Document: {source_link}]"


# Define Gradio Interface
input_file = gr.File(label="Upload PDF File")
input_question = gr.Textbox(label="Ask about the document")
output_text = gr.Textbox(label="Answer - GeminiPro")


async def pdf_qa(file, question):
    """Gradio callback: answer ``question`` about the uploaded ``file``.

    Args:
        file: The value supplied by the ``gr.File`` input. It may be ``None``,
            an object exposing the path as ``.name``, or a plain path string.
        question: The text entered in the question box.

    Returns:
        The string produced by ``initialize``, or an error message when the
        file or the question is missing.
    """
    if file is None:
        return "Error: Please upload a PDF file before asking a question."
    # A blank question would match every page as a "source" and waste a call.
    if not question or not question.strip():
        return "Error: Please enter a question about the document."

    # Fall back to the value itself when it is already a path string.
    file_path = getattr(file, "name", file)
    return await initialize(file_path, question)


# Create Gradio Interface with share=True to enable a public link
demo = gr.Interface(
    fn=pdf_qa,
    inputs=[input_file, input_question],
    outputs=output_text,
    title="PDF Question Answering System",
    description="Upload a PDF file and ask questions about the content.",
)
demo.launch(share=True)