import os
import gradio as gr
from datetime import datetime
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain_google_genai import ChatGoogleGenerativeAI
import google.generativeai as genai
from langchain.chains.question_answering import load_qa_chain
# Initialize an empty list to store chat history and context
chat_history = []
context_history = ""

async def initialize(file_path, question):
    global context_history
    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    # Refined prompt template to encourage precise and concise answers
    prompt_template = """Answer the question precisely and concisely using the provided context. Avoid any additional commentary or system messages.
    If the answer is not contained in the context, respond with "answer not available in context".

    Context:
    {context}

    Question:
    {question}

    Answer:
    """
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
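
    # Illustrative only: PromptTemplate substitutes both variables, e.g.
    #   prompt.format(context="Page text...", question="What is X?")
    # yields the final instruction string that the chain sends to the model.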
    if os.path.exists(file_path):
        pdf_loader = PyPDFLoader(file_path)
        pages = pdf_loader.load_and_split()

        # Extract the text content of each page
        page_contexts = [page.page_content for page in pages]
        context = "\n".join(page_contexts[:30])  # Use the first 30 pages as document context
        # Load the question-answering chain
        stuff_chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

        # Combine the running conversation history with the new document context
        combined_context = context_history + "\n" + context

        # The "stuff" chain fills the prompt's {context} slot from input_documents
        # and silently overwrites any "context" key passed alongside it, so the
        # combined context must be wrapped in a Document
        stuff_answer = await stuff_chain.ainvoke(
            {"input_documents": [Document(page_content=combined_context)], "question": question}
        )
        answer = stuff_answer.get('output_text', '').strip()
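
        # The chain result echoes the inputs and adds an 'output_text' key with
        # the model's answer, roughly (illustrative shape):
        #   {'input_documents': [...], 'question': '...', 'output_text': '...'}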
        # Split the answer into sentences for more precise matching; drop empty
        # fragments, since an empty string is a substring of every page and
        # would inflate all scores
        key_phrases = [phrase for phrase in answer.split(". ") if phrase.strip()]

        # Score each page by how many key phrases it contains
        page_scores = [0] * len(pages)
        for i, page in enumerate(pages):
            for phrase in key_phrases:
                if phrase.lower() in page.page_content.lower():
                    page_scores[i] += 1

        # Determine the top pages based on highest scores
        top_pages_with_scores = sorted(enumerate(page_scores), key=lambda x: x[1], reverse=True)
        top_pages = [i + 1 for i, score in top_pages_with_scores if score > 0][:2]  # Keep the top 2 pages
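
        # Illustrative example: for the answer "Alice founded Acme. It ships
        # widgets.", the phrases "Alice founded Acme" and "It ships widgets."
        # are matched against each page, and the two pages containing the most
        # phrases are reported as sources.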
        # Generate a link for each top page; the "#page=N" fragment is the
        # standard PDF open parameter, though viewer support varies
        file_name = os.path.basename(file_path)
        page_links = [f"[Page {p}](file://{os.path.abspath(file_path)}#page={p})" for p in top_pages]
        page_links_str = ', '.join(page_links)

        if top_pages:
            source_str = f"Top relevant page(s): {page_links_str}"
        else:
            source_str = "Top relevant page(s): Not found on a specific page"

        # Create a clickable link for the document
        source_link = f"[Document: {file_name}](file://{os.path.abspath(file_path)})"
        # Save the interaction to the chat history
        timestamp = datetime.now().isoformat()
        chat_history.append({
            'timestamp': timestamp,
            'question': question,
            'answer': answer,
            'source': source_str,
            'document_link': source_link,
        })

        # Update the running context history
        context_history += f"\nQ: {question}\nA: {answer}"

        return f"Answer: {answer}\n{source_str}\n{source_link}"
    else:
        return "Error: Unable to process the document. Please ensure the PDF file is valid."

# Define Gradio interface components for QA and chat history
input_file = gr.File(label="Upload PDF File")
input_question = gr.Textbox(label="Ask about the document")
output_text = gr.Textbox(label="Answer and Top Pages", lines=10, max_lines=10)


def get_chat_history():
    return "\n".join(
        f"Q: {entry['question']}\nA: {entry['answer']}\n{entry['source']}\n"
        f"{entry['document_link']}\nTimestamp: {entry['timestamp']}\n"
        for entry in chat_history
    )
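
# Each history entry renders roughly as (illustrative values):
#   Q: What is the refund policy?
#   A: Refunds are issued within 30 days.
#   Top relevant page(s): [Page 3](file://.../policy.pdf#page=3)
#   [Document: policy.pdf](file://.../policy.pdf)
#   Timestamp: 2024-01-01T12:00:00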

async def pdf_qa(file, question):
    if file is None:
        return "Error: No file uploaded. Please upload a PDF document."
    return await initialize(file.name, question)

# Create Gradio Interfaces
qa_interface = gr.Interface(
    fn=pdf_qa,
    inputs=[input_file, input_question],
    outputs=output_text,
    title="PDF Question Answering System",
    description="Upload a PDF file and ask questions about the content.",
)

history_interface = gr.Interface(
    fn=get_chat_history,
    inputs=[],
    outputs=gr.Textbox(label="Chat History", lines=20, max_lines=20),
    title="Chat History",
    description="View the history of interactions.",
)

# Launch both interfaces in a single tabbed app; calling launch() twice would
# block on the first call, so the second interface would never start
demo = gr.TabbedInterface(
    [qa_interface, history_interface],
    tab_names=["Question Answering", "Chat History"],
)
demo.launch(share=True)
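
# To run locally (assumes this file is saved as app.py and a valid
# GOOGLE_API_KEY is exported in the environment):
#   pip install gradio langchain langchain-community langchain-google-genai google-generativeai pypdf
#   python app.py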