# app.py — PDF chatbot Gradio Space
# Uploaded by Pavan178 ("Update app.py", commit d831e83, verified)
import os
import gradio as gr
from huggingface_hub import HfApi, whoami
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.prompts import PromptTemplate
# OpenAI key is read from the Space's environment/secrets; passed to the
# chatbot below, which re-exports it for the langchain OpenAI wrappers.
openai_api_key = os.environ.get("OPENAI_API_KEY")
# Hub API client. NOTE(review): never referenced again in this file —
# user identity is resolved via whoami() directly in get_user_folder().
hf_api = HfApi()
class AdvancedPdfChatbot:
    """Conversational chatbot over a single uploaded PDF.

    Splits the PDF into overlapping chunks, embeds them into a FAISS
    vector store, and answers questions with a ConversationalRetrievalChain
    backed by an OpenAI chat model and a conversation-buffer memory.
    """

    def __init__(self, openai_api_key):
        """Configure embeddings, splitter, LLM, memory and the QA prompt.

        Args:
            openai_api_key: OpenAI API key. Exported to the environment
                because the langchain OpenAI wrappers read it from there.
        """
        os.environ["OPENAI_API_KEY"] = openai_api_key
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.llm = ChatOpenAI(temperature=0.5, model_name='gpt-4o', max_tokens=3000)
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.qa_chain = None   # built lazily by setup_conversation_chain()
        self.pdf_path = None   # path of the currently indexed PDF
        # FAISS index; created in load_and_process_pdf(). Initialized here so
        # setup_conversation_chain() fails with a clear state rather than an
        # AttributeError on a missing attribute if it is ever called first.
        self.db = None
        self.template = """
You are a file-based knowledge assistant that interacts with users like ChatGPT. Your primary source of knowledge comes from user-uploaded files, such as PDFs. You do not rely on general knowledge or the internet. Instead, you extract, analyze, and synthesize information directly from the content of the provided file(s).
**1. Personality and Tone**
- Be polite, clear, and professional.
- Use formal, academic language when the context requires it.
- Provide concise, well-structured responses, and maintain a helpful and supportive tone.
**2. Core Capabilities**
- Extract and summarize key information from the provided file.
- Answer user questions based on the content of the file.
- Provide in-depth analysis, explanations, and references to the file's content.
- Suggest relevant sections, chapters, or pages where specific information can be found.
- Offer guidance on how users can interpret and understand the file's contents.
**3. Knowledge and Scope**
- Your knowledge is limited to the content found in the uploaded file(s).
- You should not answer questions unrelated to the file's content unless explicitly requested.
- If a user asks a question that is not found in the file, inform them that the information is not available.
**4. Interaction Rules**
- Respond with specific references to the document's content, including page numbers, sections, or headings, if available.
- If the user asks for clarification, politely request more details.
- Provide short, clear explanations for user queries, but be ready to offer more depth if asked.
- Never "make up" information. If something is not in the file, clearly state that it cannot be found.
**5. Context Awareness**
- Remember the content of the file for the duration of the session.
- Use file-specific knowledge to provide logical and evidence-backed responses.
- If multiple files are uploaded, clarify which file is being referenced and specify which file the information is from.
**6. Technical Details**
- Summarize content into concise answers and organize information using bullet points, lists, or structured paragraphs.
- If asked to provide a summary, focus on key points, main arguments, and essential takeaways.
- When a user asks for a section or heading, search for relevant text within the file.
- Do not offer answers beyond the scope of the file, and avoid guessing.
**7. Example Usage**
User: "Can you summarize the main argument from the introduction of the file?"
Response: "Sure! The introduction discusses [key points] and highlights the central argument that [main idea]. This can be found on page 2 under the heading 'Introduction'."
User: "Where can I find the definition of 'symbolic interactionism' in the document?"
Response: "The definition of 'symbolic interactionism' appears on page 12 under the subheading 'Key Theoretical Concepts'."
User: "Explain the concept of 'cognitive dissonance' as it is presented in the document."
Response: "In the document, 'cognitive dissonance' is defined as [definition from the file]. It appears in the context of [brief explanation] and can be found on page 15 under the section 'Theoretical Foundations'."
NOTE : DESCRIBE/SUMMARY should always return the overall summary of the documents in well documented and descriptions of the topic in great details.
**End of Prompt**
Context: {context}
Question: {question}
Answer:
"""
        self.prompt = PromptTemplate(template=self.template, input_variables=["context", "question"])

    def load_and_process_pdf(self, pdf_path):
        """Load and index `pdf_path`, then (re)build the QA chain.

        Args:
            pdf_path: Filesystem path to the PDF to index.
        """
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        texts = self.text_splitter.split_documents(documents)
        self.db = FAISS.from_documents(texts, self.embeddings)
        self.pdf_path = pdf_path
        self.setup_conversation_chain()

    def setup_conversation_chain(self):
        """Build the retrieval QA chain over the current FAISS index.

        Called by load_and_process_pdf() once `self.db` exists.
        """
        self.qa_chain = ConversationalRetrievalChain.from_llm(
            self.llm,
            retriever=self.db.as_retriever(),
            memory=self.memory,
            combine_docs_chain_kwargs={"prompt": self.prompt}
        )

    def chat(self, query):
        """Answer `query` against the indexed PDF.

        Args:
            query: The user's question.

        Returns:
            The chain's answer string, or a prompt to upload a PDF if no
            document has been indexed yet.
        """
        if not self.qa_chain:
            return "Please upload a PDF first."
        result = self.qa_chain({"question": query})
        return result['answer']

    def get_pdf_path(self):
        """Return the currently indexed PDF's path, or a placeholder message."""
        if self.pdf_path:
            return self.pdf_path
        else:
            return "No PDF uploaded yet."
# Initialize the chatbot
# Module-level singleton shared by all Gradio callbacks below; its memory
# and FAISS index therefore persist across requests within one process.
pdf_chatbot = AdvancedPdfChatbot(openai_api_key)
def get_user_folder():
    """Return a per-user scratch directory, creating it on demand.

    Resolves the current Hugging Face user via ``whoami()`` and maps it to
    ``user_data/<username>``. Any failure (not logged in, network error,
    unexpected payload) yields ``None`` — callers treat that as "no user".
    """
    try:
        username = whoami()['name']
        folder = f"user_data/{username}"
        os.makedirs(folder, exist_ok=True)
        return folder
    except Exception:
        # Deliberate best-effort: an anonymous/failed lookup is not an error.
        return None
def upload_pdf(pdf_file):
    """Persist an uploaded PDF into the user's folder and index it.

    Args:
        pdf_file: Upload object from ``gr.File`` (exposes ``.name`` and
            ``.read()`` here — assumes the gradio version in use yields a
            file-like object, not a plain path; TODO confirm).

    Returns:
        The saved file path on success, otherwise a user-facing error message.
    """
    if pdf_file is None:
        return "Please upload a PDF file."
    user_folder = get_user_folder()
    if user_folder is None:
        return "Please log in to upload a PDF."
    # Keep only the basename: gradio supplies an absolute temp path in
    # pdf_file.name, and os.path.join DISCARDS user_folder when its second
    # argument is absolute — the original wrote outside the user folder and
    # allowed path escape.
    file_path = os.path.join(user_folder, os.path.basename(pdf_file.name))
    with open(file_path, "wb") as f:
        f.write(pdf_file.read())
    pdf_chatbot.load_and_process_pdf(file_path)
    return file_path
def respond(message, history):
    """Run one chat turn.

    Appends the ``(user, bot)`` pair to ``history`` in place and returns an
    empty string to clear the input textbox, plus the updated history.
    """
    answer = pdf_chatbot.chat(message)
    history.append((message, answer))
    return "", history
def clear_chatbot():
    """Wipe the conversation memory and return an empty chat history list."""
    pdf_chatbot.memory.clear()
    return []
def get_pdf_path():
    """Expose the chatbot's current PDF path (or its placeholder) to the UI."""
    return pdf_chatbot.get_pdf_path()
# Create the Gradio interface
# NOTE(review): indentation was lost in the pasted source; the Row nesting
# below is a reconstruction — confirm against the original layout.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Chatbot")
    # Login row: HF OAuth button plus a Markdown slot for user info.
    with gr.Row():
        login_button = gr.LoginButton()
        user_info = gr.Markdown()
    # Upload row: file picker, trigger button, and status display.
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")
        upload_status = gr.Textbox(label="Upload Status")
    # Processing the PDF reports the saved path (or an error) into the status box.
    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
    path_button = gr.Button("Get PDF Path")
    pdf_path_display = gr.Textbox(label="Current PDF Path")
    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    # Submitting the textbox runs one chat turn; respond() clears the box
    # and returns the updated history for the Chatbot widget.
    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
    clear.click(clear_chatbot, outputs=[chatbot_interface])
    path_button.click(get_pdf_path, outputs=[pdf_path_display])
    # NOTE(review): on page load this only marks user_info visible without
    # setting any content — presumably intended to show the logged-in user;
    # verify this does anything useful.
    demo.load(lambda: gr.update(visible=True), outputs=[user_info], inputs=None)
if __name__ == "__main__":
    demo.launch()