"""Streamlit PDF ChatBot: upload a PDF and ask questions about its content.

Pipeline: PyPDF2 text extraction -> RecursiveCharacterTextSplitter ->
Google Generative AI embeddings -> FAISS retriever -> "stuff" QA chain
driven by the gemini-pro chat model.
"""

import streamlit as st
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.faiss import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from dotenv import load_dotenv
import PyPDF2
import os
import io

# Set page configuration and render the header banner.
# NOTE(review): the original HTML markup of this banner was lost when the file
# was whitespace-mangled; only the visible text survived — confirm the tags.
st.set_page_config(layout="centered")
st.markdown(
    "<h1 style='text-align: center;'>PDF ChatBot by Ali & Arooj</h1>",
    unsafe_allow_html=True,
)

# Load environment variables from .env file and fail fast without a key.
load_dotenv()
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key is None:
    st.warning("API key not found. Please set the google_api_key environment variable.")
    st.stop()

uploaded_file = st.file_uploader("Your PDF file here", type=["pdf", "docx"])

# Prompt template: answer strictly from the retrieved context.
prompt_template = """
Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
Context:\n {context}?\n
Question: \n{question}\n
Answer:
"""

# Additional prompt suggestions appended to the template.
# NOTE(review): items 3-19 were elided ("...") in the source as received —
# restore them from version control if they existed.
prompt_template += """
--------------------------------------------------
Prompt Suggestions:
1. Summarize the primary theme of the context.
2. Elaborate on the crucial concepts highlighted in the context.
...
20. Cite case studies or examples that demonstrate the concepts discussed in the context.
"""


def process_files(uploaded_file):
    """Build a retriever from the uploaded file and run the Q&A UI.

    Args:
        uploaded_file: Streamlit ``UploadedFile`` (PDF or DOCX) or ``None``
            when nothing has been uploaded yet.

    Side effects:
        Renders the question input, spinner, and answer via Streamlit;
        calls ``st.stop()`` on unsupported file types.
    """
    if uploaded_file is None:
        return
    st.text("File Uploaded Successfully!")

    # Check file type and process accordingly.
    if uploaded_file.type == "application/pdf":
        pdf_data = uploaded_file.read()
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_data))
        # BUG FIX: extract_text() may yield None/empty for image-only pages;
        # coalesce to "" so the join cannot fail on a non-str.
        context = "\n\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
        texts = text_splitter.split_text(context)
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        vector_index = FAISS.from_texts(texts, embeddings).as_retriever()
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        # BUG FIX: the original fell through with `pass`, leaving
        # `vector_index` unbound and crashing with NameError when the user
        # clicked "Get Answer" after uploading a DOCX. Stop explicitly until
        # DOCX processing is implemented.
        st.warning("DOCX support is not implemented yet. Please upload a PDF.")
        st.stop()
    else:
        st.warning("Unsupported file format. Please upload PDF or DOCX.")
        st.stop()

    user_question = st.text_input("Ask Anything from PDF:", "")
    if st.button("Get Answer"):
        if user_question:
            with st.spinner("Processing..."):
                docs = vector_index.get_relevant_documents(user_question)
                prompt = PromptTemplate(
                    template=prompt_template,
                    input_variables=["context", "question"],
                )
                model = ChatGoogleGenerativeAI(
                    model="gemini-pro",
                    temperature=0.3,
                    api_key=google_api_key,
                )
                chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
                response = chain(
                    {"input_documents": docs, "question": user_question},
                    return_only_outputs=True,
                )
            st.subheader("Answer:")
            st.write(response["output_text"])
        else:
            st.warning("Please Ask.")


def main():
    """Entry point: run the Q&A flow on whatever file is currently uploaded."""
    process_files(uploaded_file)


if __name__ == "__main__":
    main()