"""Streamlit PDF ChatBot: upload a PDF and ask questions about its content.

Pipeline: PyPDF2 text extraction -> RecursiveCharacterTextSplitter ->
Google Generative AI embeddings -> FAISS retriever -> "stuff" QA chain
driven by the gemini-pro chat model.
"""

import streamlit as st
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.faiss import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from dotenv import load_dotenv
import PyPDF2
import os
import io

# Set page configuration and render the header banner.
# NOTE(review): the original HTML markup of this banner was lost when the file
# was whitespace-mangled; only the visible text survived — confirm the tags.
st.set_page_config(layout="centered")
st.markdown(
    "<h1 style='text-align: center;'>PDF ChatBot by Ali & Arooj</h1>",
    unsafe_allow_html=True,
)

# Load environment variables from .env file and fail fast without a key.
load_dotenv()
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key is None:
    st.warning("API key not found. Please set the google_api_key environment variable.")
    st.stop()

uploaded_file = st.file_uploader("Your PDF file here", type=["pdf", "docx"])

# Prompt template: answer strictly from the retrieved context.
prompt_template = """
Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
Context:\n {context}?\n
Question: \n{question}\n
Answer:
"""

# Additional prompt suggestions appended to the template.
# NOTE(review): items 3-19 were elided ("...") in the source as received —
# restore them from version control if they existed.
prompt_template += """
--------------------------------------------------
Prompt Suggestions:
1. Summarize the primary theme of the context.
2. Elaborate on the crucial concepts highlighted in the context.
...
20. Cite case studies or examples that demonstrate the concepts discussed in the context.
"""


def process_files(uploaded_file):
    """Build a retriever from the uploaded file and run the Q&A UI.

    Args:
        uploaded_file: Streamlit ``UploadedFile`` (PDF or DOCX) or ``None``
            when nothing has been uploaded yet.

    Side effects:
        Renders the question input, spinner, and answer via Streamlit;
        calls ``st.stop()`` on unsupported file types.
    """
    if uploaded_file is None:
        return
    st.text("File Uploaded Successfully!")

    # Check file type and process accordingly.
    if uploaded_file.type == "application/pdf":
        pdf_data = uploaded_file.read()
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_data))
        # BUG FIX: extract_text() may yield None/empty for image-only pages;
        # coalesce to "" so the join cannot fail on a non-str.
        context = "\n\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
        texts = text_splitter.split_text(context)
        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        vector_index = FAISS.from_texts(texts, embeddings).as_retriever()
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        # BUG FIX: the original fell through with `pass`, leaving
        # `vector_index` unbound and crashing with NameError when the user
        # clicked "Get Answer" after uploading a DOCX. Stop explicitly until
        # DOCX processing is implemented.
        st.warning("DOCX support is not implemented yet. Please upload a PDF.")
        st.stop()
    else:
        st.warning("Unsupported file format. Please upload PDF or DOCX.")
        st.stop()

    user_question = st.text_input("Ask Anything from PDF:", "")
    if st.button("Get Answer"):
        if user_question:
            with st.spinner("Processing..."):
                docs = vector_index.get_relevant_documents(user_question)
                prompt = PromptTemplate(
                    template=prompt_template,
                    input_variables=["context", "question"],
                )
                model = ChatGoogleGenerativeAI(
                    model="gemini-pro",
                    temperature=0.3,
                    api_key=google_api_key,
                )
                chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
                response = chain(
                    {"input_documents": docs, "question": user_question},
                    return_only_outputs=True,
                )
            st.subheader("Answer:")
            st.write(response["output_text"])
        else:
            st.warning("Please Ask.")


def main():
    """Entry point: run the Q&A flow on whatever file is currently uploaded."""
    process_files(uploaded_file)


if __name__ == "__main__":
    main()