import streamlit as st
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.vectorstores.faiss import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from dotenv import load_dotenv
import PyPDF2
import os
import io
# Configure the page first — st.set_page_config must precede any other st.* call.
st.set_page_config(layout="centered")
# Render the app title as centered HTML. The original code split a plain
# string literal across lines ( st.markdown(" ... ") ), which is a Python
# SyntaxError; the use of unsafe_allow_html indicates HTML was intended.
st.markdown(
    "<h1 style='text-align: center;'>RAG with LangChain & GenAI: Any PDF</h1>",
    unsafe_allow_html=True,
)
# Load environment variables from a local .env file (e.g. GOOGLE_API_KEY).
load_dotenv()

# Retrieve the Gemini API key; halt the app early with a clear message if absent.
google_api_key = os.getenv("GOOGLE_API_KEY")
if google_api_key is None:
    # The env var actually read above is GOOGLE_API_KEY (the original warning
    # misnamed it "google_api_key", sending users to set the wrong variable).
    st.warning("API key not found. Please set the GOOGLE_API_KEY environment variable.")
    st.stop()
# Ask the user for the PDF document to query against (PDF only).
uploaded_file = st.file_uploader(label="Upload a PDF file", type=["pdf"])
# Prompt template for the "stuff" QA chain.
#
# The original code concatenated three fragments and ended up with TWO
# "Context:/{context} ... Question:/{question}" sections, so every call
# stuffed the retrieved context into the prompt twice (doubling token use
# and muddling the instruction layout). This template keeps the answering
# rules and the suggestion list, with exactly one {context}/{question} pair.
prompt_template = """
Answer the question as detailed as possible from the provided context,
make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context",
don't provide the wrong answer

--------------------------------------------------
Prompt Suggestions:
1. Summarize the main idea of the context.
2. Provide a detailed explanation of the key concepts mentioned in the context.
3. Identify any supporting evidence or examples that can be used to answer the question.
4. Analyze any trends or patterns mentioned in the context that are relevant to the question.
5. Compare and contrast different aspects or viewpoints presented in the context.
6. Discuss any implications or consequences of the information provided in the context.
7. Evaluate the reliability or credibility of the information presented in the context.
8. Offer recommendations or suggestions based on the information provided.
9. Predict potential future developments or outcomes based on the context.
10. Provide additional context or background information relevant to the question.
11. Explain any technical terms or jargon used in the context.
12. Interpret any charts, graphs, or visual aids included in the context.
13. Discuss any limitations or caveats that should be considered when answering the question.
14. Address any potential biases or assumptions present in the context.
15. Offer alternative perspectives or interpretations of the information provided.
16. Discuss any ethical considerations or implications raised by the context.
17. Analyze any cause-and-effect relationships mentioned in the context.
18. Identify any unanswered questions or areas for further investigation.
19. Clarify any ambiguities or inconsistencies in the context.
20. Provide examples or case studies that illustrate the concepts discussed in the context.
--------------------------------------------------
Context:
{context}

Question:
{question}

Answer:
"""
if uploaded_file is not None:
    st.text("PDF File Uploaded Successfully!")

    # --- PDF extraction (PyPDF2 on the in-memory upload) ---
    pdf_data = uploaded_file.read()
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_data))
    # extract_text() may return None for image-only/scanned pages; substitute ""
    # so the join cannot raise TypeError.
    context = "\n\n".join((page.extract_text() or "") for page in pdf_reader.pages)

    # --- Chunk, embed, and index the document ---
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
    texts = text_splitter.split_text(context)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # Chroma is a drop-in alternative to the in-memory FAISS index:
    # vector_index = Chroma.from_texts(texts, embeddings).as_retriever()
    vector_index = FAISS.from_texts(texts, embeddings).as_retriever()

    user_question = st.text_input("Enter your Question below:", "")

    if st.button("Get Answer"):
        if user_question:
            with st.spinner("Processing..."):
                # Fetch the chunks most relevant to the question.
                docs = vector_index.get_relevant_documents(user_question)
                prompt = PromptTemplate(
                    template=prompt_template,
                    input_variables=["context", "question"],
                )
                model = ChatGoogleGenerativeAI(
                    model="gemini-pro", temperature=0.3, api_key=google_api_key
                )
                # "stuff" chain: concatenates all retrieved docs into one prompt.
                chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
                response = chain(
                    {"input_documents": docs, "question": user_question},
                    return_only_outputs=True,
                )
            st.subheader("Answer:")
            st.write(response["output_text"])
        else:
            st.warning("Please enter a question.")