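"""Chat-with-PDF Streamlit app backed by Gemini.

Pipeline: upload PDFs -> extract text with PyPDF2 -> split into overlapping
chunks -> embed with Google Generative AI embeddings -> index with FAISS ->
answer questions with a "stuff" QA chain over gemini-pro.
"""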

import os

import streamlit as st
from PyPDF2 import PdfReader  # read PDF files
from langchain.text_splitter import RecursiveCharacterTextSplitter  # split long text into chunks
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI  # Gemini embeddings and chat model
import google.generativeai as genai
from langchain.vectorstores import FAISS  # vector store (langchain_community.vectorstores in newer releases)
from langchain.chains.question_answering import load_qa_chain  # chain the retrieved documents into the prompt
from langchain.prompts import PromptTemplate  # prompt templates
from dotenv import load_dotenv

load_dotenv()
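
# The API key is expected in a .env file next to this script (assumed layout):
#   GOOGLE_API_KEY=<your Google AI Studio key>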
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF."""
    text = ""
    # iterate over all uploaded PDF files
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        # iterate over all pages in the PDF
        for page in pdf_reader.pages:
            # extract_text() can return None for pages without embedded text
            text += page.extract_text() or ""
    return text
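
# Note: PyPDF2 only sees text embedded in the PDF; scanned, image-only pages
# come back empty, so such documents would need an OCR pass first.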


def get_text_chunks(text):
    # create a RecursiveCharacterTextSplitter with a fixed chunk size and overlap
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    # split the extracted text into overlapping chunks
    chunks = text_splitter.split_text(text)
    return chunks
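
# Illustrative arithmetic: with chunk_size=10000 and chunk_overlap=1000, a
# 25,000-character document yields roughly three chunks, each sharing about
# 1,000 characters with its neighbours so answers that straddle a chunk
# boundary are not lost.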


def get_vector_store(text_chunks):
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")  # Google embedding model
    vector_store = FAISS.from_texts(text_chunks, embeddings)  # embed the split PDF text
    vector_store.save_local("faiss_index")  # persist the index locally
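
# save_local() writes index.faiss and index.pkl into the faiss_index/ folder;
# user_input() below reloads those same files with FAISS.load_local().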


def get_conversation_chain():
    # define the prompt
    prompt_template = """
    Answer the question as detailed as possible from the provided context; make sure to provide all the details.
    If the answer is not in the provided context, just say "answer is not available in the context"; do not provide a wrong answer.

    Context:\n{context}\n
    Question:\n{question}\n
    Answer:
    """
    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)  # low temperature for factual answers
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    return chain
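
# chain_type="stuff" concatenates every retrieved chunk into a single prompt,
# which works here because similarity_search returns only a few chunks; for
# contexts that exceed the model window, LangChain also offers the
# "map_reduce" and "refine" chain types.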


def user_input(user_question):
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # load the local FAISS index saved by get_vector_store(); newer langchain
    # releases also require allow_dangerous_deserialization=True here, which is
    # safe because we built the index ourselves
    new_db = FAISS.load_local("faiss_index", embeddings)
    # retrieve the chunks most similar to the question (top 4 by default)
    docs = new_db.similarity_search(user_question)
    chain = get_conversation_chain()
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    print(response)  # log the full response to the console for debugging
    st.write("Reply: ", response["output_text"])
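
# Note: calling the chain object directly is deprecated in newer LangChain
# releases in favour of chain.invoke({...}); the call form above matches the
# legacy langchain.* imports used in this script.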


def main():
    st.set_page_config(page_title="Chat PDF")
    st.header("Chat with PDF using Gemini")
    user_question = st.text_input("Ask a Question:")
    if user_question:
        user_input(user_question)
    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True,
        )
        if st.button("Submit & Process"):
            if not pdf_docs:
                st.warning("Please upload at least one PDF first.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.success("Done")


if __name__ == "__main__":
    main()
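
# Dependencies implied by the imports above (one plausible requirements.txt,
# exact version pins untested):
#   streamlit
#   PyPDF2
#   langchain
#   langchain-google-genai
#   google-generativeai
#   faiss-cpu
#   python-dotenv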