Spaces:

Roberta2024
/

LLM_pdf_Gemini

Sleeping

App Files Files Community

LLM_pdf_Gemini / app.py

Roberta2024

Create app.py

eaf6a82 verified 11 months ago

raw

history blame contribute delete

3.64 kB

	import streamlit as st
	from PyPDF2 import PdfReader #library to read pdf files
	from langchain.text_splitter import RecursiveCharacterTextSplitter#library to split pdf files
	import os

	from langchain_google_genai import GoogleGenerativeAIEmbeddings #to embed the text
	import google.generativeai as genai

	from langchain.vectorstores import FAISS #for vector embeddings
	from langchain_google_genai import ChatGoogleGenerativeAI #
	from langchain.chains.question_answering import load_qa_chain #to chain the prompts
	from langchain.prompts import PromptTemplate #to create prompt templates
	from dotenv import load_dotenv

	# Load variables from a local .env file (if one exists) into the process environment.
	load_dotenv()

	# Look the Gemini API key up by the *name* of its environment variable.
	# The previous code passed a literal key string as the variable name, so
	# os.getenv() returned None and a credential was committed to source.
	# Set GOOGLE_API_KEY in .env or in the deployment's secret store, and
	# revoke any key that was ever checked in.
	genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

	def get_pdf_text(pdf_docs):
	    """Return the text of all pages of all given PDFs as one string.

	    Args:
	        pdf_docs: Iterable of objects accepted by ``PdfReader``.

	    Returns:
	        The page texts joined back to back, in document order and then
	        page order, with no separator. An empty iterable yields ``""``.
	    """
	    return "".join(
	        page.extract_text()
	        for document in pdf_docs
	        for page in PdfReader(document).pages
	    )

	def get_text_chunks(text):
	    """Split ``text`` into overlapping chunks.

	    Uses a ``RecursiveCharacterTextSplitter`` configured with
	    ``chunk_size=10000`` and ``chunk_overlap=1000``.

	    Args:
	        text: The full text to split.

	    Returns:
	        Whatever ``split_text`` returns for ``text``.
	    """
	    return RecursiveCharacterTextSplitter(
	        chunk_size=10000,
	        chunk_overlap=1000,
	    ).split_text(text)

	def get_vector_store(text_chunks):
	    """Embed ``text_chunks`` and save the resulting FAISS store locally.

	    The store is built with the ``models/embedding-001`` Google embedding
	    model and saved under the name ``faiss_index``.

	    Args:
	        text_chunks: The text pieces to embed and index.
	    """
	    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
	    FAISS.from_texts(text_chunks, embedder).save_local("faiss_index")

	def get_conversation_chain():
	    """Build the question-answering chain used to answer from PDF context.

	    The chain uses the ``gemini-pro`` chat model with a prompt that tells
	    the model to answer only from the supplied context and to say so when
	    the answer is not there.

	    Returns:
	        The chain produced by ``load_qa_chain`` with ``chain_type="stuff"``.
	        It expects the prompt variables ``context`` and ``question``.
	    """
	    # The prompt text is kept exactly as authored; only the code around it changed.
	    prompt_template = """
	Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
	provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
	Context:\n {context}?\n
	Question: \n{question}\n

	Answer:
	"""

	    # Fixed: the keyword was misspelled "temperatue", so the intended
	    # sampling temperature of 0.3 was never applied to the model.
	    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)

	    prompt = PromptTemplate(
	        template=prompt_template,
	        input_variables=["context", "question"],
	    )

	    return load_qa_chain(model, chain_type="stuff", prompt=prompt)

	def user_input(user_question):
	    """Answer ``user_question`` from the saved index and show the reply.

	    Loads the locally saved ``faiss_index``, retrieves the documents most
	    similar to the question, runs them through the QA chain, and writes
	    the chain's ``output_text`` to the Streamlit page.

	    Args:
	        user_question: The question typed by the user.

	    Returns:
	        None. If no index has been saved yet, a warning is shown and
	        nothing else happens.
	    """
	    # The index only exists after "Submit & Process" has run at least once.
	    # Without this guard, asking a question first would try to load an
	    # index that was never saved and the page would fail.
	    if not os.path.isdir("faiss_index"):
	        st.warning("Please upload and process your PDF files before asking a question.")
	        return

	    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

	    # NOTE(review): newer LangChain releases require
	    # allow_dangerous_deserialization=True here. Confirm against the pinned
	    # version before adding it, since older releases reject the keyword.
	    new_db = FAISS.load_local("faiss_index", embeddings)

	    # Retrieve the stored chunks most similar to the question.
	    docs = new_db.similarity_search(user_question)

	    chain = get_conversation_chain()
	    response = chain(
	        {"input_documents": docs, "question": user_question},
	        return_only_outputs=True,
	    )

	    print(response)
	    st.write("Reply: ", response["output_text"])

	def main():
	    """Render the Streamlit page: question box plus PDF upload sidebar.

	    The main area takes a question and, when one is entered, answers it via
	    ``user_input``. The sidebar accepts PDF uploads; pressing
	    "Submit & Process" extracts their text, splits it into chunks, and
	    saves the vector store.
	    """
	    st.set_page_config("Chat PDF")
	    st.header("Chat with PDF using Gemini")

	    user_question = st.text_input("Ask a Question:")
	    if user_question:
	        user_input(user_question)

	    with st.sidebar:
	        st.title("Menu:")
	        pdf_docs = st.file_uploader(
	            "Upload your PDF Files and Click on the Submit & Process Button",
	            accept_multiple_files=True,
	        )
	        if st.button("Submit & Process"):
	            if not pdf_docs:
	                # Nothing uploaded: there is no text to index, so say so
	                # instead of failing further down the pipeline.
	                st.warning("Please upload at least one PDF file first.")
	            else:
	                with st.spinner("Processing..."):
	                    raw_text = get_pdf_text(pdf_docs)
	                    text_chunks = get_text_chunks(raw_text)
	                    if text_chunks:
	                        get_vector_store(text_chunks)
	                        st.success("Done")
	                    else:
	                        # E.g. scanned PDFs with no text layer: an empty
	                        # chunk list gives the index nothing to build from.
	                        st.error("No extractable text was found in the uploaded files.")


	if __name__ == "__main__":
	    main()