udayr committed
Commit 4bfe85d · verified · 1 Parent(s): 307407c

Upload 3 files

Files changed (3)
  1. .env +1 -0
  2. app.py +95 -0
  3. requirements.txt +7 -0
.env ADDED
@@ -0,0 +1 @@
+ GOOGLE_API_KEY = ""
app.py ADDED
@@ -0,0 +1,95 @@
+ import os
+ import streamlit as st
+ import google.generativeai as genai
+
+ from dotenv import load_dotenv
+ from PyPDF2 import PdfReader  # read the PDF files
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings  # convert text to vectors
+ from langchain.vectorstores import FAISS  # local vector store for the embeddings
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain.chains.question_answering import load_qa_chain  # builds the QA chain around the prompt
+ from langchain.prompts import PromptTemplate
+
+
+ load_dotenv()
+ genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
+
+ # read the PDFs and extract the text
+ def get_pdf_text(pdf_docs):
+     text = ""
+     for pdf in pdf_docs:
+         pdf_reader = PdfReader(pdf)
+         for page in pdf_reader.pages:
+             text += page.extract_text() or ""  # extract_text() can return None for image-only pages
+     return text
+
+ # divide the text into chunks
+ def get_text_chunks(text):
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+     chunks = text_splitter.split_text(text)
+     return chunks
+
+ # convert the text chunks to vectors
+ def get_vector_store(text_chunks):
+     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+     vector_store.save_local("faiss_index")  # save the index locally; could also use Pinecone or DataStax
+
+ # create the conversational QA chain
+ def get_conversational_chain():
+     prompt_template = """
+     Answer the question as detailed as possible from the provided context and make sure to provide all the details.
+     If the answer is not in the provided context, just say "answer is not available in the context"; do not provide a wrong answer.
+
+     Context:\n{context}\n
+     Question:\n{question}\n
+     Answer:
+     """
+     model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.8)
+     prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
+     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)  # "stuff" packs all retrieved chunks into a single prompt
+     return chain
+
+
+ # take the user question from the textbox and call the other functions
+ def user_input(user_question):
+     embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+
+     # load the saved index from disk; newer LangChain releases may also require allow_dangerous_deserialization=True
+     new_db = FAISS.load_local("faiss_index", embeddings)
+     docs = new_db.similarity_search(user_question)
+     chain = get_conversational_chain()
+
+     response = chain(
+         {"input_documents": docs, "question": user_question},
+         return_only_outputs=True
+     )
+     print(response)
+     st.write("Reply: ", response["output_text"])
+
+ # create the Streamlit application
+ def main():
+     st.set_page_config(page_title="Chat PDF")
+     st.header("Chat with PDF using Gemini💁")
+
+     user_question = st.text_input("Ask a Question from the PDF Files")
+
+     if user_question:
+         user_input(user_question)
+
+     with st.sidebar:
+         st.title("Menu:")
+         pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
+         if st.button("Submit & Process"):
+             with st.spinner("Processing..."):
+                 raw_text = get_pdf_text(pdf_docs)
+                 text_chunks = get_text_chunks(raw_text)
+                 get_vector_store(text_chunks)
+                 st.success("Done")
+
+
+ if __name__ == "__main__":
+     main()
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ streamlit
+ google-generativeai
+ langchain
+ python-dotenv
+ PyPDF2
+ faiss-cpu
+ langchain_google_genai