MrHalk committed on
Commit
fa5f50e
·
verified ·
1 Parent(s): 36f10c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -105
app.py CHANGED
@@ -1,105 +1,115 @@
1
- import streamlit as st
2
- from PyPDF2 import PdfReader
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- import os
5
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
- import google.generativeai as genai
7
- from langchain.vectorstores import FAISS
8
- from langchain_google_genai import ChatGoogleGenerativeAI
9
- from langchain.chains.question_answering import load_qa_chain
10
- from langchain.prompts import PromptTemplate
11
- from dotenv import load_dotenv
12
-
13
-
14
- load_dotenv()
15
- os.getenv("GOOGLE_API_KEY")
16
- genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
17
-
18
-
19
- def get_pdf_text(pdf_docs):
20
- text = ""
21
- for pdf in pdf_docs:
22
- pdf_reader = PdfReader(pdf)
23
- for page in pdf_reader.pages:
24
- text += page.extract_text()
25
- return text
26
-
27
-
28
- def get_text_chunks(text):
29
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
30
- chunks = text_splitter.split_text(text)
31
- return chunks
32
-
33
-
34
- def get_vector_store(text_chunks):
35
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
36
- vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
37
- vector_store.save_local("faiss_index")
38
-
39
-
40
- def get_conversational_chain():
41
- prompt_template = """
42
- Answer the question as detailed as possible from the provided context, make sure to provide all the details. If the answer is not in
43
- the provided context, just say, "Answer is not available in the context." Don't provide the wrong answer.
44
-
45
- Context:\n {context}?\n
46
- Question: \n{question}\n
47
-
48
- Answer:
49
- """
50
-
51
- model = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.3)
52
- prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
53
- chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
54
-
55
- return chain
56
-
57
-
58
- def user_input(user_question):
59
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
60
-
61
- # Allow dangerous deserialization
62
- new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
63
- docs = new_db.similarity_search(user_question)
64
-
65
- chain = get_conversational_chain()
66
-
67
- response = chain(
68
- {"input_documents": docs, "question": user_question},
69
- return_only_outputs=True
70
- )
71
-
72
- print(response)
73
- st.write("Reply: ", response.get("output_text", "No output generated"))
74
-
75
-
76
- def main():
77
- st.set_page_config("Chat PDF")
78
- st.header("Chat with PDF using Gemini💁")
79
-
80
- user_question = st.text_input("Ask a Question from the PDF Files")
81
-
82
- # Add a Submit button for the user input
83
- if st.button("Submit Question"):
84
- if user_question: # Ensure the input is not empty
85
- user_input(user_question)
86
- else:
87
- st.warning("Please enter a question before submitting!")
88
-
89
- with st.sidebar:
90
- st.title("Menu:")
91
- pdf_docs = st.file_uploader(
92
- "Upload your PDF Files and Click on the Submit & Process Button",
93
- accept_multiple_files=True
94
- )
95
- if st.button("Submit & Process"):
96
- with st.spinner("Processing..."):
97
- raw_text = get_pdf_text(pdf_docs)
98
- text_chunks = get_text_chunks(raw_text)
99
- get_vector_store(text_chunks)
100
- st.success("Done")
101
-
102
-
103
-
104
- if __name__ == "__main__":
105
- main()
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ import os
5
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
6
+ import google.generativeai as genai
7
+ from langchain.vectorstores import FAISS
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from langchain.chains.question_answering import load_qa_chain
10
+ from langchain.prompts import PromptTemplate
11
+ from dotenv import load_dotenv
12
+
13
+
14
+ load_dotenv()
15
+ os.getenv("GOOGLE_API_KEY")
16
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
17
+
18
+
19
+ def get_pdf_text(pdf_docs):
20
+ text = ""
21
+ for pdf in pdf_docs:
22
+ pdf_reader = PdfReader(pdf)
23
+ for page in pdf_reader.pages:
24
+ text += page.extract_text()
25
+ return text
26
+
27
+
28
+ def get_text_chunks(text):
29
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
30
+ chunks = text_splitter.split_text(text)
31
+ return chunks
32
+
33
+
34
+ def get_vector_store(text_chunks):
35
+ embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
36
+ vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
37
+ vector_store.save_local("faiss_index")
38
+
39
+
40
+ def get_conversational_chain():
41
+ prompt_template = """
42
+ Answer the question as detailed as possible from the provided context, make sure to provide all the details. If the answer is not in
43
+ the provided context, just say, "Answer is not available in the context." Don't provide the wrong answer.
44
+
45
+ Context:\n {context}?\n
46
+ Question: \n{question}\n
47
+
48
+ Answer:
49
+ """
50
+
51
+ model = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.3)
52
+ prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
53
+ chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
54
+
55
+ return chain
56
+
57
+
58
def user_input(user_question):
    """Answer `user_question` against the persisted FAISS index and render the reply.

    Args:
        user_question: The question string typed by the user.

    Side effects:
        Writes the model's answer (or an error message) to the Streamlit page.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # allow_dangerous_deserialization is required because the index is a local
    # pickle this app wrote itself; never point this at an untrusted file.
    try:
        new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    except (FileNotFoundError, RuntimeError, ValueError):
        # Index not built yet — fail gracefully instead of crashing the app.
        st.error("No processed index found. Upload PDFs and click 'Submit & Process' first.")
        return

    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True
    )

    # Debug `print(response)` removed; the reply is rendered in the UI.
    st.write("Reply: ", response.get("output_text", "No output generated"))
74
+
75
+
76
def main():
    """Streamlit entry point: question UI plus a sidebar PDF-ingestion flow."""
    st.set_page_config("Chat PDF")
    st.header("Chat with PDF using Gemini💁")

    # Callback so the input can be reset from a button: Streamlit only allows
    # writing a widget's session_state key outside the widget's own render.
    def clear_input():
        st.session_state["user_question"] = ""
        st.balloons()

    # Input box keyed into session_state so the Clear callback can reset it.
    st.text_input("Ask a Question from the PDF Files", key="user_question")

    # Submit button
    if st.button("Submit Question"):
        if st.session_state.user_question:
            user_input(st.session_state.user_question)
        else:
            st.warning("Please enter a question before submitting!")

    # Clear button right below
    st.button("Clear", on_click=clear_input)

    # Sidebar for PDF upload
    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True
        )
        if st.button("Submit & Process"):
            if not pdf_docs:
                # file_uploader yields None/[] until files are chosen; without
                # this guard get_pdf_text would iterate None and crash.
                st.warning("Please upload at least one PDF before processing!")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.success("Done")
111
+
112
+
113
+
114
# Run the Streamlit app when this file is executed directly.
if __name__ == "__main__":
    main()