mainakhf committed on
Commit
297c1c5
·
verified ·
1 Parent(s): b402914

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -50
app.py CHANGED
@@ -1,14 +1,89 @@
1
  from PyPDF2 import PdfReader
2
  from langchain.embeddings.openai import OpenAIEmbeddings
3
- from langchain.text_splitter import CharacterTextSplitter
4
  from langchain.vectorstores import FAISS
5
  from langchain.chains.question_answering import load_qa_chain
6
  from langchain.chains import load_chain
7
  from langchain.llms import OpenAI
8
  import streamlit as st
9
- # import pyautogui
 
 
 
 
10
  import os, shutil
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def delete_directory(directory_path):
13
  try:
14
  shutil.rmtree(directory_path)
@@ -16,60 +91,52 @@ def delete_directory(directory_path):
16
  except Exception as e:
17
  print(f"Error deleting directory '{directory_path}': {e}")
18
 
19
- st.set_page_config(page_title="Query any Pdf", page_icon="📄")
20
-
21
- st.title("📄 PDF Query Bot 📄")
22
- st.write("Made with ❤️ by Mainak")
23
-
24
  def return_response(query,document_search,chain):
25
  query = query
26
  docs = document_search.similarity_search(query)
27
  result = chain.run(input_documents=docs, question=query)
28
  return result
29
 
30
- uploaded_file = st.file_uploader("Upload a PDF File", type=["pdf"])
31
 
32
- # API key input box
33
- api_key = st.text_input("Enter Your OpenAI API Key",type="password")
34
 
35
- if not uploaded_file:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  try:
37
  delete_directory('faiss_index')
38
  except:
39
  pass
40
-
41
- if st.button('Submit'):
42
- if api_key:
43
- if uploaded_file is not None:
44
- # Read text from the uploaded file
45
- os.environ["OPENAI_API_KEY"] = api_key
46
- with st.spinner('Wait for it...'):
47
- pdfreader = PdfReader(uploaded_file)
48
- # read text from pdf
49
- raw_text = ''
50
- for i, page in enumerate(pdfreader.pages):
51
- content = page.extract_text()
52
- if content:
53
- raw_text += content
54
-
55
- text_splitter = CharacterTextSplitter(
56
- separator = "\n",
57
- chunk_size = 800,
58
- chunk_overlap = 200,
59
- length_function = len,
60
- )
61
- texts = text_splitter.split_text(raw_text)
62
- embeddings = OpenAIEmbeddings()
63
- document_search = FAISS.from_texts(texts, embeddings)
64
- document_search.save_local("faiss_index")
65
  else:
66
- st.warning("Please enter your Pdf File")
67
- else:
68
- st.warning("Please enter your API key")
69
  if os.path.exists("faiss_index"):
70
- # if st.checkbox("chat"):
71
  if api_key:
72
- if uploaded_file is not None:
73
  if "messages" not in st.session_state:
74
  st.session_state.messages = []
75
 
@@ -83,18 +150,14 @@ if os.path.exists("faiss_index"):
83
  st.markdown(prompt)
84
  # Add user message to chat history
85
  st.session_state.messages.append({"role": "user", "content": prompt})
86
- os.environ["OPENAI_API_KEY"] = api_key
87
- embeddings = OpenAIEmbeddings()
88
- document_search = FAISS.load_local("faiss_index", embeddings)
89
- chain = load_qa_chain(OpenAI(), chain_type="stuff")
90
  if prompt is None:
91
  re='Ask me anything about the pdf'
92
- # elif prompt=='exit':
93
- # delete_directory('faiss_index')
94
- # pyautogui.hotkey('f5') #Simulates F5 key press = page refresh
95
  else:
96
  with st.spinner('Typping...'):
97
- re=return_response(str(prompt),document_search,chain)
 
 
98
  response = f"PDF Mate: {re}"
99
  # Display assistant response in chat message container
100
  with st.chat_message("assistant"):
@@ -106,4 +169,5 @@ if os.path.exists("faiss_index"):
106
  else:
107
  st.warning("Please enter your API key")
108
  else:
109
- pass
 
 
1
  from PyPDF2 import PdfReader
2
  from langchain.embeddings.openai import OpenAIEmbeddings
3
+ from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
4
  from langchain.vectorstores import FAISS
5
  from langchain.chains.question_answering import load_qa_chain
6
  from langchain.chains import load_chain
7
  from langchain.llms import OpenAI
8
  import streamlit as st
9
+ import openai
10
+ from langchain.prompts import PromptTemplate
11
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings,ChatGoogleGenerativeAI
12
+ import google.generativeai as genai
13
+
14
  import os, shutil
15
 
16
+
17
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` can open (the
            caller passes the files returned by the Streamlit uploader).

    Returns:
        One string holding all page texts in document order, then page
        order. Pages without extractable text contribute nothing. An empty
        iterable yields ``""``.
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # A page with no text layer (e.g. a scanned image) may give back
            # a falsy value; normalise it so the join below never sees None.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += inside the loop.
    return "".join(page_texts)
24
+
25
def get_text_chunks(text, method):
    """Split ``text`` into overlapping chunks for embedding.

    Args:
        text: The full document text to split.
        method: Backend name. ``'Google-Gemini'`` selects
            ``RecursiveCharacterTextSplitter``; any other value selects
            ``CharacterTextSplitter`` splitting on newlines.

    Returns:
        The list of chunks produced by the chosen splitter's ``split_text``
        (both splitters use chunk_size=1000 and chunk_overlap=300).
    """
    if method == 'Google-Gemini':
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=300)
    else:
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=1000,
            chunk_overlap=300,
            length_function=len,
        )
    # Split the argument itself. The previous non-Gemini branch read a
    # module-level ``raw_text`` and only worked when the caller happened to
    # define a global with that exact name.
    return text_splitter.split_text(text)
33
+
34
def get_vector_store(text_chunks, method):
    """Embed ``text_chunks`` and save a FAISS index to ``faiss_index``.

    Args:
        text_chunks: List of text chunks to embed.
        method: Backend name. ``'Google-Gemini'`` uses
            ``GoogleGenerativeAIEmbeddings``; any other value uses
            ``OpenAIEmbeddings``.

    Returns:
        None. On failure a Streamlit warning is shown and nothing is saved,
        so callers can detect failure by the absence of ``faiss_index``.
    """
    try:
        if method == 'Google-Gemini':
            embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        else:
            embeddings = OpenAIEmbeddings()
        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
        vector_store.save_local("faiss_index")
    except Exception as e:
        # Catch Exception rather than using a bare ``except`` so that
        # BaseException-level control flow is not swallowed, and log the
        # real cause: not every failure here is a bad API key.
        print(f"Error building vector store: {e}")
        st.warning("Wrong API, give a valid API")
44
+
45
+
46
def get_conversational_chain(method):
    """Build the "stuff" question-answering chain for the chosen backend.

    Args:
        method: Backend name. ``'Google-Gemini'`` uses
            ``ChatGoogleGenerativeAI`` (model "gemini-pro", temperature
            0.3); any other value uses ``OpenAI()``.

    Returns:
        The chain returned by ``load_qa_chain`` configured with a prompt
        that takes ``context`` and ``question`` variables.
    """
    # NOTE(review): whitespace inside this template is reconstructed from a
    # whitespace-stripped rendering; confirm against the real file.
    prompt_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """
    model = (
        ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
        if method == 'Google-Gemini'
        else OpenAI()
    )
    qa_prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )
    return load_qa_chain(model, chain_type="stuff", prompt=qa_prompt)
64
+
65
+
66
+
67
def user_input(user_question, method):
    """Answer ``user_question`` from the saved ``faiss_index``.

    Args:
        user_question: The question text to search for and answer.
        method: Backend name. ``'Google-Gemini'`` uses
            ``GoogleGenerativeAIEmbeddings``; any other value uses
            ``OpenAIEmbeddings``. The same value selects the chain's model.

    Returns:
        The chain's output mapping (requested with
        ``return_only_outputs=True``); the caller reads ``"output_text"``.
    """
    embeddings = (
        GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        if method == 'Google-Gemini'
        else OpenAIEmbeddings()
    )

    # Load the saved index and fetch the chunks most similar to the question.
    index = FAISS.load_local("faiss_index", embeddings)
    matching_docs = index.similarity_search(user_question)

    qa_chain = get_conversational_chain(method)
    return qa_chain(
        {"input_documents": matching_docs, "question": user_question},
        return_only_outputs=True,
    )
83
+
84
+
85
+
86
+
87
  def delete_directory(directory_path):
88
  try:
89
  shutil.rmtree(directory_path)
 
91
  except Exception as e:
92
  print(f"Error deleting directory '{directory_path}': {e}")
93
 
 
 
 
 
 
94
def return_response(query, document_search, chain):
    """Run ``chain`` over the documents most similar to ``query``.

    Args:
        query: The question text.
        document_search: Object exposing ``similarity_search(query)``.
        chain: Object exposing ``run(input_documents=..., question=...)``.

    Returns:
        Whatever ``chain.run`` returns for the retrieved documents.
    """
    matching_docs = document_search.similarity_search(query)
    return chain.run(input_documents=matching_docs, question=query)
99
 
 
100
 
 
 
101
 
102
+
103
# Page setup, sidebar inputs (PDF files, backend, API key) and the Submit
# handler that builds the FAISS index from the uploaded PDFs.
st.set_page_config(page_title="Query any Pdf", page_icon="📄")

st.title("📄 PDF Query Bot 📄")
st.write("Made with ❤️ by Mainak")
with st.sidebar:
    pdf_docs = st.file_uploader(
        "Upload your PDF Files and Click on the Submit Button",
        accept_multiple_files=True,
        type=['pdf'],
    )
    option = st.selectbox('Select a Model(choose OpenAI for best results)', ('OpenAI', 'Google-Gemini'))
    if option == 'OpenAI':
        api_key = st.text_input("Enter Your OpenAI API Key", type="password")
        os.environ["OPENAI_API_KEY"] = api_key
    else:
        api_key = st.text_input("Enter Your Google-Gemini API Key", type="password")
        os.environ["google_API_KEY"] = api_key
        genai.configure(api_key=os.getenv("google_API_KEY"))

# With no PDFs uploaded, drop any index left from an earlier upload. Only
# attempt this when the directory exists, so reruns do not print a spurious
# deletion error; delete_directory handles its own exceptions.
if not pdf_docs and os.path.exists('faiss_index'):
    delete_directory('faiss_index')

with st.sidebar:
    if st.button('Submit'):
        if api_key:
            # With accept_multiple_files=True the uploader gives a list that
            # is empty when nothing was uploaded, so test truthiness; the old
            # ``is not None`` check was always true and the warning below
            # could never be shown.
            if pdf_docs:
                # The key for the selected backend is already set in the
                # sidebar block above; do not overwrite OPENAI_API_KEY here
                # (it used to receive the Gemini key as well).
                with st.spinner('Wait for it...'):
                    raw_text = get_pdf_text(pdf_docs)
                    chunks = get_text_chunks(raw_text, option)
                    get_vector_store(chunks, option)
            else:
                st.warning("Please enter your Pdf File")
        else:
            st.warning("Please enter your API key")
136
+
 
137
  if os.path.exists("faiss_index"):
 
138
  if api_key:
139
+ if pdf_docs is not None:
140
  if "messages" not in st.session_state:
141
  st.session_state.messages = []
142
 
 
150
  st.markdown(prompt)
151
  # Add user message to chat history
152
  st.session_state.messages.append({"role": "user", "content": prompt})
153
+ # os.environ["OPENAI_API_KEY"] = api_key
 
 
 
154
  if prompt is None:
155
  re='Ask me anything about the pdf'
 
 
 
156
  else:
157
  with st.spinner('Typping...'):
158
+ re = user_input(str(prompt),option)
159
+ re = re["output_text"]
160
+ # re=return_response(str(prompt),document_search,chain)
161
  response = f"PDF Mate: {re}"
162
  # Display assistant response in chat message container
163
  with st.chat_message("assistant"):
 
169
  else:
170
  st.warning("Please enter your API key")
171
  else:
172
+ pass
173
+