CosmoAI committed on
Commit
6ae72bf
·
verified ·
1 Parent(s): 44eb0ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +226 -117
app.py CHANGED
@@ -1,130 +1,239 @@
1
- import gradio as gr
2
  import streamlit as st
 
 
3
  from langchain.embeddings.openai import OpenAIEmbeddings
4
- from langchain.text_splitter import CharacterTextSplitter
5
  from langchain.vectorstores import Chroma
6
- from langchain.chains import ConversationalRetrievalChain
7
- from langchain.chat_models import ChatOpenAI
8
- from langchain.document_loaders import PyPDFLoader
9
- import os
10
- import fitz
11
- from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
 
14
- # Global variables
15
- COUNT, N = 0, 0
16
- chat_history = []
17
- chain = None # Initialize chain as None
18
 
19
- # Function to set the OpenAI API key
20
 
21
- api_key = os.environ['OPENAI_API_KEY']
22
 
23
- st.write(api_key)
24
 
25
 
26
- # Function to enable the API key input box
27
- def enable_api_box():
28
- return enable_box
29
-
30
- # Function to add text to the chat history
31
- def add_text(history, text):
32
- if not text:
33
- raise gr.Error('Enter text')
34
- history = history + [(text, '')]
35
- return history
36
-
37
- # Function to process the PDF file and create a conversation chain
38
- def process_file(file):
39
- global chain
40
- if 'OPENAI_API_KEY' not in os.environ:
41
- raise gr.Error('Upload your OpenAI API key')
42
-
43
- # Replace with your actual PDF processing logic
44
- loader = PyPDFLoader(file.name)
45
- documents = loader.load()
46
- embeddings = OpenAIEmbeddings()
47
- pdfsearch = Chroma.from_documents(documents, embeddings)
48
-
49
- chain = ConversationalRetrievalChain.from_llm(ChatOpenAI(temperature=0.3),
50
- retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
51
- return_source_documents=True)
52
- return chain
53
-
54
- # Function to generate a response based on the chat history and query
55
- def generate_response(history, query, pdf_upload):
56
- global COUNT, N, chat_history, chain
57
- if not pdf_upload:
58
- raise gr.Error(message='Upload a PDF')
59
-
60
- if COUNT == 0:
61
- chain = process_file(pdf_upload)
62
- COUNT += 1
63
-
64
- # Replace with your LangChain logic to generate a response
65
- result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
66
- chat_history += [(query, result["answer"])]
67
- N = list(result['source_documents'][0])[1][1]['page'] # Adjust as needed
68
-
69
- for char in result['answer']:
70
- history[-1][-1] += char
71
- return history, ''
72
-
73
- # Function to render a specific page of a PDF file as an image
74
- def render_file(file):
75
- global N
76
- doc = fitz.open(file.name)
77
- page = doc[N]
78
- pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72))
79
- image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
80
- return image
81
-
82
- # Function to render initial content from the PDF
83
- def render_first(pdf_file):
84
- # Replace with logic to process the PDF and generate an initial image
85
- image = Image.new('RGB', (600, 400), color = 'white') # Placeholder
86
- return image
87
-
88
- # Streamlit & Gradio Interface
89
-
90
- st.title("PDF-Powered Chatbot")
91
-
92
- with st.container():
93
- gr.Markdown("""
94
- <style>
95
- .image-container { height: 680px; }
96
- </style>
97
- """)
98
-
99
- with gr.Blocks() as demo:
100
- pdf_upload1 = gr.UploadButton("📁 Upload PDF 1", file_types=[".pdf"]) # Define pdf_upload1
101
-
102
- # ... (rest of your interface creation)
103
-
104
- txt = gr.Textbox(label="Enter your query", placeholder="Ask a question...")
105
- submit_btn = gr.Button('Submit')
106
-
107
- @submit_btn.click()
108
- def on_submit():
109
- add_text(chatbot, txt)
110
- generate_response(chatbot, txt, pdf_upload1) # Use pdf_upload1 here
111
- render_file(pdf_upload1) # Use pdf_upload1 here
112
-
113
- if __name__ == "__main__":
114
- gr.Interface(
115
- fn=generate_response,
116
- inputs=[
117
- "file", # Define pdf_upload1
118
- "text", # Define chatbot output
119
- "text" # Define txt
120
- ],
121
- outputs=[
122
- "image", # Define show_img
123
- "text", # Define chatbot output
124
- "text" # Define txt
125
- ],
126
- title="PDF-Powered Chatbot"
127
- ).launch()
128
 
129
 
130
 
 
 
1
  import streamlit as st
2
+ import langchain
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
  from langchain.embeddings.openai import OpenAIEmbeddings
 
5
  from langchain.vectorstores import Chroma
6
+ from langchain import OpenAI, VectorDBQA
7
+ from langchain.chains import RetrievalQAWithSourcesChain
8
+ import PyPDF2
9
+
10
import os  # required for os.environ below; this file has no other live import of os

# OpenAI credential read from the environment at startup.
# Indexing (rather than .get) makes a missing OPENAI_API_KEY fail immediately with KeyError.
api_key = os.environ["OPENAI_API_KEY"]
11
+
12
+ #This function will go through pdf and extract and return list of page texts.
13
def read_and_textify(files):
    """Extract text from uploaded documents, one entry per PDF page or text file.

    Args:
        files: Iterable of binary file-like objects that expose a ``name``
            attribute. Files whose name ends in ``.txt`` are decoded as UTF-8
            text; every other file is parsed with ``PyPDF2.PdfReader``.

    Returns:
        A two-element list ``[text_list, sources_list]``. ``text_list[i]`` is
        the extracted text and ``sources_list[i]`` is its label in the form
        ``"<file name>_page_<zero-based index>"``.
    """
    text_list = []
    sources_list = []
    for file in files:
        if file.name.lower().endswith(".txt"):
            # Plain-text uploads cannot be parsed by PdfReader; treat the whole
            # file as a single "page 0" so the source label format stays uniform.
            raw = file.getvalue() if hasattr(file, "getvalue") else file.read()
            if isinstance(raw, bytes):
                raw = raw.decode("utf-8", errors="replace")
            text_list.append(raw)
            sources_list.append(file.name + "_page_0")
            continue

        pdf_reader = PyPDF2.PdfReader(file)
        for page_index, page in enumerate(pdf_reader.pages):
            text = page.extract_text()
            # Kept from the original: empties the page object after its text
            # has been read (presumably to release parsed content early).
            page.clear()
            text_list.append(text)
            sources_list.append(file.name + "_page_" + str(page_index))
    return [text_list, sources_list]
26
+
27
# Page setup.
st.set_page_config(layout="centered", page_title="Multidoc_QnA")
st.header("Multidoc_QnA")
st.write("---")

# File uploader. Because accept_multiple_files=True, the widget yields a list
# (empty until the user uploads something), not None.
uploaded_files = st.file_uploader(
    "Upload documents", accept_multiple_files=True, type=["txt", "pdf"]
)
st.write("---")

if not uploaded_files:
    # Truthiness check covers both None and the empty list, so the hint is
    # actually displayed before any upload.
    st.info("Upload files to analyse")
else:
    st.write(str(len(uploaded_files)) + " document(s) loaded..")

    # read_and_textify returns [texts, source_labels], aligned by index.
    textify_output = read_and_textify(uploaded_files)
    documents = textify_output[0]
    sources = textify_output[1]

    # Extract embeddings.
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    # Vector store with metadata; each text keeps its source label so answers
    # can cite the page they came from.
    vStore = Chroma.from_texts(
        documents, embeddings, metadatas=[{"source": s} for s in sources]
    )

    # Model selection.
    model_name = "gpt-3.5-turbo"
    # model_name = "gpt-4"

    # Retrieve the two closest chunks for each question.
    retriever = vStore.as_retriever()
    retriever.search_kwargs = {'k': 2}

    # Initiate the model and the question-answering chain.
    llm = OpenAI(model_name=model_name, openai_api_key=api_key, streaming=True)
    model = RetrievalQAWithSourcesChain.from_chain_type(
        llm=llm, chain_type="stuff", retriever=retriever
    )

    st.header("Ask your data")
    user_q = st.text_area("Enter your questions here")

    if st.button("Get Response"):
        try:
            with st.spinner("Model is working on it..."):
                result = model({"question": user_q}, return_only_outputs=True)
                st.subheader('Your response:')
                st.write(result['answer'])
                st.subheader('Source pages:')
                st.write(result['sources'])
        except Exception as e:
            # Top-level UI boundary: surface the failure to the user instead
            # of crashing the app.
            st.error(f"An error occurred: {e}")
            st.error('Oops, the GPT response resulted in an error :( Please try again with a different question.')
74
+
75
+
76
+
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+ # import gradio as gr
111
+ # import streamlit as st
112
+ # from langchain.embeddings.openai import OpenAIEmbeddings
113
+ # from langchain.text_splitter import CharacterTextSplitter
114
+ # from langchain.vectorstores import Chroma
115
+ # from langchain.chains import ConversationalRetrievalChain
116
+ # from langchain.chat_models import ChatOpenAI
117
+ # from langchain.document_loaders import PyPDFLoader
118
+ # import os
119
+ # import fitz
120
+ # from PIL import Image
121
 
122
 
123
+ # # Global variables
124
+ # COUNT, N = 0, 0
125
+ # chat_history = []
126
+ # chain = None # Initialize chain as None
127
 
128
+ # # Function to set the OpenAI API key
129
 
130
+ # api_key = os.environ['OPENAI_API_KEY']
131
 
132
+ # st.write(api_key)
133
 
134
 
135
+ # # Function to enable the API key input box
136
+ # def enable_api_box():
137
+ # return enable_box
138
+
139
+ # # Function to add text to the chat history
140
+ # def add_text(history, text):
141
+ # if not text:
142
+ # raise gr.Error('Enter text')
143
+ # history = history + [(text, '')]
144
+ # return history
145
+
146
+ # # Function to process the PDF file and create a conversation chain
147
+ # def process_file(file):
148
+ # global chain
149
+ # if 'OPENAI_API_KEY' not in os.environ:
150
+ # raise gr.Error('Upload your OpenAI API key')
151
+
152
+ # # Replace with your actual PDF processing logic
153
+ # loader = PyPDFLoader(file.name)
154
+ # documents = loader.load()
155
+ # embeddings = OpenAIEmbeddings()
156
+ # pdfsearch = Chroma.from_documents(documents, embeddings)
157
+
158
+ # chain = ConversationalRetrievalChain.from_llm(ChatOpenAI(temperature=0.3),
159
+ # retriever=pdfsearch.as_retriever(search_kwargs={"k": 1}),
160
+ # return_source_documents=True)
161
+ # return chain
162
+
163
+ # # Function to generate a response based on the chat history and query
164
+ # def generate_response(history, query, pdf_upload):
165
+ # global COUNT, N, chat_history, chain
166
+ # if not pdf_upload:
167
+ # raise gr.Error(message='Upload a PDF')
168
+
169
+ # if COUNT == 0:
170
+ # chain = process_file(pdf_upload)
171
+ # COUNT += 1
172
+
173
+ # # Replace with your LangChain logic to generate a response
174
+ # result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
175
+ # chat_history += [(query, result["answer"])]
176
+ # N = list(result['source_documents'][0])[1][1]['page'] # Adjust as needed
177
+
178
+ # for char in result['answer']:
179
+ # history[-1][-1] += char
180
+ # return history, ''
181
+
182
+ # # Function to render a specific page of a PDF file as an image
183
+ # def render_file(file):
184
+ # global N
185
+ # doc = fitz.open(file.name)
186
+ # page = doc[N]
187
+ # pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72))
188
+ # image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
189
+ # return image
190
+
191
+ # # Function to render initial content from the PDF
192
+ # def render_first(pdf_file):
193
+ # # Replace with logic to process the PDF and generate an initial image
194
+ # image = Image.new('RGB', (600, 400), color = 'white') # Placeholder
195
+ # return image
196
+
197
+ # # Streamlit & Gradio Interface
198
+
199
+ # st.title("PDF-Powered Chatbot")
200
+
201
+ # with st.container():
202
+ # gr.Markdown("""
203
+ # <style>
204
+ # .image-container { height: 680px; }
205
+ # </style>
206
+ # """)
207
+
208
+ # with gr.Blocks() as demo:
209
+ # pdf_upload1 = gr.UploadButton("📁 Upload PDF 1", file_types=[".pdf"]) # Define pdf_upload1
210
+
211
+ # # ... (rest of your interface creation)
212
+
213
+ # txt = gr.Textbox(label="Enter your query", placeholder="Ask a question...")
214
+ # submit_btn = gr.Button('Submit')
215
+
216
+ # @submit_btn.click()
217
+ # def on_submit():
218
+ # add_text(chatbot, txt)
219
+ # generate_response(chatbot, txt, pdf_upload1) # Use pdf_upload1 here
220
+ # render_file(pdf_upload1) # Use pdf_upload1 here
221
+
222
+ # if __name__ == "__main__":
223
+ # gr.Interface(
224
+ # fn=generate_response,
225
+ # inputs=[
226
+ # "file", # Define pdf_upload1
227
+ # "text", # Define chatbot output
228
+ # "text" # Define txt
229
+ # ],
230
+ # outputs=[
231
+ # "image", # Define show_img
232
+ # "text", # Define chatbot output
233
+ # "text" # Define txt
234
+ # ],
235
+ # title="PDF-Powered Chatbot"
236
+ # ).launch()
237
 
238
 
239