chomakov committed
Commit eb87d78
1 Parent(s): 69f964f

Upload 2 files

Files changed (2)
  1. Dockerfile +1 -1
  2. GPT-4_PDF_summary.ipynb +42 -29
Dockerfile CHANGED
@@ -10,7 +10,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade -r requirements.txt
 
 COPY . .
 
- CMD ["panel", "serve", "/GPT-4_PDF_summary.ipynb", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin", "chomakov-GPT-4_PDF_summary.hf.space", "--allow-websocket-origin", "0.0.0.0:7860"]
+ CMD ["panel", "serve", "/GPT-4_PDF_summary.ipynb", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin", "https://chomakov-gpt-4-pdf-summary.hf.space", "--allow-websocket-origin", "0.0.0.0:7860"]
 
 RUN mkdir /.cache
 RUN chmod 777 /.cache
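As context for the CMD change: a minimal, illustrative sketch of launching the same Panel app programmatically with pn.serve, with the address, port, and websocket origins mirroring the CMD above (the make_app stand-in and the bare-host origin form are assumptions, not part of the commit):

import panel as pn

def make_app():
    # Hypothetical stand-in for the layout the notebook marks .servable()
    return pn.Column("GPT-4 PDF summary app placeholder")

pn.serve(
    make_app,
    address="0.0.0.0",
    port=7860,
    websocket_origin=["chomakov-gpt-4-pdf-summary.hf.space", "0.0.0.0:7860"],
    show=False,
)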
GPT-4_PDF_summary.ipynb CHANGED
@@ -2,7 +2,7 @@
  "cells": [
  {
  "cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
  "id": "409a312d",
  "metadata": {},
  "outputs": [
@@ -106,12 +106,12 @@
  }
  ],
  "source": [
- "!pip install langchain openai chromadb tiktoken pypdf panel"
+ "#!pip install langchain openai chromadb tiktoken pypdf panel"
  ]
  },
  {
  "cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
  "id": "dcbbf6df",
  "metadata": {},
  "outputs": [
@@ -147,7 +147,7 @@
  }
  ],
  "source": [
- "import os\n",
+ "import os \n",
  "from langchain.chains import RetrievalQA\n",
  "from langchain.llms import OpenAI\n",
  "from langchain.document_loaders import TextLoader\n",
@@ -157,7 +157,17 @@
  "from langchain.embeddings import OpenAIEmbeddings\n",
  "from langchain.vectorstores import Chroma\n",
  "import panel as pn\n",
- "pn.extension ('texteditor', template=\"bootstrap\", sizing_mode='stretch_width')\n",
+ "import tempfile"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9d891458",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pn.extension('texteditor', template=\"bootstrap\", sizing_mode='stretch_width')\n",
  "pn.state.template.param.update(\n",
  " main_max_width=\"690px\",\n",
  " header_background=\"#F08080\",\n",
@@ -171,14 +181,13 @@
  "metadata": {},
  "outputs": [],
  "source": [
- "import panel as pn\n",
- "\n",
  "file_input = pn.widgets.FileInput(width=300)\n",
+ "\n",
  "openaikey = pn.widgets.PasswordInput(\n",
  " value=\"\", placeholder=\"Enter your OpenAI API Key here...\", width=300\n",
  ")\n",
  "prompt = pn.widgets.TextEditor(\n",
- " value='', placeholder=\"Enter your questions here...\", height=160, toolbar=False\n",
+ " value=\"\", placeholder=\"Enter your questions here...\", height=160, toolbar=False\n",
  ")\n",
  "run_button = pn.widgets.Button(name=\"Run!\")\n",
  "\n",
@@ -186,13 +195,15 @@
  " name=\"Number of relevant chunks\", start=1, end=5, step=1, value=2\n",
  ")\n",
  "select_chain_type = pn.widgets.RadioButtonGroup(\n",
- " name='Chain type',\n",
+ " name='Chain type', \n",
  " options=['stuff', 'map_reduce', \"refine\", \"map_rerank\"]\n",
  ")\n",
+ "\n",
  "widgets = pn.Row(\n",
  " pn.Column(prompt, run_button, margin=5),\n",
  " pn.Card(\n",
- " pn.Column(\"Chain type:\", select_chain_type, select_k),\n",
+ " \"Chain type:\",\n",
+ " pn.Column(select_chain_type, select_k),\n",
  " title=\"Advanced settings\", margin=10\n",
  " ), width=600\n",
  ")"
@@ -206,23 +217,23 @@
  "outputs": [],
  "source": [
  "def qa(file, query, chain_type, k):\n",
- "# load document\n",
+ " # load document\n",
  " loader = PyPDFLoader(file)\n",
  " documents = loader.load()\n",
- "# split the documents into chunks\n",
+ " # split the documents into chunks\n",
  " text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
  " texts = text_splitter.split_documents(documents)\n",
- "# select which embeddings we want to use\n",
+ " # select which embeddings we want to use\n",
  " embeddings = OpenAIEmbeddings()\n",
- "# create the vectorestore to use as the index\n",
- " b = Chroma.from_documents(texts, embeddings)\n",
- "# expose this index in a retriever interface\n",
+ " # create the vectorestore to use as the index\n",
+ " db = Chroma.from_documents(texts, embeddings)\n",
+ " # expose this index in a retriever interface\n",
  " retriever = db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": k})\n",
- "# create a chain to answer questions\n",
+ " # create a chain to answer questions \n",
  " qa = RetrievalQA.from_chain_type(\n",
- " llm = OpenAI(), chain_type = chain_type, retriever=retriever, return_source_documents=True)\n",
+ " llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)\n",
  " result = qa({\"query\": query})\n",
- " print (result ['result'])\n",
+ " print(result['result'])\n",
  " return result"
  ]
  },
@@ -233,17 +244,18 @@
  "metadata": {},
  "outputs": [],
  "source": [
- "convos = [] # store all panel objects in a list\n",
- "def qa_result(_):\n",
- " os.environ[\"'OPENAI_API_KEY\"] = openaikey.value\n",
+ "convos = [] # store all panel objects in a list\n",
  "\n",
- " # save pdf file to a temp file\n",
+ "def qa_result(_):\n",
+ " os.environ[\"OPENAI_API_KEY\"] = openaikey.value\n",
+ " \n",
+ " # save pdf file to a temp file \n",
  " if file_input.value is not None:\n",
- " file_input.save (\"/.cache/temp.pdf\")\n",
- "\n",
+ " file_input.save(\"/.cache/temp.pdf\")\n",
+ " \n",
  " prompt_text = prompt.value\n",
  " if prompt_text:\n",
- " result = qa(file=\"/.cache/temp-pdf\", query=prompt_text, chain_type=select_chain_type. value, k=select_k.value)\n",
+ " result = qa(file=\"/.cache/temp.pdf\", query=prompt_text, chain_type=select_chain_type.value, k=select_k.value)\n",
  " convos.extend([\n",
  " pn.Row(\n",
  " pn.panel(\"\\U0001F60A\", width=10),\n",
@@ -255,7 +267,7 @@
  " pn.Column(\n",
  " result[\"result\"],\n",
  " \"Relevant source text:\",\n",
- " pn.panel.Markdown('\\n----------------------------------------\\n'.join(doc.page_content for doc in result[\"source_documents\"]))\n",
+ " pn.pane.Markdown('\\n--------------------------------------------------------------------\\n'.join(doc.page_content for doc in result[\"source_documents\"]))\n",
  " )\n",
  " )\n",
  " ])\n",
@@ -377,12 +389,13 @@
  " pn.pane.Markdown(\"\"\"\n",
  " ## \\U0001F60A! Question Answering with your PDF file\n",
  " \n",
- " 1) Upload a PDF. 2) Enter OpenAI API key. This costs $. Set up billing at OpenAl. 3) Type a question and click \"Run\".\n",
+ " 1) Upload a PDF. 2) Enter OpenAI API key. This costs $. Set up billing at [OpenAI](https://platform.openai.com/account). 3) Type a question and click \"Run\".\n",
  " \n",
  " \"\"\"),\n",
- " pn.Row(file_input, openaikey),\n",
+ " pn.Row(file_input,openaikey),\n",
  " output,\n",
  " widgets\n",
+ "\n",
  ").servable()"
  ]
  }
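For orientation, a minimal sketch of calling the qa() helper defined in the notebook directly, without the Panel widgets; the API key, PDF path, and question below are placeholders, not part of the commit:

import os

os.environ["OPENAI_API_KEY"] = "sk-..."  # placeholder key

result = qa(
    file="example.pdf",                  # hypothetical local PDF
    query="What are the main findings?",
    chain_type="stuff",                  # one of the select_chain_type options
    k=2,                                 # number of relevant chunks, as in select_k
)
print(result["result"])
for doc in result["source_documents"]:
    print(doc.page_content[:200])        # preview the retrieved source chunks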