Ubai committed on
Commit
ca7da68
·
verified ·
1 Parent(s): f658d2c

Upload LangChain_QA_Panel_App.ipynb

Browse files
Files changed (1) hide show
  1. LangChain_QA_Panel_App.ipynb +273 -0
LangChain_QA_Panel_App.ipynb ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "04815d1b-44ee-4bd3-878e-fa0c3bf9fa7f",
6
+ "metadata": {
7
+ "tags": [],
8
+ "id": "04815d1b-44ee-4bd3-878e-fa0c3bf9fa7f"
9
+ },
10
+ "source": [
11
+ "# LangChain QA Panel App"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": null,
17
+ "id": "a181568b-9cde-4a55-a853-4d2a41dbfdad",
18
+ "metadata": {
19
+ "tags": [],
20
+ "id": "a181568b-9cde-4a55-a853-4d2a41dbfdad"
21
+ },
22
+ "outputs": [],
23
+ "source": [
24
+ "!pip install langchain openai chromadb tiktoken pypdf panel"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": null,
30
+ "id": "9a464409-d064-4766-a9cb-5119f6c4b8f5",
31
+ "metadata": {
32
+ "tags": [],
33
+ "id": "9a464409-d064-4766-a9cb-5119f6c4b8f5"
34
+ },
35
+ "outputs": [],
36
+ "source": [
37
+ "import os\n",
38
+ "from langchain.chains import RetrievalQA\n",
39
+ "from langchain.llms import OpenAI\n",
40
+ "from langchain.document_loaders import TextLoader\n",
41
+ "from langchain.document_loaders import PyPDFLoader\n",
42
+ "from langchain.indexes import VectorstoreIndexCreator\n",
43
+ "from langchain.text_splitter import CharacterTextSplitter\n",
44
+ "from langchain.embeddings import OpenAIEmbeddings\n",
45
+ "from langchain.vectorstores import Chroma\n",
46
+ "from langchain.embeddings import HuggingFaceEmbeddings\n",
47
+ "from langchain import HuggingFaceHub\n",
48
+ "import panel as pn\n",
49
+ "import tempfile\n"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": null,
55
+ "id": "b2d07ea5-9ff2-4c96-a8dc-92895d870b73",
56
+ "metadata": {
57
+ "tags": [],
58
+ "id": "b2d07ea5-9ff2-4c96-a8dc-92895d870b73"
59
+ },
60
+ "outputs": [],
61
+ "source": [
62
+ "pn.extension('texteditor', template=\"bootstrap\", sizing_mode='stretch_width')\n",
63
+ "pn.state.template.param.update(\n",
64
+ " main_max_width=\"690px\",\n",
65
+ " header_background=\"#F08080\",\n",
66
+ ")"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": null,
72
+ "id": "763db4d0-3436-41d3-8b0f-e66ce16468cd",
73
+ "metadata": {
74
+ "tags": [],
75
+ "id": "763db4d0-3436-41d3-8b0f-e66ce16468cd"
76
+ },
77
+ "outputs": [],
78
+ "source": [
79
+ "file_input = pn.widgets.FileInput(width=300)\n",
80
+ "\n",
81
+ "hfkey = pn.widgets.PasswordInput(\n",
82
+ " value=\"\", placeholder=\"Enter your Huggingface Key...\", width=300\n",
83
+ ")\n",
84
+ "prompt = pn.widgets.TextEditor(\n",
85
+ " value=\"\", placeholder=\"Enter your questions here...\", height=160, toolbar=False\n",
86
+ ")\n",
87
+ "run_button = pn.widgets.Button(name=\"Run!\")\n",
88
+ "\n",
89
+ "select_k = pn.widgets.IntSlider(\n",
90
+ " name=\"Number of relevant chunks\", start=1, end=5, step=1, value=2\n",
91
+ ")\n",
92
+ "select_chain_type = pn.widgets.RadioButtonGroup(\n",
93
+ " name='Chain type',\n",
94
+ " options=['stuff', 'map_reduce', \"refine\", \"map_rerank\"],\n",
95
+ " value='map_reduce'\n",
96
+ ")\n",
97
+ "\n",
98
+ "widgets = pn.Row(\n",
99
+ " pn.Column(prompt, run_button, margin=5),\n",
100
+ " pn.Card(\n",
101
+ " \"Chain type:\",\n",
102
+ " pn.Column(select_chain_type, select_k),\n",
103
+ " title=\"Advanced settings\"\n",
104
+ " ), width=670\n",
105
+ ")"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": null,
111
+ "id": "9b83cc06-3401-498f-8f84-8a98370f3121",
112
+ "metadata": {
113
+ "tags": [],
114
+ "id": "9b83cc06-3401-498f-8f84-8a98370f3121"
115
+ },
116
+ "outputs": [],
117
+ "source": [
118
+ "def qa(file, query, chain_type, k):\n",
119
+ " # load document\n",
120
+ " loader = PyPDFLoader(file)\n",
121
+ " documents = loader.load()\n",
122
+ " # split the documents into chunks\n",
123
+ " text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
124
+ " texts = text_splitter.split_documents(documents)\n",
125
+ " # select which embeddings we want to use\n",
126
+ " #embeddings = OpenAIEmbeddings()\n",
127
+ " embeddings = HuggingFaceEmbeddings()\n",
128
+ " # create the vector store to use as the index\n",
129
+ " db = Chroma.from_documents(texts, embeddings)\n",
130
+ " # expose this index in a retriever interface\n",
131
+ " retriever = db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": k})\n",
132
+ " # create a chain to answer questions\n",
133
+ " qa = RetrievalQA.from_chain_type(\n",
134
+ " llm = HuggingFaceHub(), chain_type=chain_type, retriever=retriever, return_source_documents=True)\n",
135
+ " result = qa({\"query\": query})\n",
136
+ " print(result['result'])\n",
137
+ " return result"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": null,
143
+ "id": "2722f43b-daf6-4d17-a842-41203ae9b140",
144
+ "metadata": {
145
+ "tags": [],
146
+ "id": "2722f43b-daf6-4d17-a842-41203ae9b140"
147
+ },
148
+ "outputs": [],
149
+ "source": [
150
+ "# os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
151
+ "# result = qa(\"materials/example.pdf\", \"When was GPT-2 created?\", \"map_reduce\", 2)"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": null,
157
+ "id": "60e1b3d3-c0d2-4260-ae0c-26b03f1b8824",
158
+ "metadata": {
159
+ "id": "60e1b3d3-c0d2-4260-ae0c-26b03f1b8824"
160
+ },
161
+ "outputs": [],
162
+ "source": [
163
+ "convos = [] # store all panel objects in a list\n",
164
+ "\n",
165
+ "def qa_result(_):\n",
166
+ " #os.environ[\"OPENAI_API_KEY\"] = openaikey.value\n",
167
+ " os.environ['HUGGING_FACE_HUB_API_KEY'] = hfkey.value\n",
168
+ "\n",
169
+ " # save pdf file to a temp file\n",
170
+ " if file_input.value is not None:\n",
171
+ " file_input.save(\"/.cache/temp.pdf\")\n",
172
+ "\n",
173
+ " prompt_text = prompt.value\n",
174
+ " if prompt_text:\n",
175
+ " result = qa(file=\"/.cache/temp.pdf\", query=prompt_text, chain_type=select_chain_type.value, k=select_k.value)\n",
176
+ " convos.extend([\n",
177
+ " pn.Row(\n",
178
+ " pn.panel(\"\\U0001F60A\", width=10),\n",
179
+ " prompt_text,\n",
180
+ " width=600\n",
181
+ " ),\n",
182
+ " pn.Row(\n",
183
+ " pn.panel(\"\\U0001F916\", width=10),\n",
184
+ " pn.Column(\n",
185
+ " result[\"result\"],\n",
186
+ " \"Relevant source text:\",\n",
187
+ " pn.pane.Markdown('\\n--------------------------------------------------------------------\\n'.join(doc.page_content for doc in result[\"source_documents\"]))\n",
188
+ " )\n",
189
+ " )\n",
190
+ " ])\n",
191
+ " return pn.Column(*convos, margin=15, width=575, min_height=400)\n"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": null,
197
+ "id": "c3a70857-0b98-4f62-a9c0-b62ca42b474c",
198
+ "metadata": {
199
+ "tags": [],
200
+ "id": "c3a70857-0b98-4f62-a9c0-b62ca42b474c"
201
+ },
202
+ "outputs": [],
203
+ "source": [
204
+ "qa_interactive = pn.panel(\n",
205
+ " pn.bind(qa_result, run_button),\n",
206
+ " loading_indicator=True,\n",
207
+ ")"
208
+ ]
209
+ },
210
+ {
211
+ "cell_type": "code",
212
+ "execution_count": null,
213
+ "id": "228e2b42-b1ed-43af-b923-031a70241ab0",
214
+ "metadata": {
215
+ "tags": [],
216
+ "id": "228e2b42-b1ed-43af-b923-031a70241ab0"
217
+ },
218
+ "outputs": [],
219
+ "source": [
220
+ "output = pn.WidgetBox('*Output will show up here:*', qa_interactive, width=670, scroll=True)"
221
+ ]
222
+ },
223
+ {
224
+ "cell_type": "code",
225
+ "execution_count": null,
226
+ "id": "1b0ec253-2bcd-4f91-96d8-d8456e900a58",
227
+ "metadata": {
228
+ "tags": [],
229
+ "id": "1b0ec253-2bcd-4f91-96d8-d8456e900a58"
230
+ },
231
+ "outputs": [],
232
+ "source": [
233
+ "# layout\n",
234
+ "pn.Column(\n",
235
+ " pn.pane.Markdown(\"\"\"\n",
236
+ " ## \\U0001F60A! Question Answering with your PDF file\n",
237
+ "\n",
238
+ " 1) Upload a PDF. 2) Enter your Hugging Face API key (create one in your [Hugging Face account settings](https://huggingface.co/settings/tokens)). 3) Type a question and click \"Run!\"\n",
239
+ "\n",
240
+ " \"\"\"),\n",
241
+ " pn.Row(file_input,hfkey),\n",
242
+ " output,\n",
243
+ " widgets\n",
244
+ "\n",
245
+ ").servable()"
246
+ ]
247
+ }
248
+ ],
249
+ "metadata": {
250
+ "kernelspec": {
251
+ "display_name": "Python 3 (ipykernel)",
252
+ "language": "python",
253
+ "name": "python3"
254
+ },
255
+ "language_info": {
256
+ "codemirror_mode": {
257
+ "name": "ipython",
258
+ "version": 3
259
+ },
260
+ "file_extension": ".py",
261
+ "mimetype": "text/x-python",
262
+ "name": "python",
263
+ "nbconvert_exporter": "python",
264
+ "pygments_lexer": "ipython3",
265
+ "version": "3.10.11"
266
+ },
267
+ "colab": {
268
+ "provenance": []
269
+ }
270
+ },
271
+ "nbformat": 4,
272
+ "nbformat_minor": 5
273
+ }