Ubai committed on
Commit
b0781d5
·
verified ·
1 Parent(s): 5455ea8

Delete LangChain_PDF1.ipynb

Browse files
Files changed (1) hide show
  1. LangChain_PDF1.ipynb +0 -258
LangChain_PDF1.ipynb DELETED
@@ -1,258 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "04815d1b-44ee-4bd3-878e-fa0c3bf9fa7f",
6
- "metadata": {
7
- "tags": []
8
- },
9
- "source": [
10
- "# LangChain QA Panel App\n",
11
- "\n",
12
- "This notebook shows how to make this app:"
13
- ]
14
- },
15
- {
16
- "cell_type": "code",
17
- "execution_count": null,
18
- "id": "a181568b-9cde-4a55-a853-4d2a41dbfdad",
19
- "metadata": {
20
- "tags": []
21
- },
22
- "outputs": [],
23
- "source": [
24
- "#!pip install langchain openai chromadb tiktoken pypdf panel\n"
25
- ]
26
- },
27
- {
28
- "cell_type": "code",
29
- "execution_count": null,
30
- "id": "9a464409-d064-4766-a9cb-5119f6c4b8f5",
31
- "metadata": {
32
- "tags": []
33
- },
34
- "outputs": [],
35
- "source": [
36
- "import os \n",
37
- "from langchain.chains import RetrievalQA\n",
38
- "from langchain.llms import OpenAI\n",
39
- "from langchain.document_loaders import TextLoader\n",
40
- "from langchain.document_loaders import PyPDFLoader\n",
41
- "from langchain.indexes import VectorstoreIndexCreator\n",
42
- "from langchain.text_splitter import CharacterTextSplitter\n",
43
- "from langchain.embeddings import OpenAIEmbeddings\n",
44
- "from langchain.embeddings import HuggingFaceEmbeddings\n",
45
- "from langchain.vectorstores import Chroma\n",
46
- "import panel as pn\n",
47
- "import tempfile\n"
48
- ]
49
- },
50
- {
51
- "cell_type": "code",
52
- "execution_count": null,
53
- "id": "b2d07ea5-9ff2-4c96-a8dc-92895d870b73",
54
- "metadata": {
55
- "tags": []
56
- },
57
- "outputs": [],
58
- "source": [
59
- "pn.extension('texteditor', template=\"bootstrap\", sizing_mode='stretch_width')\n",
60
- "pn.state.template.param.update(\n",
61
- " main_max_width=\"690px\",\n",
62
- " header_background=\"#F08080\",\n",
63
- ")"
64
- ]
65
- },
66
- {
67
- "cell_type": "code",
68
- "execution_count": null,
69
- "id": "763db4d0-3436-41d3-8b0f-e66ce16468cd",
70
- "metadata": {
71
- "tags": []
72
- },
73
- "outputs": [],
74
- "source": [
75
- "file_input = pn.widgets.FileInput(width=300)\n",
76
- "\n",
77
- "openaikey = pn.widgets.PasswordInput(\n",
78
- " value=\"\", placeholder=\"Enter your OpenAI API Key here...\", width=300\n",
79
- ")\n",
80
- "prompt = pn.widgets.TextEditor(\n",
81
- " value=\"\", placeholder=\"Enter your questions here...\", height=160, toolbar=False\n",
82
- ")\n",
83
- "run_button = pn.widgets.Button(name=\"Run!\")\n",
84
- "\n",
85
- "select_k = pn.widgets.IntSlider(\n",
86
- " name=\"Number of relevant chunks\", start=1, end=5, step=1, value=2\n",
87
- ")\n",
88
- "select_chain_type = pn.widgets.RadioButtonGroup(\n",
89
- " name='Chain type', \n",
90
- " options=['stuff', 'map_reduce', \"refine\", \"map_rerank\"]\n",
91
- ")\n",
92
- "\n",
93
- "widgets = pn.Row(\n",
94
- " pn.Column(prompt, run_button, margin=5),\n",
95
- " pn.Card(\n",
96
- " \"Chain type:\",\n",
97
- " pn.Column(select_chain_type, select_k),\n",
98
- " title=\"Advanced settings\", margin=10\n",
99
- " ), width=600\n",
100
- ")"
101
- ]
102
- },
103
- {
104
- "cell_type": "code",
105
- "execution_count": null,
106
- "id": "9b83cc06-3401-498f-8f84-8a98370f3121",
107
- "metadata": {
108
- "tags": []
109
- },
110
- "outputs": [],
111
- "source": [
112
- "def qa(file, query, chain_type, k):\n",
113
- " # load document\n",
114
- " loader = PyPDFLoader(file)\n",
115
- " documents = loader.load()\n",
116
- " # split the documents into chunks\n",
117
- " text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
118
- " texts = text_splitter.split_documents(documents)\n",
119
- " # select which embeddings we want to use\n",
120
- " #embeddings = OpenAIEmbeddings()\n",
121
- " embeddings = HuggingFaceEmbeddings()\n",
122
- " # create the vector store to use as the index\n",
123
- " db = Chroma.from_documents(texts, embeddings)\n",
124
- " # expose this index in a retriever interface\n",
125
- " retriever = db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": k})\n",
126
- " # create a chain to answer questions \n",
127
- " qa = RetrievalQA.from_chain_type(\n",
128
- " llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)\n",
129
- llm=HuggingFaceHub(huggingfacehub_api_token = os.environ['HUGGING_FACE_HUB_API_KEY]), repo_id=repo_id, model_kwargs={'temperature':0.2, 'max_lenght':1000}, chain_type=chain_type, retriever=retriever, return_source_documents=True)\n",
130
- " result = qa({\"query\": query})\n",
131
- " print(result['result'])\n",
132
- " return result"
133
- ]
134
- },
135
- {
136
- "cell_type": "code",
137
- "execution_count": null,
138
- "id": "2722f43b-daf6-4d17-a842-41203ae9b140",
139
- "metadata": {
140
- "tags": []
141
- },
142
- "outputs": [],
143
- "source": [
144
- "# result = qa(\"example.pdf\", \"what is the total number of AI publications?\")"
145
- ]
146
- },
147
- {
148
- "cell_type": "code",
149
- "execution_count": null,
150
- "id": "60e1b3d3-c0d2-4260-ae0c-26b03f1b8824",
151
- "metadata": {},
152
- "outputs": [],
153
- "source": [
154
- "convos = [] # store all panel objects in a list\n",
155
- "\n",
156
- "def qa_result(_):\n",
157
- " os.environ[\"OPENAI_API_KEY\"] = openaikey.value\n",
158
- " \n",
159
- " # save pdf file to a temp file \n",
160
- " if file_input.value is not None:\n",
161
- " file_input.save(\"/.cache/temp.pdf\")\n",
162
- " \n",
163
- " prompt_text = prompt.value\n",
164
- " if prompt_text:\n",
165
- " result = qa(file=\"/.cache/temp.pdf\", query=prompt_text, chain_type=select_chain_type.value, k=select_k.value)\n",
166
- " convos.extend([\n",
167
- " pn.Row(\n",
168
- " pn.panel(\"\\U0001F60A\", width=10),\n",
169
- " prompt_text,\n",
170
- " width=600\n",
171
- " ),\n",
172
- " pn.Row(\n",
173
- " pn.panel(\"\\U0001F916\", width=10),\n",
174
- " pn.Column(\n",
175
- " result[\"result\"],\n",
176
- " \"Relevant source text:\",\n",
177
- " pn.pane.Markdown('\\n--------------------------------------------------------------------\\n'.join(doc.page_content for doc in result[\"source_documents\"]))\n",
178
- " )\n",
179
- " )\n",
180
- " ])\n",
181
- " #return convos\n",
182
- " return pn.Column(*convos, margin=15, width=575, min_height=400)\n"
183
- ]
184
- },
185
- {
186
- "cell_type": "code",
187
- "execution_count": null,
188
- "id": "c3a70857-0b98-4f62-a9c0-b62ca42b474c",
189
- "metadata": {
190
- "tags": []
191
- },
192
- "outputs": [],
193
- "source": [
194
- "qa_interactive = pn.panel(\n",
195
- " pn.bind(qa_result, run_button),\n",
196
- " loading_indicator=True,\n",
197
- ")"
198
- ]
199
- },
200
- {
201
- "cell_type": "code",
202
- "execution_count": null,
203
- "id": "228e2b42-b1ed-43af-b923-031a70241ab0",
204
- "metadata": {
205
- "tags": []
206
- },
207
- "outputs": [],
208
- "source": [
209
- "output = pn.WidgetBox('*Output will show up here:*', qa_interactive, width=630, scroll=True)"
210
- ]
211
- },
212
- {
213
- "cell_type": "code",
214
- "execution_count": null,
215
- "id": "1b0ec253-2bcd-4f91-96d8-d8456e900a58",
216
- "metadata": {
217
- "tags": []
218
- },
219
- "outputs": [],
220
- "source": [
221
- "# layout\n",
222
- "pn.Column(\n",
223
- " pn.pane.Markdown(\"\"\"\n",
224
- " ## Talk with your PDF \n",
225
- " \n",
226
- " 1) Submit a PDF file.\n 2) Input your OpenAI API key.\n 3) Type a question and press \"Run!\".\n",
227
- " \n",
228
- " \"\"\"),\n",
229
- " pn.Row(file_input,openaikey),\n",
230
- " output,\n",
231
- " widgets\n",
232
- "\n",
233
- ").servable()"
234
- ]
235
- }
236
- ],
237
- "metadata": {
238
- "kernelspec": {
239
- "display_name": "Python 3 (ipykernel)",
240
- "language": "python",
241
- "name": "python3"
242
- },
243
- "language_info": {
244
- "codemirror_mode": {
245
- "name": "ipython",
246
- "version": 3
247
- },
248
- "file_extension": ".py",
249
- "mimetype": "text/x-python",
250
- "name": "python",
251
- "nbconvert_exporter": "python",
252
- "pygments_lexer": "ipython3",
253
- "version": "3.10.10"
254
- }
255
- },
256
- "nbformat": 4,
257
- "nbformat_minor": 5
258
- }