kaansahin commited on
Commit
b999f66
·
1 Parent(s): 21cc236

Upload 4 files

Browse files
Files changed (3) hide show
  1. Dockerfile +20 -0
  2. app.ipynb +202 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Read the docs: https://huggingface.co/docs/hub/spaces-sdks-docker
# You will also find guides on how best to write your Dockerfile.

FROM python:3.9

WORKDIR /code

# Copy only the requirements file first so the installed-dependency layer
# is cached by Docker and survives application-code changes.
COPY ./requirements.txt /code/requirements.txt

# Upgrade pip, then install the app dependencies exactly once.
# (The original installed requirements.txt twice — once via `python3 -m pip`
# and again via bare `pip` — which only wastes a build layer.)
RUN python3 -m pip install --no-cache-dir --upgrade pip
RUN python3 -m pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# Serve the notebook app with Panel on port 7860, the port Hugging Face
# Spaces exposes; allow websocket connections from the Space's public host.
CMD ["panel", "serve", "/code/app.ipynb", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin", "kaansahin-communicatelocalpdf.hf.space", "--allow-websocket-origin", "0.0.0.0:7860"]
app.ipynb ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pip install langchain openai chromadb tiktoken pypdf panel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from langchain.chains import RetrievalQA\n",
    "from langchain.llms import OpenAI\n",
    "from langchain.document_loaders import TextLoader\n",
    "from langchain.document_loaders import PyPDFLoader\n",
    "from langchain.indexes import VectorstoreIndexCreator\n",
    "from langchain.text_splitter import CharacterTextSplitter\n",
    "from langchain.embeddings import OpenAIEmbeddings\n",
    "from langchain.vectorstores import Chroma\n",
    "import panel as pn\n",
    "import tempfile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configure the Panel template used to render the app.\n",
    "pn.extension('texteditor', template=\"bootstrap\", sizing_mode='stretch_width')\n",
    "pn.state.template.param.update(\n",
    "    main_max_width=\"690px\",\n",
    "    header_background=\"#F08080\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input widgets: PDF upload, API key, question editor, and retrieval settings.\n",
    "file_input = pn.widgets.FileInput(width=300)\n",
    "\n",
    "openaikey = pn.widgets.PasswordInput(\n",
    "    value=\"\", placeholder=\"Enter your OpenAI API Key here...\", width=300\n",
    ")\n",
    "prompt = pn.widgets.TextEditor(\n",
    "    value=\"\", placeholder=\"Enter your questions here...\", height=160, toolbar=False\n",
    ")\n",
    "run_button = pn.widgets.Button(name=\"Run!\")\n",
    "\n",
    "select_k = pn.widgets.IntSlider(\n",
    "    name=\"Number of relevant chunks\", start=1, end=5, step=1, value=2\n",
    ")\n",
    "select_chain_type = pn.widgets.RadioButtonGroup(\n",
    "    name='Chain type',\n",
    "    options=['stuff', 'map_reduce', \"refine\", \"map_rerank\"]\n",
    ")\n",
    "\n",
    "widgets = pn.Row(\n",
    "    pn.Column(prompt, run_button, margin=5),\n",
    "    pn.Card(\n",
    "        \"Chain type:\",\n",
    "        pn.Column(select_chain_type, select_k),\n",
    "        title=\"Advanced settings\", margin=10\n",
    "    ), width=600\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def qa(file, query, chain_type, k):\n",
    "    \"\"\"Answer *query* against the PDF at *file* using a RetrievalQA chain.\n",
    "\n",
    "    chain_type: one of 'stuff', 'map_reduce', 'refine', 'map_rerank'.\n",
    "    k: number of relevant chunks to retrieve from the vector store.\n",
    "    Returns the chain's result dict (includes 'result' and 'source_documents').\n",
    "    \"\"\"\n",
    "    # load document\n",
    "    loader = PyPDFLoader(file)\n",
    "    documents = loader.load()\n",
    "    # split the documents into chunks\n",
    "    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
    "    texts = text_splitter.split_documents(documents)\n",
    "    # select which embeddings we want to use\n",
    "    embeddings = OpenAIEmbeddings()\n",
    "    # create the vectorstore to use as the index\n",
    "    db = Chroma.from_documents(texts, embeddings)\n",
    "    # expose this index in a retriever interface\n",
    "    retriever = db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": k})\n",
    "    # create a chain to answer questions\n",
    "    qa = RetrievalQA.from_chain_type(\n",
    "        llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)\n",
    "    result = qa({\"query\": query})\n",
    "    print(result['result'])\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# result = qa(\"example.pdf\", \"what is the total number of AI publications?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "convos = []  # store all panel objects in a list\n",
    "\n",
    "def qa_result(_):\n",
    "    \"\"\"Run the QA pipeline for the current widget state and render the chat.\"\"\"\n",
    "    os.environ[\"OPENAI_API_KEY\"] = openaikey.value\n",
    "\n",
    "    # Save the uploaded PDF into the OS temp directory. The previous\n",
    "    # hard-coded '/.cache/temp.pdf' pointed at a root-level directory that\n",
    "    # may not exist or be writable for a non-root container user; tempfile\n",
    "    # (already imported above) always yields a writable location.\n",
    "    temp_pdf_path = os.path.join(tempfile.gettempdir(), \"temp.pdf\")\n",
    "    if file_input.value is not None:\n",
    "        file_input.save(temp_pdf_path)\n",
    "\n",
    "    prompt_text = prompt.value\n",
    "    if prompt_text:\n",
    "        result = qa(file=temp_pdf_path, query=prompt_text, chain_type=select_chain_type.value, k=select_k.value)\n",
    "        convos.extend([\n",
    "            pn.Row(\n",
    "                pn.panel(\"\\U0001F60A\", width=10),\n",
    "                prompt_text,\n",
    "                width=600\n",
    "            ),\n",
    "            pn.Row(\n",
    "                pn.panel(\"\\U0001F916\", width=10),\n",
    "                pn.Column(\n",
    "                    result[\"result\"],\n",
    "                    \"Relevant source text:\",\n",
    "                    pn.pane.Markdown('\\n--------------------------------------------------------------------\\n'.join(doc.page_content for doc in result[\"source_documents\"]))\n",
    "                )\n",
    "            )\n",
    "        ])\n",
    "    return pn.Column(*convos, margin=15, width=575, min_height=400)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-render the conversation whenever the Run button is clicked.\n",
    "qa_interactive = pn.panel(\n",
    "    pn.bind(qa_result, run_button),\n",
    "    loading_indicator=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "output = pn.WidgetBox('*Output will show up here:*', qa_interactive, width=630, scroll=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# layout\n",
    "pn.Column(\n",
    "    pn.pane.Markdown(\"\"\"\n",
    "    ## \\U0001F60A! Question Answering with your PDF file\n",
    "    \n",
    "    1) Upload a PDF. 2) Enter OpenAI API key. This costs $. Set up billing at [OpenAI](https://platform.openai.com/account). 3) Type a question and click \"Run\".\n",
    "    \n",
    "    \"\"\"),\n",
    "    pn.Row(file_input,openaikey),\n",
    "    output,\n",
    "    widgets\n",
    "\n",
    ").servable()"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ langchain
2
+ openai
3
+ chromadb
4
+ pypdf
5
+ tiktoken
6
+ panel
7
+ notebook