Spaces:

chomakov
/

GPT-4_PDF_summary

Runtime error

App Files Community

chomakov committed on Apr 11, 2023

Commit

eb87d78

1 Parent(s): 69f964f

Upload 2 files

Browse files

Files changed (2) hide show

Dockerfile +1 -1
GPT-4_PDF_summary.ipynb +42 -29

Dockerfile CHANGED Viewed

@@ -10,7 +10,7 @@ RUN python3 -m pip install --no-cache-dir --upgrade -r requirements.txt
 COPY . .
-CMD ["panel", "serve", "/GPT-4_PDF_summary.ipynb", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin", "chomakov-GPT-4_PDF_summary.hf.space", "--allow-websocket-origin", "0.0.0.0:7860"]
 RUN mkdir /.cache
 RUN chmod 777 /.cache

 COPY . .
+# Serve the notebook with Panel on port 7860, bound to all interfaces.
+# --allow-websocket-origin is matched against HOST[:PORT], so the value must be
+# the bare Space host name; a scheme-prefixed URL ("https://...") never matches
+# the browser's origin and the websocket connection is rejected.
+CMD ["panel", "serve", "/GPT-4_PDF_summary.ipynb", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin", "chomakov-gpt-4-pdf-summary.hf.space", "--allow-websocket-origin", "0.0.0.0:7860"]
 RUN mkdir /.cache
 RUN chmod 777 /.cache

GPT-4_PDF_summary.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
    "id": "409a312d",
    "metadata": {},
    "outputs": [
@@ -106,12 +106,12 @@
     }
    ],
    "source": [
-    "!pip install langchain openai chromadb tiktoken pypdf panel"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
    "id": "dcbbf6df",
    "metadata": {},
    "outputs": [
@@ -147,7 +147,7 @@
     }
    ],
    "source": [
-    "import os\n",
     "from langchain.chains import RetrievalQA\n",
     "from langchain.llms import OpenAI\n",
     "from langchain.document_loaders import TextLoader\n",
@@ -157,7 +157,17 @@
     "from langchain.embeddings import OpenAIEmbeddings\n",
     "from langchain.vectorstores import Chroma\n",
     "import panel as pn\n",
-    "pn.extension ('texteditor', template=\"bootstrap\", sizing_mode='stretch_width')\n",
     "pn.state.template.param.update(\n",
     "    main_max_width=\"690px\",\n",
     "    header_background=\"#F08080\",\n",
@@ -171,14 +181,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import panel as pn\n",
-    "\n",
     "file_input = pn.widgets.FileInput(width=300)\n",
     "openaikey = pn.widgets.PasswordInput(\n",
     "    value=\"\", placeholder=\"Enter your OpenAI API Key here...\", width=300\n",
     ")\n",
     "prompt = pn.widgets.TextEditor(\n",
-    "    value='', placeholder=\"Enter your questions here...\", height=160, toolbar=False\n",
     ")\n",
     "run_button = pn.widgets.Button(name=\"Run!\")\n",
     "\n",
@@ -186,13 +195,15 @@
     "    name=\"Number of relevant chunks\", start=1, end=5, step=1, value=2\n",
     ")\n",
     "select_chain_type = pn.widgets.RadioButtonGroup(\n",
-    "    name='Chain type',\n",
     "    options=['stuff', 'map_reduce', \"refine\", \"map_rerank\"]\n",
     ")\n",
     "widgets = pn.Row(\n",
     "    pn.Column(prompt, run_button, margin=5),\n",
     "    pn.Card(\n",
-    "        pn.Column(\"Chain type:\", select_chain_type, select_k),\n",
     "        title=\"Advanced settings\", margin=10\n",
     "    ), width=600\n",
     ")"
@@ -206,23 +217,23 @@
    "outputs": [],
    "source": [
     "def qa(file, query, chain_type, k):\n",
-    "# load document\n",
     "    loader = PyPDFLoader(file)\n",
     "    documents = loader.load()\n",
-    "# split the documents into chunks\n",
     "    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
     "    texts = text_splitter.split_documents(documents)\n",
-    "# select which embeddings we want to use\n",
     "    embeddings = OpenAIEmbeddings()\n",
-    "# create the vectorestore to use as the index\n",
-    "    b = Chroma.from_documents(texts, embeddings)\n",
-    "# expose this index in a retriever interface\n",
     "    retriever = db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": k})\n",
-    "# create a chain to answer questions\n",
     "    qa = RetrievalQA.from_chain_type(\n",
-    "    llm = OpenAI(), chain_type = chain_type, retriever=retriever, return_source_documents=True)\n",
     "    result = qa({\"query\": query})\n",
-    "    print (result ['result'])\n",
     "    return result"
    ]
   },
@@ -233,17 +244,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "convos = [] # store all panel objects in a list\n",
-    "def qa_result(_):\n",
-    "    os.environ[\"'OPENAI_API_KEY\"] = openaikey.value\n",
     "\n",
-    "    # save pdf file to a temp file\n",
     "    if file_input.value is not None:\n",
-    "        file_input.save (\"/.cache/temp.pdf\")\n",
-    "\n",
     "        prompt_text = prompt.value\n",
     "        if prompt_text:\n",
-    "            result = qa(file=\"/.cache/temp-pdf\", query=prompt_text, chain_type=select_chain_type. value, k=select_k.value)\n",
     "            convos.extend([\n",
     "                pn.Row(\n",
     "                    pn.panel(\"\\U0001F60A\", width=10),\n",
@@ -255,7 +267,7 @@
     "                    pn.Column(\n",
     "                        result[\"result\"],\n",
     "                        \"Relevant source text:\",\n",
-    "                        pn.panel.Markdown('\\n----------------------------------------\\n'.join(doc.page_content for doc in result[\"source_documents\"]))\n",
     "                    )\n",
     "                )\n",
     "            ])\n",
@@ -377,12 +389,13 @@
     "    pn.pane.Markdown(\"\"\"\n",
     "    ## \\U0001F60A! Question Answering with your PDF file\n",
     "    \n",
-    "    1) Upload a PDF. 2) Enter OpenAI API key. This costs $. Set up billing at OpenAl. 3) Type a question and click \"Run\".\n",
     "    \n",
     "    \"\"\"),\n",
-    "    pn.Row(file_input, openaikey),\n",
     "    output,\n",
     "    widgets\n",
     ").servable()"
    ]
   }

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "409a312d",
    "metadata": {},
    "outputs": [
     }
    ],
    "source": [
+    "#!pip install langchain openai chromadb tiktoken pypdf panel"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "id": "dcbbf6df",
    "metadata": {},
    "outputs": [
     }
    ],
    "source": [
+    "import os \n",
     "from langchain.chains import RetrievalQA\n",
     "from langchain.llms import OpenAI\n",
     "from langchain.document_loaders import TextLoader\n",
     "from langchain.embeddings import OpenAIEmbeddings\n",
     "from langchain.vectorstores import Chroma\n",
     "import panel as pn\n",
+    "import tempfile"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d891458",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pn.extension('texteditor', template=\"bootstrap\", sizing_mode='stretch_width')\n",
     "pn.state.template.param.update(\n",
     "    main_max_width=\"690px\",\n",
     "    header_background=\"#F08080\",\n",
    "metadata": {},
    "outputs": [],
    "source": [
     "file_input = pn.widgets.FileInput(width=300)\n",
+    "\n",
     "openaikey = pn.widgets.PasswordInput(\n",
     "    value=\"\", placeholder=\"Enter your OpenAI API Key here...\", width=300\n",
     ")\n",
     "prompt = pn.widgets.TextEditor(\n",
+    "    value=\"\", placeholder=\"Enter your questions here...\", height=160, toolbar=False\n",
     ")\n",
     "run_button = pn.widgets.Button(name=\"Run!\")\n",
     "\n",
     "    name=\"Number of relevant chunks\", start=1, end=5, step=1, value=2\n",
     ")\n",
     "select_chain_type = pn.widgets.RadioButtonGroup(\n",
+    "    name='Chain type', \n",
     "    options=['stuff', 'map_reduce', \"refine\", \"map_rerank\"]\n",
     ")\n",
+    "\n",
     "widgets = pn.Row(\n",
     "    pn.Column(prompt, run_button, margin=5),\n",
     "    pn.Card(\n",
+    "        \"Chain type:\",\n",
+    "        pn.Column(select_chain_type, select_k),\n",
     "        title=\"Advanced settings\", margin=10\n",
     "    ), width=600\n",
     ")"
    "outputs": [],
    "source": [
     "def qa(file, query, chain_type, k):\n",
+    "    \"\"\"Answer a question about a PDF with a retrieval-augmented QA chain.\n",
+    "\n",
+    "    The PDF is loaded, split into chunks, embedded, and indexed in a Chroma\n",
+    "    store; the k most similar chunks are retrieved and handed to the chain.\n",
+    "\n",
+    "    Args:\n",
+    "        file: Path of the PDF, passed to PyPDFLoader.\n",
+    "        query: Question text, sent to the chain under the 'query' key.\n",
+    "        chain_type: Forwarded unchanged to RetrievalQA.from_chain_type.\n",
+    "        k: Number of chunks requested from the retriever (search_kwargs 'k').\n",
+    "\n",
+    "    Returns:\n",
+    "        The chain's result object; the caller in this notebook reads its\n",
+    "        'result' and 'source_documents' entries.\n",
+    "\n",
+    "    NOTE(review): PyPDFLoader and CharacterTextSplitter must be imported in an\n",
+    "    earlier cell - confirm. The OpenAI classes presumably read OPENAI_API_KEY\n",
+    "    from the environment, which the caller sets before calling - verify.\n",
+    "    \"\"\"\n",
+    "    # load the PDF into a list of documents\n",
     "    loader = PyPDFLoader(file)\n",
     "    documents = loader.load()\n",
+    "    # split the documents into chunks of 1000 characters with no overlap\n",
     "    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
     "    texts = text_splitter.split_documents(documents)\n",
+    "    # select which embeddings we want to use\n",
     "    embeddings = OpenAIEmbeddings()\n",
+    "    # create the vector store to use as the index (rebuilt on every call)\n",
+    "    db = Chroma.from_documents(texts, embeddings)\n",
+    "    # expose this index through a similarity-search retriever returning k chunks\n",
     "    retriever = db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": k})\n",
+    "    # create a chain to answer questions; the local name `qa` shadows this\n",
+    "    # function's own name inside its body\n",
     "    qa = RetrievalQA.from_chain_type(\n",
+    "        llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)\n",
     "    result = qa({\"query\": query})\n",
+    "    # echo the answer to stdout (ends up in the server log when served)\n",
+    "    print(result['result'])\n",
     "    return result"
    ]
   },
    "metadata": {},
    "outputs": [],
    "source": [
+    "convos = []  # store all panel objects in a list\n",
     "\n",
+    "def qa_result(_):\n",
+    "    os.environ[\"OPENAI_API_KEY\"] = openaikey.value\n",
+    "    \n",
+    "    # save pdf file to a temp file \n",
     "    if file_input.value is not None:\n",
+    "        file_input.save(\"/.cache/temp.pdf\")\n",
+    "    \n",
     "        prompt_text = prompt.value\n",
     "        if prompt_text:\n",
+    "            result = qa(file=\"/.cache/temp.pdf\", query=prompt_text, chain_type=select_chain_type.value, k=select_k.value)\n",
     "            convos.extend([\n",
     "                pn.Row(\n",
     "                    pn.panel(\"\\U0001F60A\", width=10),\n",
     "                    pn.Column(\n",
     "                        result[\"result\"],\n",
     "                        \"Relevant source text:\",\n",
+    "                        pn.pane.Markdown('\\n--------------------------------------------------------------------\\n'.join(doc.page_content for doc in result[\"source_documents\"]))\n",
     "                    )\n",
     "                )\n",
     "            ])\n",
     "    pn.pane.Markdown(\"\"\"\n",
     "    ## \\U0001F60A! Question Answering with your PDF file\n",
     "    \n",
+    "    1) Upload a PDF. 2) Enter OpenAI API key. This costs $. Set up billing at [OpenAI](https://platform.openai.com/account). 3) Type a question and click \"Run\".\n",
     "    \n",
     "    \"\"\"),\n",
+    "    pn.Row(file_input,openaikey),\n",
     "    output,\n",
     "    widgets\n",
+    "\n",
     ").servable()"
    ]
   }