{ "cells": [ { "cell_type": "markdown", "id": "04815d1b-44ee-4bd3-878e-fa0c3bf9fa7f", "metadata": { "tags": [] }, "source": [ "# LangChain QA Panel App\n", "\n", "This notebook shows how to make this app:" ] }, { "cell_type": "code", "execution_count": null, "id": "a181568b-9cde-4a55-a853-4d2a41dbfdad", "metadata": { "tags": [] }, "outputs": [], "source": [ "#!pip install langchain openai chromadb tiktoken pypdf panel\n" ] }, { "cell_type": "code", "execution_count": null, "id": "9a464409-d064-4766-a9cb-5119f6c4b8f5", "metadata": { "tags": [] }, "outputs": [], "source": [ "import os \n", "from langchain.chains import RetrievalQA\n", "from langchain.llms import OpenAI\n", "from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import PyPDFLoader\n", "from langchain.indexes import VectorstoreIndexCreator\n", "from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.vectorstores import Chroma\n", "import panel as pn\n", "import tempfile\n" ] }, { "cell_type": "code", "execution_count": null, "id": "b2d07ea5-9ff2-4c96-a8dc-92895d870b73", "metadata": { "tags": [] }, "outputs": [], "source": [ "pn.extension('texteditor', template=\"bootstrap\", sizing_mode='stretch_width')\n", "pn.state.template.param.update(\n", " main_max_width=\"690px\",\n", " header_background=\"#F08080\",\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "763db4d0-3436-41d3-8b0f-e66ce16468cd", "metadata": { "tags": [] }, "outputs": [], "source": [ "file_input = pn.widgets.FileInput(width=300)\n", "\n", "openaikey = pn.widgets.PasswordInput(\n", " value=\"\", placeholder=\"Enter your OpenAI API Key here...\", width=300\n", ")\n", "prompt = pn.widgets.TextEditor(\n", " value=\"\", placeholder=\"Enter your questions here...\", height=160, toolbar=False\n", ")\n", "run_button = pn.widgets.Button(name=\"Run!\")\n", "\n", "select_k = pn.widgets.IntSlider(\n", " name=\"Number of relevant chunks\", start=1, end=5, step=1, value=2\n", ")\n", "select_chain_type = pn.widgets.RadioButtonGroup(\n", " name='Chain type', \n", " options=['stuff', 'map_reduce', \"refine\", \"map_rerank\"]\n", ")\n", "\n", "widgets = pn.Row(\n", " pn.Column(prompt, run_button, margin=5),\n", " pn.Card(\n", " \"Chain type:\",\n", " pn.Column(select_chain_type, select_k),\n", " title=\"Advanced settings\", margin=10\n", " ), width=600\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "9b83cc06-3401-498f-8f84-8a98370f3121", "metadata": { "tags": [] }, "outputs": [], "source": [ "def qa(file, query, chain_type, k):\n", " # load document\n", " loader = PyPDFLoader(file)\n", " documents = loader.load()\n", " # split the documents into chunks\n", " text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", " texts = text_splitter.split_documents(documents)\n", " # select which embeddings we want to use\n", " embeddings = OpenAIEmbeddings()\n", " # create the vectorestore to use as the index\n", " db = Chroma.from_documents(texts, embeddings)\n", " # expose this index in a retriever interface\n", " retriever = db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": k})\n", " # create a chain to answer questions \n", " qa = RetrievalQA.from_chain_type(\n", " llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)\n", " result = qa({\"query\": query})\n", " print(result['result'])\n", " return result" ] }, { "cell_type": "code", "execution_count": null, "id": "2722f43b-daf6-4d17-a842-41203ae9b140", "metadata": { "tags": [] }, "outputs": [], "source": [ "# result = qa(\"example.pdf\", \"what is the total number of AI publications?\")" ] }, { "cell_type": "code", "execution_count": null, "id": "60e1b3d3-c0d2-4260-ae0c-26b03f1b8824", "metadata": {}, "outputs": [], "source": [ "convos = [] # store all panel objects in a list\n", "\n", "def qa_result(_):\n", " os.environ[\"OPENAI_API_KEY\"] = openaikey.value\n", " \n", " # save pdf file to a temp file \n", " if file_input.value is not None:\n", " file_input.save(\"/.cache/temp.pdf\")\n", " \n", " prompt_text = prompt.value\n", " if prompt_text:\n", " result = qa(file=\"/.cache/temp.pdf\", query=prompt_text, chain_type=select_chain_type.value, k=select_k.value)\n", " convos.extend([\n", " pn.Row(\n", " pn.panel(\"\\U0001F60A\", width=10),\n", " prompt_text,\n", " width=600\n", " ),\n", " pn.Row(\n", " pn.panel(\"\\U0001F916\", width=10),\n", " pn.Column(\n", " result[\"result\"],\n", " \"Relevant source text:\",\n", " pn.pane.Markdown('\\n--------------------------------------------------------------------\\n'.join(doc.page_content for doc in result[\"source_documents\"]))\n", " )\n", " )\n", " ])\n", " #return convos\n", " return pn.Column(*convos, margin=15, width=575, min_height=400)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "c3a70857-0b98-4f62-a9c0-b62ca42b474c", "metadata": { "tags": [] }, "outputs": [], "source": [ "qa_interactive = pn.panel(\n", " pn.bind(qa_result, run_button),\n", " loading_indicator=True,\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "228e2b42-b1ed-43af-b923-031a70241ab0", "metadata": { "tags": [] }, "outputs": [], "source": [ "output = pn.WidgetBox('*Output will show up here:*', qa_interactive, width=630, scroll=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "1b0ec253-2bcd-4f91-96d8-d8456e900a58", "metadata": { "tags": [] }, "outputs": [], "source": [ "# layout\n", "pn.Column(\n", " pn.pane.Markdown(\"\"\"\n", " ## \\U0001F60A! Question Answering with your PDF file\n", " \n", " 1) 上传PDF. 2) 输入OpenAI API key. 会产生openai api费用. 3) 输入问题然后点击 \"Run\".\n", " \n", " \"\"\"),\n", " pn.Row(file_input,openaikey),\n", " output,\n", " widgets\n", "\n", ").servable()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.10" } }, "nbformat": 4, "nbformat_minor": 5 }