{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Paper QA and Code Generation with LangChain\n",
    "\n",
    "This notebook loads a PDF into a Chroma vector store for question answering, and prompts an OpenAI LLM to write Python + PyTorch code from a paper's LaTeX source."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from langchain.embeddings import OpenAIEmbeddings\n",
    "from langchain.vectorstores import Chroma\n",
    "from langchain.chains import LLMChain, VectorDBQA\n",
    "from langchain.document_loaders import PagedPDFSplitter\n",
    "from langchain.llms import OpenAI\n",
    "from langchain import OpenAI, PromptTemplate, VectorDBQA"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load and Index the PDF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path = \"1706.03762.pdf\"\n",
    "\n",
    "# Load the document\n",
    "loader = PagedPDFSplitter(file_path)\n",
    "docs = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Vector store whose entries are embedded with OpenAI embeddings\n",
    "chroma = Chroma(embedding_function=OpenAIEmbeddings())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index the loaded documents; the QA chain further down queries this store\n",
    "chroma.add_documents(docs)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Text Generation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompt asking the model to continue previously generated code for one paper section\n",
    "prompt_template = \"\"\"\n",
    "You will be presented with a section of an Arxiv paper. Your job is to write the python + PyTorch code that exactly implements the paper with NO ERRORS.\n",
    "Additionally, you will be shown previously generated code. You must use this code as a reference and keep variable/function names the same.\n",
    "Use the paper section and the previous code below to write the next piece of code:\n",
    "    \n",
    "    Arxiv paper section: {paper}\n",
    "    Previous Code: {prev_code}\n",
    "    Next Code: \n",
    "\"\"\"\n",
    "\n",
    "PROMPT = PromptTemplate(\n",
    "    template=prompt_template, input_variables=[\"paper\", \"prev_code\"]\n",
    ")\n",
    "\n",
    "llm = OpenAI(temperature=0)\n",
    "\n",
    "chain = LLMChain(llm=llm, prompt=PROMPT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_code(title, paper, prev_code, **kwargs):\n",
    "    \"\"\"Generate the next piece of code for a paper section with the LLM chain.\n",
    "\n",
    "    Args:\n",
    "        title: Title of the paper. Kept so existing calls keep working; the\n",
    "            prompt template has no ``{title}`` placeholder, so it is not\n",
    "            sent to the model.\n",
    "        paper: Paper text substituted for ``{paper}`` in the prompt.\n",
    "        prev_code: Previously generated code substituted for ``{prev_code}``.\n",
    "        **kwargs: Accepted and ignored.\n",
    "\n",
    "    Returns:\n",
    "        The text produced by ``chain`` for the formatted prompt.\n",
    "    \"\"\"\n",
    "    # LLMChain.apply expects a *list* of input dicts; for a single input,\n",
    "    # predict() formats the prompt from keyword arguments and returns the text.\n",
    "    return chain.predict(paper=paper, prev_code=prev_code)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the LaTeX source that is passed to the prompt below\n",
    "with open(\"main.tex\", \"r\") as f:\n",
    "    main_tex = f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Length of the LaTeX source in characters\n",
    "len(main_tex)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): this passes the whole of main.tex although the prompt speaks of a\n",
    "# \"section\" of a paper; the input may exceed the model's context window -- confirm.\n",
    "out = generate_code(\n",
    "    title=\"Long Range Language Modeling via Gated State Spaces\",\n",
    "    prev_code=\"import torch\",\n",
    "    paper=main_tex,\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# QA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Question-answering chain backed by the Chroma index built above\n",
    "qa = VectorDBQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", vectorstore=chroma)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa.run(\"What is the purpose of this paper?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa.run(\"What is the main contribution of this paper?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa.run(\n",
    "    \"Given the text of the arxiv paper you know about, can you propose a structure for a jupyter notebook that \"\n",
    "    \"would summarize the paper's key contributions and findings? \"\n",
    "    \"The notebook should be structured in a logical and coherent way, with sections and sub-sections that reflect the paper's organization. \"\n",
    "    \"Only include portions of the paper that are relevant to code -- for example, do not include suggestions for further research or future work. \"\n",
    "    \"The output should be in this format:\\n\"\n",
    "    \"- each section should be numbered and have a title (e.g. Training and Inference)\\n\"\n",
    "    \"- each subsection should start with a dash (e.g., - Overview of the training process)\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa.run(\n",
    "    \"For the following arxiv paper sections, can you generate text descriptions and code for a jupyter notebook:\\n\"\n",
    "    \"3. Training\\n\"\n",
    "    \"- Overview of training data and batching\\n\"\n",
    "    \"- hardware and schedule\\n\"\n",
    "    \"- optimizer\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa.run(\"What Python code implements this paper?\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "langchain",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "38273b2daeac471f0eac904bde99a8af597df3ec437acfdd6914b298b9a2825e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}