shanjay committed on
Commit dd0a849 · 1 Parent(s): 4587806

Delete Test-mgc-Copy1.ipynb

Files changed (1)
  1. Test-mgc-Copy1.ipynb +0 -1177
Test-mgc-Copy1.ipynb DELETED
@@ -1,1177 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 2,
6
- "id": "addd199c-097c-419d-a0f2-c3d73efb8d5d",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "name": "stdout",
11
- "output_type": "stream",
12
- "text": [
13
- "\n",
14
- "===================================BUG REPORT===================================\n",
15
- "Welcome to bitsandbytes. For bug reports, please run\n",
16
- "\n",
17
- "python -m bitsandbytes\n",
18
- "\n",
19
- " and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n",
20
- "================================================================================\n",
21
- "bin /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda121.so\n",
22
- "CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...\n",
23
- "CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so\n",
24
- "CUDA SETUP: Highest compute capability among GPUs detected: 8.6\n",
25
- "CUDA SETUP: Detected CUDA version 121\n",
26
- "CUDA SETUP: Loading binary /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda121.so...\n"
27
- ]
28
- },
29
- {
30
- "name": "stderr",
31
- "output_type": "stream",
32
- "text": [
33
- "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/usr/local/nvidia/lib64'), PosixPath('/usr/local/nvidia/lib')}\n",
34
- " warn(msg)\n",
35
- "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: /usr/local/nvidia/lib:/usr/local/nvidia/lib64 did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! Searching further paths...\n",
36
- " warn(msg)\n",
37
- "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCcuY6EsmJRfLsI1l1rpDWVRhwkL7A9nzITTDbCFOX0wzshP65l/Sa54NrS1pX2uM6YiB7OvgGUm7uUKf9OBCcpd2ohFJiOkTznhDHk+D7IkFZf/VTRIHy/JZoAtzN/qBQKMOygFam1XzTMDnkehMkKvR23BgH72hzGUfYPIsq+OlStYVMhE1bncYSnC4SRucbdT5BeIsival514xsbAhCjjwPd8UHfw1cxaDq4edWjbhN8wkDU+V8i/jS/wWTZIt7pIZiAREEl/YC+Sc4FCSnb4c3p+adl5pqXrEsKygi+UmBtC1poLSXTgZOc/0kerx4jv/HB8NiH4kLsg4S2HjdFFQIB0WSV0i4KDVRE9cv18gQ7kbEv0t9Uwg4xdoMntCNS6aFDm51ufhshwQylzfSwX71Ka3mPdftfnVk81wKpIxN784FEcb7IE7HcNyomnP9N382Fg8j6pILwsKK6w4oOg8Cn2C66cySA6CNTFpK1kYBwsqdU3X8WBQUIZZNVCn4x/qRWYxrKHmdlUW8oCf9AT32eydDQWp1y0AlycA4wfbDQ8g4dtu9Rf+tBrYTztdCt5PbGy4SbwfynWysc/PuhcyaLNtuRYt3LeiCKhKJFNFST1BqjACrjkQ9kMrPSB/7j3JX9O2ncDHDQgCQIQon9BETVQZJ49EqMrusQ3/K39w== shanjay@LAPTOP-Q1PG3AE7')}\n",
38
- " warn(msg)\n",
39
- "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('//g.notebooksg.jarvislabs.net'), PosixPath('https')}\n",
40
- " warn(msg)\n",
41
- "/opt/conda/lib/python3.10/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('module'), PosixPath('//matplotlib_inline.backend_inline')}\n",
42
- " warn(msg)\n"
43
- ]
44
- }
45
- ],
46
- "source": [
47
- "import json\n",
48
- "import os\n",
49
- "from pprint import pprint\n",
50
- "\n",
51
- "import bitsandbytes as bnb\n",
52
- "import pandas as pd\n",
53
- "import torch\n",
54
- "import torch.nn as nn\n",
55
- "\n",
56
- "import transformers\n",
57
- "from datasets import load_dataset\n",
58
- "from huggingface_hub import notebook_login\n",
59
- "from peft import (\n",
60
- " LoraConfig,\n",
61
- " PeftConfig,\n",
62
- " PeftModel,\n",
63
- " get_peft_model,\n",
64
- " prepare_model_for_kbit_training,\n",
65
- ")\n",
66
- "from transformers import (\n",
67
- " AutoConfig,\n",
68
- " AutoModelForCausalLM,\n",
69
- " AutoTokenizer,\n",
70
- " BitsAndBytesConfig,\n",
71
- ")\n",
72
- "import warnings\n",
73
- "warnings.filterwarnings(\"ignore\")\n",
74
- "\n",
75
- "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
76
- ]
77
- },
78
- {
79
- "cell_type": "code",
80
- "execution_count": 3,
81
- "id": "acfb1578-a66f-44f0-8df9-1c6bcf7530ea",
82
- "metadata": {},
83
- "outputs": [
84
- {
85
- "data": {
86
- "application/vnd.jupyter.widget-view+json": {
87
- "model_id": "3edf6ee054e9464eb510d3aff9d1dc5f",
88
- "version_major": 2,
89
- "version_minor": 0
90
- },
91
- "text/plain": [
92
- "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
93
- ]
94
- },
95
- "metadata": {},
96
- "output_type": "display_data"
97
- }
98
- ],
99
- "source": [
100
- "notebook_login()"
101
- ]
102
- },
103
- {
104
- "cell_type": "code",
105
- "execution_count": 4,
106
- "id": "d2f13cac-1536-4da0-8ff7-0a0454fd0b4a",
107
- "metadata": {},
108
- "outputs": [],
109
- "source": [
110
- "with open(\"ds1000-test-cleaned.json\") as json_file:\n",
111
- " data = json.load(json_file)"
112
- ]
113
- },
114
- {
115
- "cell_type": "code",
116
- "execution_count": 5,
117
- "id": "6706e68b-d525-4392-ab2c-1dff356da52d",
118
- "metadata": {},
119
- "outputs": [
120
- {
121
- "name": "stdout",
122
- "output_type": "stream",
123
- "text": [
124
- "{'answer': 'import pandas as pd\\n'\n",
125
- " '\\n'\n",
126
- " '\\n'\n",
127
- " 'index = range(14)\\n'\n",
128
- " 'data = [1, 0, 0, 2, 0, 4, 6, 8, 0, 0, 0, 0, 2, 1]\\n'\n",
129
- " \"df = pd.DataFrame(data=data, index=index, columns = ['A'])\\n\"\n",
130
- " 'def g(df):\\n'\n",
131
- " \" l = df['A'].replace(to_replace=0, method='ffill')\\n\"\n",
132
- " \" r = df['A'].replace(to_replace=0, method='bfill')\\n\"\n",
133
- " ' for i in range(len(df)):\\n'\n",
134
- " \" df['A'].iloc[i] = max(l[i], r[i])\\n\"\n",
135
- " ' return df\\n'\n",
136
- " '\\n'\n",
137
- " 'df = g(df.copy())\\n'\n",
138
- " 'result = df\\n'\n",
139
- " 'print(result)',\n",
140
- " 'question': 'Problem:\\n'\n",
141
- " 'I have the following dataframe:\\n'\n",
142
- " 'index = range(14)\\n'\n",
143
- " 'data = [1, 0, 0, 2, 0, 4, 6, 8, 0, 0, 0, 0, 2, 1]\\n'\n",
144
- " \"df = pd.DataFrame(data=data, index=index, columns = ['A'])\\n\"\n",
145
- " '\\n'\n",
146
- " '\\n'\n",
147
- " 'How can I fill the zeros with the maximun between previous and '\n",
148
- " 'posterior non-zero value using pandas? Is there a fillna that is '\n",
149
- " 'not just for \"NaN\"?. \\n'\n",
150
- " 'The output should look like:\\n'\n",
151
- " ' A\\n'\n",
152
- " '0 1\\n'\n",
153
- " '1 2\\n'\n",
154
- " '2 2\\n'\n",
155
- " '3 2\\n'\n",
156
- " '4 4\\n'\n",
157
- " '5 4\\n'\n",
158
- " '6 6\\n'\n",
159
- " '7 8\\n'\n",
160
- " '8 8\\n'\n",
161
- " '9 8\\n'\n",
162
- " '10 8\\n'\n",
163
- " '11 8\\n'\n",
164
- " '12 2\\n'\n",
165
- " '13 1'}\n"
166
- ]
167
- }
168
- ],
169
- "source": [
170
- "pprint(data[0])"
171
- ]
172
- },
173
- {
174
- "cell_type": "code",
175
- "execution_count": 6,
176
- "id": "9cc4983a-9a3f-485f-983f-efe2f10ce516",
177
- "metadata": {},
178
- "outputs": [],
179
- "source": [
180
- "with open(\"ds1000-test-cleaned.json\", \"w\") as f:\n",
181
- " json.dump(data, f)"
182
- ]
183
- },
184
- {
185
- "cell_type": "code",
186
- "execution_count": 7,
187
- "id": "f45c3674-4eed-4ca5-8343-2184ff1e4da1",
188
- "metadata": {},
189
- "outputs": [
190
- {
191
- "data": {
192
- "text/html": [
193
- "<div>\n",
194
- "<style scoped>\n",
195
- " .dataframe tbody tr th:only-of-type {\n",
196
- " vertical-align: middle;\n",
197
- " }\n",
198
- "\n",
199
- " .dataframe tbody tr th {\n",
200
- " vertical-align: top;\n",
201
- " }\n",
202
- "\n",
203
- " .dataframe thead th {\n",
204
- " text-align: right;\n",
205
- " }\n",
206
- "</style>\n",
207
- "<table border=\"1\" class=\"dataframe\">\n",
208
- " <thead>\n",
209
- " <tr style=\"text-align: right;\">\n",
210
- " <th></th>\n",
211
- " <th>question</th>\n",
212
- " <th>answer</th>\n",
213
- " </tr>\n",
214
- " </thead>\n",
215
- " <tbody>\n",
216
- " <tr>\n",
217
- " <th>0</th>\n",
218
- " <td>Problem:\\nI have the following dataframe:\\nind...</td>\n",
219
- " <td>import pandas as pd\\n\\n\\nindex = range(14)\\nda...</td>\n",
220
- " </tr>\n",
221
- " <tr>\n",
222
- " <th>1</th>\n",
223
- " <td>Problem:\\ni got an issue over ranking of date ...</td>\n",
224
- " <td>import pandas as pd\\n\\n\\ndf = pd.DataFrame({'I...</td>\n",
225
- " </tr>\n",
226
- " <tr>\n",
227
- " <th>2</th>\n",
228
- " <td>Problem:\\nI have a DataFrame like :\\n 0 ...</td>\n",
229
- " <td>import pandas as pd\\nimport numpy as np\\n\\ndf ...</td>\n",
230
- " </tr>\n",
231
- " <tr>\n",
232
- " <th>3</th>\n",
233
- " <td>Problem:\\nI have this Pandas dataframe (df):\\n...</td>\n",
234
- " <td>import pandas as pd\\n\\n\\ndf = pd.DataFrame({'A...</td>\n",
235
- " </tr>\n",
236
- " <tr>\n",
237
- " <th>4</th>\n",
238
- " <td>Problem:\\nI have\\n\\ndf = pd.DataFrame.from_dic...</td>\n",
239
- " <td>import pandas as pd\\n\\ndf = pd.DataFrame.from_...</td>\n",
240
- " </tr>\n",
241
- " </tbody>\n",
242
- "</table>\n",
243
- "</div>"
244
- ],
245
- "text/plain": [
246
- " question \\\n",
247
- "0 Problem:\\nI have the following dataframe:\\nind... \n",
248
- "1 Problem:\\ni got an issue over ranking of date ... \n",
249
- "2 Problem:\\nI have a DataFrame like :\\n 0 ... \n",
250
- "3 Problem:\\nI have this Pandas dataframe (df):\\n... \n",
251
- "4 Problem:\\nI have\\n\\ndf = pd.DataFrame.from_dic... \n",
252
- "\n",
253
- " answer \n",
254
- "0 import pandas as pd\\n\\n\\nindex = range(14)\\nda... \n",
255
- "1 import pandas as pd\\n\\n\\ndf = pd.DataFrame({'I... \n",
256
- "2 import pandas as pd\\nimport numpy as np\\n\\ndf ... \n",
257
- "3 import pandas as pd\\n\\n\\ndf = pd.DataFrame({'A... \n",
258
- "4 import pandas as pd\\n\\ndf = pd.DataFrame.from_... "
259
- ]
260
- },
261
- "execution_count": 7,
262
- "metadata": {},
263
- "output_type": "execute_result"
264
- }
265
- ],
266
- "source": [
267
- "pd.DataFrame(data).head()"
268
- ]
269
- },
270
- {
271
- "cell_type": "code",
272
- "execution_count": 8,
273
- "id": "6fbdd3ad-062f-4744-bb8e-1c19950adfd5",
274
- "metadata": {},
275
- "outputs": [],
276
- "source": [
277
- "bnb_config = BitsAndBytesConfig(\n",
278
- " load_in_4bit=True,\n",
279
- " bnb_4bit_use_double_quant=True,\n",
280
- " bnb_4bit_quant_type=\"nf4\",\n",
281
- " bnb_4bit_compute_dtype=torch.bfloat16,\n",
282
- ")"
283
- ]
284
- },
285
- {
286
- "cell_type": "code",
287
- "execution_count": 9,
288
- "id": "2b5ae38c-b0d2-4b9a-acde-3370130ca6e7",
289
- "metadata": {},
290
- "outputs": [
291
- {
292
- "data": {
293
- "application/vnd.jupyter.widget-view+json": {
294
- "model_id": "2be27a54d3e14399a41c46cd9c423399",
295
- "version_major": 2,
296
- "version_minor": 0
297
- },
298
- "text/plain": [
299
- "Loading checkpoint shards: 0%| | 0/6 [00:00<?, ?it/s]"
300
- ]
301
- },
302
- "metadata": {},
303
- "output_type": "display_data"
304
- },
305
- {
306
- "name": "stderr",
307
- "output_type": "stream",
308
- "text": [
309
- "Some weights of LlamaForCausalLM were not initialized from the model checkpoint at ise-uiuc/Magicoder-S-DS-6.7B and are newly initialized: ['model.layers.2.self_attn.rotary_emb.inv_freq', 'model.layers.6.self_attn.rotary_emb.inv_freq', 'model.layers.25.self_attn.rotary_emb.inv_freq', 'model.layers.15.self_attn.rotary_emb.inv_freq', 'model.layers.1.self_attn.rotary_emb.inv_freq', 'model.layers.7.self_attn.rotary_emb.inv_freq', 'model.layers.18.self_attn.rotary_emb.inv_freq', 'model.layers.17.self_attn.rotary_emb.inv_freq', 'model.layers.4.self_attn.rotary_emb.inv_freq', 'model.layers.30.self_attn.rotary_emb.inv_freq', 'model.layers.12.self_attn.rotary_emb.inv_freq', 'model.layers.10.self_attn.rotary_emb.inv_freq', 'model.layers.24.self_attn.rotary_emb.inv_freq', 'model.layers.23.self_attn.rotary_emb.inv_freq', 'model.layers.14.self_attn.rotary_emb.inv_freq', 'model.layers.21.self_attn.rotary_emb.inv_freq', 'model.layers.27.self_attn.rotary_emb.inv_freq', 'model.layers.8.self_attn.rotary_emb.inv_freq', 'model.layers.11.self_attn.rotary_emb.inv_freq', 'model.layers.29.self_attn.rotary_emb.inv_freq', 'model.layers.28.self_attn.rotary_emb.inv_freq', 'model.layers.20.self_attn.rotary_emb.inv_freq', 'model.layers.31.self_attn.rotary_emb.inv_freq', 'model.layers.26.self_attn.rotary_emb.inv_freq', 'model.layers.13.self_attn.rotary_emb.inv_freq', 'model.layers.3.self_attn.rotary_emb.inv_freq', 'model.layers.22.self_attn.rotary_emb.inv_freq', 'model.layers.9.self_attn.rotary_emb.inv_freq', 'model.layers.5.self_attn.rotary_emb.inv_freq', 'model.layers.19.self_attn.rotary_emb.inv_freq', 'model.layers.16.self_attn.rotary_emb.inv_freq', 'model.layers.0.self_attn.rotary_emb.inv_freq']\n",
310
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
311
- ]
312
- }
313
- ],
314
- "source": [
315
- "PEFT_MODEL = \"shanjay/mgc-ds\"\n",
316
- "\n",
317
- "config = PeftConfig.from_pretrained(PEFT_MODEL)\n",
318
- "model = AutoModelForCausalLM.from_pretrained(\n",
319
- " config.base_model_name_or_path,\n",
320
- " return_dict=True,\n",
321
- " quantization_config=bnb_config,\n",
322
- " device_map=\"auto\",\n",
323
- " trust_remote_code=True,\n",
324
- ")\n",
325
- "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
326
- "tokenizer.pad_token = tokenizer.eos_token\n",
327
- "\n",
328
- "model = PeftModel.from_pretrained(model, PEFT_MODEL)"
329
- ]
330
- },
331
- {
332
- "cell_type": "code",
333
- "execution_count": 26,
334
- "id": "7c3e35e0-f77c-4d63-8e2b-e72027341e31",
335
- "metadata": {},
336
- "outputs": [],
337
- "source": [
338
- "generation_config = model.generation_config\n",
339
- "generation_config.max_new_tokens = 400\n",
340
- "generation_config.temperature = 0.7\n",
341
- "generation_config.top_p = 0.7\n",
342
- "generation_config.num_return_sequences = 1\n",
343
- "generation_config.pad_token_id = tokenizer.eos_token_id\n",
344
- "generation_config.eos_token_id = tokenizer.eos_token_id"
345
- ]
346
- },
347
- {
348
- "cell_type": "code",
349
- "execution_count": 27,
350
- "id": "aee4385b-d855-4225-9532-4e9002322579",
351
- "metadata": {},
352
- "outputs": [],
353
- "source": [
354
- "DEVICE = \"cuda:0\""
355
- ]
356
- },
357
- {
358
- "cell_type": "code",
359
- "execution_count": 12,
360
- "id": "7b14a1c6-ac62-4a9c-9df9-0db50facfd7e",
361
- "metadata": {},
362
- "outputs": [
363
- {
364
- "name": "stdout",
365
- "output_type": "stream",
366
- "text": [
367
- "<instruction>: How can I create a dataframe?\n",
368
- "<output>: import pandas as pd\n",
369
- "\n",
370
- "\n",
371
- "\n",
372
- "\n",
373
- "\n",
374
- "\n",
375
- "\n",
376
- "\n",
377
- "\n",
378
- "\n",
379
- "\n",
380
- "\n",
381
- "\n",
382
- "\n",
383
- "\n",
384
- "\n",
385
- "\n",
386
- "\n",
387
- "\n",
388
- "\n",
389
- "\n",
390
- "\n",
391
- "\n",
392
- "\n",
393
- "\n",
394
- "\n",
395
- "\n",
396
- "\n",
397
- "\n",
398
- "\n",
399
- "\n",
400
- "\n",
401
- "\n",
402
- "\n",
403
- "\n",
404
- "\n",
405
- "\n",
406
- "\n",
407
- "\n",
408
- "\n",
409
- "\n",
410
- "\n",
411
- "\n",
412
- "\n",
413
- "\n",
414
- "\n",
415
- "\n",
416
- "\n",
417
- "\n",
418
- "\n",
419
- "\n",
420
- "\n",
421
- "\n",
422
- "\n",
423
- "\n",
424
- "\n",
425
- "\n",
426
- "\n",
427
- "\n",
428
- "\n",
429
- "\n",
430
- "\n",
431
- "\n",
432
- "\n",
433
- "\n",
434
- "\n",
435
- "\n",
436
- "\n",
437
- "\n",
438
- "\n",
439
- "\n",
440
- "\n",
441
- "\n",
442
- "\n",
443
- "\n",
444
- "\n",
445
- "\n",
446
- "\n",
447
- "\n",
448
- "\n",
449
- "\n",
450
- "\n",
451
- "\n",
452
- "\n",
453
- "\n",
454
- "\n",
455
- "\n",
456
- "\n",
457
- "\n",
458
- "\n",
459
- "\n",
460
- "\n",
461
- "\n",
462
- "\n",
463
- "\n",
464
- "\n",
465
- "\n",
466
- "\n",
467
- "\n",
468
- "\n",
469
- "\n",
470
- "\n",
471
- "\n",
472
- "\n",
473
- "\n",
474
- "\n",
475
- "\n",
476
- "\n",
477
- "\n",
478
- "\n",
479
- "\n",
480
- "\n",
481
- "\n",
482
- "\n",
483
- "\n",
484
- "\n",
485
- "\n",
486
- "\n",
487
- "\n",
488
- "\n",
489
- "\n",
490
- "\n",
491
- "\n",
492
- "\n",
493
- "\n",
494
- "\n",
495
- "\n",
496
- "\n",
497
- "\n",
498
- "\n",
499
- "\n",
500
- "\n",
501
- "\n",
502
- "\n",
503
- "\n",
504
- "\n",
505
- "\n",
506
- "\n",
507
- "\n",
508
- "\n",
509
- "\n",
510
- "\n",
511
- "\n",
512
- "\n",
513
- "\n",
514
- "\n",
515
- "\n",
516
- "\n",
517
- "\n",
518
- "\n",
519
- "\n",
520
- "\n",
521
- "\n",
522
- "\n",
523
- "\n",
524
- "\n",
525
- "\n",
526
- "\n",
527
- "\n",
528
- "\n",
529
- "\n",
530
- "\n",
531
- "\n",
532
- "\n",
533
- "\n",
534
- "\n",
535
- "\n",
536
- "\n",
537
- "\n",
538
- "\n",
539
- "\n",
540
- "\n",
541
- "\n",
542
- "\n",
543
- "\n",
544
- "\n",
545
- "\n",
546
- "\n",
547
- "\n",
548
- "\n",
549
- "\n",
550
- "\n",
551
- "\n",
552
- "\n",
553
- "\n",
554
- "\n",
555
- "\n",
556
- "\n",
557
- "\n",
558
- "\n",
559
- "\n",
560
- "\n",
561
- "\n",
562
- "\n",
563
- "CPU times: user 26.5 s, sys: 177 ms, total: 26.7 s\n",
564
- "Wall time: 26.7 s\n"
565
- ]
566
- }
567
- ],
568
- "source": [
569
- "%%time\n",
570
- "prompt = f\"\"\"\n",
571
- "<instruction>: How can I create a dataframe?\n",
572
- "<output>:\n",
573
- "\"\"\".strip()\n",
574
- "\n",
575
- "encoding = tokenizer(prompt, return_tensors=\"pt\").to(DEVICE)\n",
576
- "with torch.inference_mode():\n",
577
- " outputs = model.generate(\n",
578
- " input_ids=encoding.input_ids,\n",
579
- " attention_mask=encoding.attention_mask,\n",
580
- " generation_config=generation_config,\n",
581
- " )\n",
582
- "print(tokenizer.decode(outputs[0], skip_special_tokens=True))"
583
- ]
584
- },
585
- {
586
- "cell_type": "code",
587
- "execution_count": 28,
588
- "id": "93c95988-c563-4871-974d-004bf73fbce8",
589
- "metadata": {},
590
- "outputs": [],
591
- "source": [
592
- "def generate_response(question: str) -> str:\n",
593
- " prompt = f\"\"\"\n",
594
- "<instruction>: {question}\n",
595
- "<output>:\n",
596
- "\"\"\".strip()\n",
597
- " encoding = tokenizer(prompt, return_tensors=\"pt\").to(DEVICE)\n",
598
- " with torch.inference_mode():\n",
599
- " outputs = model.generate(\n",
600
- " input_ids=encoding.input_ids,\n",
601
- " attention_mask=encoding.attention_mask,\n",
602
- " generation_config=generation_config,\n",
603
- " )\n",
604
- " response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
605
- "\n",
606
- " assistant_start = \"<output>:\"\n",
607
- " response_start = response.find(assistant_start)\n",
608
- " return response[response_start + len(assistant_start) :].strip()"
609
- ]
610
- },
611
- {
612
- "cell_type": "code",
613
- "execution_count": 29,
614
- "id": "8a9a9b87-193b-4bed-8ef1-57944d931958",
615
- "metadata": {},
616
- "outputs": [
617
- {
618
- "name": "stdout",
619
- "output_type": "stream",
620
- "text": [
621
- "import pandas as pd\n"
622
- ]
623
- }
624
- ],
625
- "source": [
626
- "prompt = \"How can I create a dataframe?\"\n",
627
- "print(generate_response(prompt))"
628
- ]
629
- },
630
- {
631
- "cell_type": "code",
632
- "execution_count": 30,
633
- "id": "4658f305-b7c6-432c-ac0c-f62bd79e9ad5",
634
- "metadata": {},
635
- "outputs": [
636
- {
637
- "name": "stdout",
638
- "output_type": "stream",
639
- "text": [
640
- "import pandas as pd\n",
641
- "\n",
642
- "\n",
643
- "\n",
644
- "\n",
645
- "\n",
646
- "df1 = pd.DataFrame({'A': ['A', 'B', 'C', 'D'],\n",
647
- " 'B': [1, 2, 3, 4]})\n",
648
- "df2 = pd.DataFrame({'A': ['A', 'B', 'C', 'E'],\n",
649
- " 'B': [1, 2, 3, 5]})\n",
650
- "# merge df1 and df2 on column 'A'\n",
651
- "# SOLUTION START\n",
652
- "\n",
653
- "<output>: import pandas as pd\n",
654
- "\n",
655
- "\n",
656
- "\n",
657
- "\n",
658
- "\n",
659
- "df1 = pd.DataFrame({'A': ['A', 'B', 'C', 'D'],\n",
660
- " 'B': [1, 2, 3, 4]})\n",
661
- "df2 = pd.DataFrame({'A': ['A', 'B', 'C', 'E'],\n",
662
- " 'B': [1, 2, 3, 5]})\n",
663
- "# merge df1 and df2 on column 'A'\n",
664
- "result = pd.merge(df1, df2, on='A')\n",
665
- "print(result)\n"
666
- ]
667
- }
668
- ],
669
- "source": [
670
- "prompt = \"How to merge two dataframes?\"\n",
671
- "print(generate_response(prompt))"
672
- ]
673
- },
674
- {
675
- "cell_type": "code",
676
- "execution_count": 16,
677
- "id": "0e9ed231-4a62-4331-94df-f3bcd601f138",
678
- "metadata": {},
679
- "outputs": [
680
- {
681
- "name": "stdout",
682
- "output_type": "stream",
683
- "text": [
684
- "import pandas as pd\n",
685
- "\n",
686
- "\n",
687
- "name = ['joy', 'shan']\n",
688
- "roll_no = [1, 2]\n",
689
- "df = pd.DataFrame({'name': name, 'roll_no': roll_no})\n",
690
- "print(df)\n"
691
- ]
692
- }
693
- ],
694
- "source": [
695
- "prompt = \"given two arrays name=['joy','shan'], roll_no=[1,2]. put these array in a dataframe ?\"\n",
696
- "print(generate_response(prompt))"
697
- ]
698
- },
699
- {
700
- "cell_type": "code",
701
- "execution_count": 31,
702
- "id": "381ba5c0-276d-411e-a8d5-9f010528433d",
703
- "metadata": {},
704
- "outputs": [
705
- {
706
- "name": "stdout",
707
- "output_type": "stream",
708
- "text": [
709
- "import matplotlib.pyplot as plt\n",
710
- "\n",
711
- "x = [1, 2, 3, 4, 5]\n",
712
- "y = [1, 2, 3, 4, 5]\n",
713
- "\n",
714
- "# plot all types of plots in matplotlib\n",
715
- "# SOLUTION START\n",
716
- "\n",
717
- "<output>: import matplotlib.pyplot as plt\n",
718
- "\n",
719
- "x = [1, 2, 3, 4, 5]\n",
720
- "y = [1, 2, 3, 4, 5]\n",
721
- "\n",
722
- "# plot all types of plots in matplotlib\n",
723
- "plt.plot(x, y, label=\"plot\")\n",
724
- "plt.scatter(x, y, label=\"scatter\")\n",
725
- "plt.bar(x, y, label=\"bar\")\n",
726
- "plt.hist(x, y, label=\"hist\")\n",
727
- "plt.boxplot(x, y, label=\"boxplot\")\n",
728
- "plt.show()\n",
729
- "<output>: import matplotlib.pyplot as plt\n",
730
- "\n",
731
- "x = [1, 2, 3, 4, 5]\n",
732
- "y = [1, 2, 3, 4, 5]\n",
733
- "\n",
734
- "# plot all types of plots in matplotlib\n",
735
- "plt.plot(x, y, label=\"plot\")\n",
736
- "plt.scatter(x, y, label=\"scatter\")\n",
737
- "plt.bar(x, y, label=\"bar\")\n",
738
- "plt.hist(x, y, label=\"hist\")\n",
739
- "plt.boxplot(x, y, label=\"boxplot\")\n",
740
- "plt.show()\n",
741
- "<output>: import matplotlib.pyplot as plt\n",
742
- "\n",
743
- "x = [1, 2, 3, 4, 5]\n"
744
- ]
745
- }
746
- ],
747
- "source": [
748
- "prompt = \"can you plot all types of plots in matplotlib?\"\n",
749
- "print(generate_response(prompt))"
750
- ]
751
- },
752
- {
753
- "cell_type": "code",
754
- "execution_count": 32,
755
- "id": "6864c3c7-b721-48ca-8943-dcff9838f7d2",
756
- "metadata": {},
757
- "outputs": [
758
- {
759
- "name": "stdout",
760
- "output_type": "stream",
761
- "text": [
762
- "import pandas as pd\n",
763
- "\n",
764
- "\n",
765
- "df = pd.DataFrame({'ID': ['01', '01', '01', '02', '02'],\n",
766
- " 'TIME': ['2018-07-11 11:12:20', '2018-07-12 12:00:23', '2018-07-13 12:00:00', '2019-09-11 11:00:00', '2019-09-12 12:00:00']})\n",
767
- "def g(df):\n",
768
- " df['TIME'] = pd.to_datetime(df['TIME'])\n",
769
- " df['RANK'] = df.groupby('ID')['TIME'].rank(ascending=True)\n",
770
- " return df\n",
771
- "\n",
772
- "df = g(df.copy())\n",
773
- "print(df)\n",
774
- "<output>: import pandas as pd\n",
775
- "\n",
776
- "\n",
777
- "df = pd.DataFrame({'ID': ['01', '01', '01', '02', '02'],\n",
778
- " 'TIME': ['2018-07-11 11:12:20', '2018-07-12 12:00:23', '2018-07-13 12:00:00', '2019-09-11 11:00:00', '2019-09-12 12:00:00']})\n",
779
- "def g(df):\n",
780
- " df['TIME'] = pd.to_datetime(df['TIME'])\n"
781
- ]
782
- }
783
- ],
784
- "source": [
785
- "prompt = \"\"\"Problem:\n",
786
- "i got an issue over ranking of date times. Lets say i have following table.\n",
787
- "ID TIME\n",
788
- "01 2018-07-11 11:12:20\n",
789
- "01 2018-07-12 12:00:23\n",
790
- "01 2018-07-13 12:00:00\n",
791
- "02 2019-09-11 11:00:00\n",
792
- "02 2019-09-12 12:00:00\n",
793
- "\n",
794
- "\n",
795
- "and i want to add another column to rank the table by time for each id and group. I used \n",
796
- "df['RANK'] = data.groupby('ID')['TIME'].rank(ascending=True)\n",
797
- "\n",
798
- "\n",
799
- "but get an error:\n",
800
- "'NoneType' object is not callable\n",
801
- "\n",
802
- "\n",
803
- "If i replace datetime to numbers, it works.... any solutions?\n",
804
- "\"\"\"\n",
805
- "print(generate_response(prompt))"
806
- ]
807
- },
808
- {
809
- "cell_type": "code",
810
- "execution_count": 33,
811
- "id": "7fa02929-5c65-4aa6-81ce-9c51879e7535",
812
- "metadata": {},
813
- "outputs": [
814
- {
815
- "name": "stdout",
816
- "output_type": "stream",
817
- "text": [
818
- "import pandas as pd\n",
819
- "\n",
820
- "\n",
821
- "index = range(14)\n",
822
- "data = [1, 0, 0, 2, 0, 4, 6, 8, 0, 0, 0, 0, 2, 1]\n",
823
- "df = pd.DataFrame(data=data, index=index, columns = ['A'])\n",
824
- "def g(df):\n",
825
- " df['A'] = df['A'].replace(0, np.nan)\n",
826
- " df['A'] = df['A'].fillna(method='ffill')\n",
827
- " df['A'] = df['A'].fillna(method='bfill')\n",
828
- " return df\n",
829
- "\n",
830
- "df = g(df.copy())\n",
831
- "result = df\n",
832
- "print(result)\n",
833
- "<output>: import pandas as pd\n",
834
- "import numpy as np\n",
835
- "\n",
836
- "\n",
837
- "index = range(14)\n",
838
- "data = [1, 0, 0, 2, 0, 4, 6, 8, 0, 0, 0, 0, 2, 1]\n",
839
- "df = pd.DataFrame(data=data, index=index, columns = ['A'])\n",
840
- "def g(df):\n",
841
- " df['A'] = df['A'].replace(0, np.nan)\n",
842
- " df['A'] = df['A'].fillna(method='ffill')\n",
843
- " df['A'] = df['A'].fillna(method='bfill')\n",
844
- " return df\n",
845
- "\n",
846
- "df = g(df.copy())\n",
847
- "result = df\n",
848
- "print(result)\n",
849
- "<output>: import pandas as pd\n",
850
- "import numpy as np\n",
851
- "\n",
852
- "\n",
853
- "index = range(14)\n",
854
- "data = [1, 0, 0, 2, 0, 4\n"
855
- ]
856
- }
857
- ],
858
- "source": [
859
- "prompt = \"\"\"Problem:\n",
860
- "I have the following dataframe:\n",
861
- "index = range(14)\n",
862
- "data = [1, 0, 0, 2, 0, 4, 6, 8, 0, 0, 0, 0, 2, 1]\n",
863
- "df = pd.DataFrame(data=data, index=index, columns = ['A'])\n",
864
- "\n",
865
- "\n",
866
- "How can I fill the zeros with the maximun between previous and posterior non-zero value using pandas? Is there a fillna that is not just for \"NaN\"?. \n",
867
- "The output should look like:\n",
868
- " A\n",
869
- "0 1\n",
870
- "1 2\n",
871
- "2 2\n",
872
- "3 2\n",
873
- "4 4\n",
874
- "5 4\n",
875
- "6 6\n",
876
- "7 8\n",
877
- "8 8\n",
878
- "9 8\n",
879
- "10 8\n",
880
- "11 8\n",
881
- "12 2\n",
882
- "13 1\n",
883
- "\"\"\"\n",
884
- "\n",
885
- "print(generate_response(prompt))"
886
- ]
887
- },
888
- {
889
- "cell_type": "code",
890
- "execution_count": 34,
891
- "id": "255cc021-5f5e-46af-a75e-a435b9629cdf",
892
- "metadata": {},
893
- "outputs": [
894
- {
895
- "name": "stdout",
896
- "output_type": "stream",
897
- "text": [
898
- "Problem:\n",
899
- "My sample df has four columns with NaN values. The goal is to concatenate all the keywords rows while excluding the NaN values.\n",
900
- "import pandas as pd\n",
901
- "import numpy as np\n",
902
- "df = pd.DataFrame({'users': ['Hu Tao', 'Zhongli', 'Xingqiu'],\n",
903
- " 'keywords_0': [\"a\", np.nan, \"c\"],\n",
904
- " 'keywords_1': [\"d\", \"e\", np.nan],\n",
905
- " 'keywords_2': [np.nan, np.nan, \"b\"],\n",
906
- " 'keywords_3': [\"f\", np.nan, \"g\"]})\n",
907
- "\n",
908
- "\n",
909
- " users keywords_0 keywords_1 keywords_2 keywords_3\n",
910
- "0 Hu Tao a d NaN f\n",
911
- "1 Zhongli NaN e NaN NaN\n",
912
- "2 Xingqiu c NaN b g\n",
913
- "\n",
914
- "\n",
915
- "Want to accomplish the following:\n",
916
- " users keywords_0 keywords_1 keywords_2 keywords_3 keywords_all\n",
917
- "0 Hu Tao a d NaN f a-d-f\n",
918
- "1 Zhongli NaN e NaN NaN e\n",
919
- "2 Xingqiu c NaN b g c-b-g\n",
920
- "\n",
921
- "\n",
922
- "Pseudo code:\n",
923
- "cols = [df.keywords_0, df.keywords_1, df.keywords_2, df.keywords_3]\n",
924
- "df[\"keywords_all\"] = df[\"keywords_all\"].apply(lambda cols: \"-\".join(cols), axis=1)\n",
925
- "\n",
926
- "\n",
927
- "I know I can use \"-\".join() to get the exact result, but I am unsure how to pass the column names into the function.\n"
928
- ]
929
- }
930
- ],
931
- "source": [
932
- "print(data[5]['question'])"
933
- ]
934
- },
935
- {
936
- "cell_type": "code",
937
- "execution_count": 35,
938
- "id": "1c5841e9-4331-4185-a7ad-7dd00d4e13b1",
939
- "metadata": {},
940
- "outputs": [
941
- {
942
- "name": "stdout",
943
- "output_type": "stream",
944
- "text": [
945
- "import pandas as pd\n",
946
- "import numpy as np\n",
947
- "\n",
948
- "\n",
949
- "df = pd.DataFrame({'users': ['Hu Tao', 'Zhongli', 'Xingqiu'],\n",
950
- " 'keywords_0': [\"a\", np.nan, \"c\"],\n",
951
- " 'keywords_1': [\"d\", \"e\", np.nan],\n",
952
- " 'keywords_2': [np.nan, np.nan, \"b\"],\n",
953
- " 'keywords_3': [\"f\", np.nan, \"g\"]})\n",
954
- "import numpy as np\n",
955
- "def g(df):\n",
956
- " df[\"keywords_all\"] = df.filter(like='keyword').apply(lambda x: '-'.join(x.dropna()), axis=1)\n",
957
- " return df\n",
958
- "\n",
959
- "df = g(df.copy())\n",
960
- "result = df\n",
961
- "print(result)\n"
962
- ]
963
- }
964
- ],
965
- "source": [
966
- "print(data[5]['answer'])"
967
- ]
968
- },
969
- {
970
- "cell_type": "code",
971
- "execution_count": 36,
972
- "id": "090e98c3-78db-4e33-af4b-01c6e1fc23d0",
973
- "metadata": {},
974
- "outputs": [
975
- {
976
- "name": "stdout",
977
- "output_type": "stream",
978
- "text": [
979
- "import pandas as pd\n",
980
- "import numpy as np\n",
981
- "\n",
982
- "\n",
983
- "df = pd.DataFrame({'users': ['Hu Tao', 'Zhongli', 'Xingqiu'],\n",
984
- " 'keywords_0': [\"a\", np.nan, \"c\"],\n",
985
- " 'keywords_1': [\"d\", \"e\", np.nan],\n",
986
- " 'keywords_2': [np.nan, np.nan, \"b\"],\n",
987
- " 'keywords_3': [\"f\", np.nan, \"g\"]})\n",
988
- "\n",
989
- "\n",
990
- "cols = [df.keywords_0, df.keywords_1, df.keywords_2, df.keywords_3]\n",
991
- "def f(cols):\n",
992
- " return \"-\".join(cols)\n",
993
- "\n",
994
- "\n",
995
- "df[\"keywords_all\"] = df.apply(lambda row: f(row[cols]), axis=1)\n",
996
- "\n",
997
- "\n",
998
- "print(df)\n",
999
- "<output>: import pandas as pd\n",
1000
- "import numpy as np\n",
1001
- "\n",
1002
- "\n",
1003
- "df = pd.DataFrame({'users': ['Hu Tao', 'Zhongli', 'Xingqiu'],\n",
1004
- " 'keywords_0': [\"a\", np.nan, \"c\"],\n",
1005
- " 'keywords_1': [\"d\", \"e\", np.nan],\n",
1006
- " 'keywords_2': [np.nan, np.nan, \"b\"],\n",
1007
- " 'keywords_3': [\"f\", np.nan, \"g\"]})\n",
1008
- "\n",
1009
- "\n",
1010
- "cols = [df.keywords_0, df.keywords_1, df.keywords_2, df.keywords_3]\n",
1011
- "def f(cols):\n",
1012
- " return \"-\".join(cols)\n",
1013
- "\n",
1014
- "\n",
1015
- "df[\"keywords_all\"] = df.apply(lambda\n"
1016
- ]
1017
- }
1018
- ],
1019
- "source": [
1020
- "prompt = data[5]['question']\n",
1021
- "print(generate_response(prompt))"
1022
- ]
1023
- },
1024
- {
1025
- "cell_type": "code",
1026
- "execution_count": 37,
1027
- "id": "29609669-1ac7-4f6a-b0e3-64a3bf7a6545",
1028
- "metadata": {},
1029
- "outputs": [
1030
- {
1031
- "name": "stdout",
1032
- "output_type": "stream",
1033
- "text": [
1034
- "import pandas as pd\n",
1035
- "\n",
1036
- "\n",
1037
- "df = pd.DataFrame({'A': [1, 2, None, 4, 5],\n",
1038
- " 'B': [None, 2, 3, 4, 5],\n",
1039
- " 'C': [1, 2, 3, 4, 5]})\n",
1040
- "df = df.dropna()\n",
1041
- "print(df)\n",
1042
- "<output>: import pandas as pd\n",
1043
- "\n",
1044
- "\n",
1045
- "df = pd.DataFrame({'A': [1, 2, None, 4, 5],\n",
1046
- " 'B': [None, 2, 3, 4, 5],\n",
1047
- " 'C': [1, 2, 3, 4, 5]})\n",
1048
- "df = df.dropna()\n",
1049
- "print(df)\n",
1050
- "<output>: import pandas as pd\n",
1051
- "\n",
1052
- "\n",
1053
- "df = pd.DataFrame({'A': [1, 2, None, 4, 5],\n",
1054
- " 'B': [None, 2, 3, 4, 5],\n",
1055
- " 'C': [1, 2, 3, 4, 5]})\n",
1056
- "df = df.dropna()\n",
1057
- "print(df)\n",
1058
- "<output>: import pandas as pd\n",
1059
- "\n",
1060
- "\n",
1061
- "df = pd.DataFrame({'A': [1, 2, None, 4, 5],\n",
1062
- " 'B': [None, 2, 3, 4, 5],\n",
1063
- " 'C': [1, 2, 3, 4, 5]})\n",
1064
- "df = df.dropna()\n",
1065
- "print(df)\n",
1066
- "<output>: import pandas as pd\n",
1067
- "\n",
1068
- "\n",
1069
- "df = pd.DataFrame({'A': [1, 2, None,\n"
1070
- ]
1071
- }
1072
- ],
1073
- "source": [
1074
- "prompt = \"How to remove null valued rows?\"\n",
1075
- "print(generate_response(prompt))"
1076
- ]
1077
- },
1078
- {
1079
- "cell_type": "code",
1080
- "execution_count": 39,
1081
- "id": "5ca085f6-30fc-4e50-a436-673f3baa75af",
1082
- "metadata": {},
1083
- "outputs": [
1084
- {
1085
- "name": "stdout",
1086
- "output_type": "stream",
1087
- "text": [
1088
- "import numpy as np\n",
1089
- "import pandas as pd\n",
1090
- "import matplotlib.pyplot as plt\n",
1091
- "import seaborn as sns\n",
1092
- "import sklearn\n",
1093
- "from sklearn.linear_model import LogisticRegression\n",
1094
- "from sklearn.model_selection import train_test_split\n",
1095
- "\n",
1096
- "\n",
1097
- "X, y = load_data()\n",
1098
- "\n",
1099
- "# Split the data into training and test sets\n",
1100
- "# Split the data into training and test sets\n",
1101
- "# Split the data into training and test sets\n",
1102
- "# Train a Logistic Regression model on the training data\n",
1103
- "# Print the accuracy of the model on the test data\n",
1104
- "# SOLUTION START\n",
1105
- "\n",
1106
- "<output>: import numpy as np\n",
1107
- "import pandas as pd\n",
1108
- "import matplotlib.pyplot as plt\n",
1109
- "import seaborn as sns\n",
1110
- "import sklearn\n",
1111
- "from sklearn.linear_model import LogisticRegression\n",
1112
- "from sklearn.model_selection import train_test_split\n",
1113
- "\n",
1114
- "\n",
1115
- "X, y = load_data()\n",
1116
- "\n",
1117
- "# Split the data into training and test sets\n",
1118
- "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
1119
- "# Train a Logistic Regression model on the training data\n",
1120
- "model = LogisticRegression()\n",
1121
- "model.fit(X_train, y_train)\n",
1122
- "# Print the accuracy of the model on the test data\n",
1123
- "print(model.score(X_test, y_test))\n",
1124
- "<output>: import numpy as np\n",
1125
- "import pandas as pd\n",
1126
- "import matplotlib.pyplot as plt\n",
1127
- "import seaborn as sns\n",
1128
- "import sklearn\n",
1129
- "from sklearn.linear_model import LogisticRegression\n",
1130
- "from sklearn.model_selection import train_test_split\n"
1131
- ]
1132
- }
1133
- ],
1134
- "source": [
1135
- "prompt = \"How to train a Logistic Regression model?\"\n",
1136
- "print(generate_response(prompt))"
1137
- ]
1138
- },
1139
- {
1140
- "cell_type": "code",
1141
- "execution_count": null,
1142
- "id": "146527ff-5d37-42c7-b06b-45c1aa224d17",
1143
- "metadata": {},
1144
- "outputs": [],
1145
- "source": []
1146
- },
1147
- {
1148
- "cell_type": "code",
1149
- "execution_count": null,
1150
- "id": "84f671f3-7bd6-4a7c-81e9-758052b424cf",
1151
- "metadata": {},
1152
- "outputs": [],
1153
- "source": []
1154
- }
1155
- ],
1156
- "metadata": {
1157
- "kernelspec": {
1158
- "display_name": "Python 3 (ipykernel)",
1159
- "language": "python",
1160
- "name": "python3"
1161
- },
1162
- "language_info": {
1163
- "codemirror_mode": {
1164
- "name": "ipython",
1165
- "version": 3
1166
- },
1167
- "file_extension": ".py",
1168
- "mimetype": "text/x-python",
1169
- "name": "python",
1170
- "nbconvert_exporter": "python",
1171
- "pygments_lexer": "ipython3",
1172
- "version": "3.10.13"
1173
- }
1174
- },
1175
- "nbformat": 4,
1176
- "nbformat_minor": 5
1177
- }