OotoroMS
/

llm-jp-3-13b-it_lora

+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "MljifiTVCT0_"
+      },
+      "source": [
+        "# 推論用コード\n",
+        "Hugging Faceにアップロードしたモデルを用いてELYZA-tasks-100-TVの出力を得るためのコードです。  \n",
+        "このコードはunslothライブラリを用いてモデルを読み込み、推論するためのコードとなります。\n",
+        "このコードはサンプルの推論用コードとほぼ同一であり、実行すればそのまま提出用のjsonlファイルが作成されます。\n",
+        "\n",
+        "なお、HuggingFaceのトークンの部分は実行者に合わせ適宜変更してください。"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "I5B5MOHuBy8b"
+      },
+      "outputs": [],
+      "source": [
+        "%%capture\n",
+        "# Install Unsloth from PyPI first, then uninstall it and reinstall the current\n",
+        "# GitHub version with the `colab-new` extra (upgrading, without using the pip cache).\n",
+        "# The %%capture cell magic above keeps the pip output out of the notebook.\n",
+        "!pip install unsloth\n",
+        "!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "GM7SNRtACg9V",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "607ad0ee-6e25-4e07-8c45-33ef0161e656"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+            "🦥 Unsloth Zoo will now patch everything to make training faster!\n"
+          ]
+        }
+      ],
+      "source": [
+        "from unsloth import FastLanguageModel\n",
+        "import torch\n",
+        "import json"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "JmdUATTVCtyr"
+      },
+      "outputs": [],
+      "source": [
+        "# Hugging Face repository holding the model, addressed as \"<user>/<model>\".\n",
+        "# `model_name` is reused later to name the output file.\n",
+        "user_name, model_name = \"OotoroMS\", \"llm-jp-3-13b-it_lora\"\n",
+        "model_address = \"/\".join([user_name, model_name])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "TB6Hzx-2B5g8"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "\n",
+        "# Loading options handed to Unsloth's FastLanguageModel.from_pretrained.\n",
+        "max_seq_length = 2048\n",
+        "dtype = None  # passed through as-is; no dtype is forced from this notebook\n",
+        "load_in_4bit = True\n",
+        "\n",
+        "# Read the Hugging Face access token from the HF_TOKEN environment variable so that\n",
+        "# no credential is ever written into the notebook. When the variable is unset,\n",
+        "# None is passed instead of a bogus placeholder string.\n",
+        "# NOTE(review): presumably the hub client then falls back to a cached login or\n",
+        "# anonymous access -- confirm for private repositories.\n",
+        "hf_token = os.environ.get(\"HF_TOKEN\")\n",
+        "\n",
+        "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+        "    model_name = model_address,\n",
+        "    max_seq_length = max_seq_length,\n",
+        "    dtype = dtype,\n",
+        "    load_in_4bit = load_in_4bit,\n",
+        "    token = hf_token,\n",
+        ")\n",
+        "# Put the loaded model into Unsloth's inference mode before generating.\n",
+        "FastLanguageModel.for_inference(model)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "fg_yURyiB8o6"
+      },
+      "outputs": [],
+      "source": [
+        "# Load the dataset.\n",
+        "# In the omnicampus development environment, drag and drop the task jsonl into the\n",
+        "# left-hand file pane before running this cell.\n",
+        "datasets = []\n",
+        "# Decode explicitly as UTF-8 so reading does not depend on the platform's default\n",
+        "# encoding (the output file is written as UTF-8 as well).\n",
+        "with open(\"./elyza-tasks-100-TV_0.jsonl\", \"r\", encoding=\"utf-8\") as f:\n",
+        "    item = \"\"\n",
+        "    for line in f:\n",
+        "        # A record may span several physical lines, so stripped lines are accumulated\n",
+        "        # until the buffer parses as one complete JSON value.\n",
+        "        item += line.strip()\n",
+        "        if not item.endswith(\"}\"):\n",
+        "            continue\n",
+        "        try:\n",
+        "            record = json.loads(item)\n",
+        "        except json.JSONDecodeError:\n",
+        "            # The closing brace belonged to a nested object; keep accumulating.\n",
+        "            continue\n",
+        "        datasets.append(record)\n",
+        "        item = \"\"\n",
+        "\n",
+        "# Fail loudly rather than silently dropping a truncated or malformed trailing record.\n",
+        "if item:\n",
+        "    raise ValueError(f\"Incomplete JSON record at end of file: {item[:80]!r}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "TwfZEra1CEJo",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "bca86293-c1f2-45bf-f19c-9910d4e0118f"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "100%|██████████| 100/100 [10:31<00:00,  6.31s/it]\n"
+          ]
+        }
+      ],
+      "source": [
+        "from tqdm import tqdm\n",
+        "\n",
+        "# Inference: build one prompt per task, generate greedily, and collect the answers.\n",
+        "results = []\n",
+        "for dt in tqdm(datasets):\n",
+        "  # Named `instruction` (not `input`) so the builtin input() is not shadowed in the kernel.\n",
+        "  instruction = dt[\"input\"]\n",
+        "\n",
+        "  prompt = f\"\"\"### 指示\\n{instruction}\\n### 回答\\n\"\"\"\n",
+        "\n",
+        "  inputs = tokenizer([prompt], return_tensors = \"pt\").to(model.device)\n",
+        "  # Number of prompt tokens; generate() returns the prompt followed by the new tokens.\n",
+        "  prompt_length = inputs[\"input_ids\"].shape[1]\n",
+        "\n",
+        "  # do_sample=False selects deterministic decoding; at most 512 new tokens are produced.\n",
+        "  outputs = model.generate(**inputs, max_new_tokens = 512, use_cache = True, do_sample=False, repetition_penalty=1.2)\n",
+        "  # Decode only the newly generated tokens. Splitting the full text on the answer marker\n",
+        "  # would truncate the answer whenever the task text or the generation repeats that\n",
+        "  # marker, and it also left a leading newline in every prediction.\n",
+        "  prediction = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)\n",
+        "\n",
+        "  results.append({\"task_id\": dt[\"task_id\"], \"input\": instruction, \"output\": prediction})"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "voAPnXp5CKRL"
+      },
+      "outputs": [],
+      "source": [
+        "# Save the predictions as JSON Lines: one object per line, UTF-8 encoded, with\n",
+        "# non-ASCII characters written as-is rather than as \\\\u escapes.\n",
+        "with open(f\"./{model_name}_output.jsonl\", 'w', encoding='utf-8') as f:\n",
+        "    for result in results:\n",
+        "        f.write(json.dumps(result, ensure_ascii=False) + '\\n')"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "L4",
+      "machine_shape": "hm"
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}