File size: 5,499 Bytes

391257f

{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "MljifiTVCT0_"
      },
      "source": [
        "# 推論用コード\n",
        "Hugging Faceにアップロードしたモデルを用いてELYZA-tasks-100-TVの出力を得るためのコードです。  \n",
        "このコードはunslothライブラリを用いてモデルを読み込み、推論するためのコードとなります。\n",
        "このコードはサンプルの推論用コードとほぼ同一であり、実行すればそのまま提出用のjsonlファイルが作成されます。\n",
        "\n",
        "なお、HuggingFaceのトークンの部分は実行者に合わせ適宜変更してください。"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "I5B5MOHuBy8b"
      },
      "outputs": [],
      "source": [
        "%%capture\n",
        "!pip install unsloth\n",
        "!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "GM7SNRtACg9V",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "607ad0ee-6e25-4e07-8c45-33ef0161e656"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
            "🦥 Unsloth Zoo will now patch everything to make training faster!\n"
          ]
        }
      ],
      "source": [
        "from unsloth import FastLanguageModel\n",
        "import torch\n",
        "import json"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "JmdUATTVCtyr"
      },
      "outputs": [],
      "source": [
        "user_name = \"OotoroMS\"\n",
        "model_name = \"llm-jp-3-13b-it_lora\"\n",
        "\n",
        "model_address = f\"{user_name}/{model_name}\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "TB6Hzx-2B5g8"
      },
      "outputs": [],
      "source": [
        "max_seq_length = 2048\n",
        "dtype = None\n",
        "load_in_4bit = True\n",
        "\n",
        "model, tokenizer = FastLanguageModel.from_pretrained(\n",
        "    model_name = model_address,\n",
        "    max_seq_length = max_seq_length,\n",
        "    dtype = dtype,\n",
        "    load_in_4bit = load_in_4bit,\n",
        "    token = \"HF_TOKEN\",\n",
        ")\n",
        "FastLanguageModel.for_inference(model)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "fg_yURyiB8o6"
      },
      "outputs": [],
      "source": [
        "# データセットの読み込み。\n",
        "# omnicampusの開発環境では、左にタスクのjsonlをドラッグアンドドロップしてから実行。\n",
        "datasets = []\n",
        "with open(\"./elyza-tasks-100-TV_0.jsonl\", \"r\") as f:\n",
        "    item = \"\"\n",
        "    for line in f:\n",
        "      line = line.strip()\n",
        "      item += line\n",
        "      if item.endswith(\"}\"):\n",
        "        datasets.append(json.loads(item))\n",
        "        item = \"\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "TwfZEra1CEJo",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "bca86293-c1f2-45bf-f19c-9910d4e0118f"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "100%|██████████| 100/100 [10:31<00:00,  6.31s/it]\n"
          ]
        }
      ],
      "source": [
        "from tqdm import tqdm\n",
        "\n",
        "# 推論\n",
        "results = []\n",
        "for dt in tqdm(datasets):\n",
        "  input = dt[\"input\"]\n",
        "\n",
        "  prompt = f\"\"\"### 指示\\n{input}\\n### 回答\\n\"\"\"\n",
        "\n",
        "  inputs = tokenizer([prompt], return_tensors = \"pt\").to(model.device)\n",
        "\n",
        "  outputs = model.generate(**inputs, max_new_tokens = 512, use_cache = True, do_sample=False, repetition_penalty=1.2)\n",
        "  prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\\n### 回答')[-1]\n",
        "\n",
        "  results.append({\"task_id\": dt[\"task_id\"], \"input\": input, \"output\": prediction})"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "voAPnXp5CKRL"
      },
      "outputs": [],
      "source": [
        "with open(f\"./{model_name}_output.jsonl\", 'w', encoding='utf-8') as f:\n",
        "    for result in results:\n",
        "        json.dump(result, f, ensure_ascii=False)\n",
        "        f.write('\\n')"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "L4",
      "machine_shape": "hm"
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "nbformat": 4,
  "nbformat_minor": 0
}