{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"id": "JvMRbVLEJlZT"
},
"outputs": [],
"source": [
"#@title 🤗 AutoTrain LLM\n",
"#@markdown In order to use this colab\n",
"#@markdown - upload train.csv to a folder named `data/`\n",
"#@markdown - train.csv must contain a `text` column\n",
"#@markdown - choose a project name if you wish\n",
"#@markdown - change model if you wish, you can use most of the text-generation models from Hugging Face Hub\n",
"#@markdown - add huggingface information (token and repo_id) if you wish to push trained model to huggingface hub\n",
"#@markdown - update hyperparameters if you wish\n",
"#@markdown - click `Runtime > Run all` or run each cell individually\n",
"\n",
"import os\n",
"# %pip (rather than !pip) ensures the install targets the running kernel's\n",
"# environment; the magic still runs via the system shell, so the redirect works.\n",
"%pip install -U autotrain-advanced > install_logs.txt\n",
"!autotrain setup --colab > setup_logs.txt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "A2-_lkBS1WKA"
},
"outputs": [],
"source": [
"#@markdown ---\n",
"#@markdown #### Project Config\n",
"#@markdown Note: if you are using a restricted/private model, you need to enter your Hugging Face token in the next step.\n",
"project_name = 'my_autotrain_llm' # @param {type:\"string\"}\n",
"model_name = 'abhishek/llama-2-7b-hf-small-shards' # @param {type:\"string\"}\n",
"\n",
"#@markdown ---\n",
"#@markdown #### Push to Hub?\n",
"#@markdown Use these only if you want to push your trained model to a private repo in your Hugging Face Account\n",
"#@markdown If you don't use these, the model will be saved in Google Colab and you are required to download it manually.\n",
"#@markdown Please enter your Hugging Face write token. The trained model will be saved to your Hugging Face account.\n",
"#@markdown You can find your token here: https://huggingface.co/settings/tokens\n",
"push_to_hub = False # @param [\"False\", \"True\"] {type:\"raw\"}\n",
"# Security note: never share or commit this notebook with a real token filled\n",
"# in below — paste it at runtime only.\n",
"hf_token = \"hf_XXX\" #@param {type:\"string\"}\n",
"repo_id = \"username/repo_name\" #@param {type:\"string\"}\n",
"\n",
"#@markdown ---\n",
"#@markdown #### Hyperparameters\n",
"learning_rate = 2e-4 # @param {type:\"number\"}\n",
"num_epochs = 1 #@param {type:\"number\"}\n",
"batch_size = 7 # @param {type:\"slider\", min:1, max:32, step:1}\n",
"block_size = 1024 # @param {type:\"number\"}\n",
"trainer = \"sft\" # @param [\"default\", \"sft\"] {type:\"raw\"}\n",
"warmup_ratio = 0.1 # @param {type:\"number\"}\n",
"weight_decay = 0.01 # @param {type:\"number\"}\n",
"gradient_accumulation = 4 # @param {type:\"number\"}\n",
"use_fp16 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
"use_peft = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
"use_int4 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
"lora_r = 16 #@param {type:\"number\"}\n",
"lora_alpha = 32 #@param {type:\"number\"}\n",
"lora_dropout = 0.05 #@param {type:\"number\"}\n",
"\n",
"# Export every setting as an environment variable so the shell-based\n",
"# `autotrain` invocation in the next cell can read them via ${VAR}.\n",
"os.environ[\"PROJECT_NAME\"] = project_name\n",
"os.environ[\"MODEL_NAME\"] = model_name\n",
"os.environ[\"PUSH_TO_HUB\"] = str(push_to_hub)\n",
"os.environ[\"HF_TOKEN\"] = hf_token\n",
"os.environ[\"REPO_ID\"] = repo_id\n",
"os.environ[\"LEARNING_RATE\"] = str(learning_rate)\n",
"os.environ[\"NUM_EPOCHS\"] = str(num_epochs)\n",
"os.environ[\"BATCH_SIZE\"] = str(batch_size)\n",
"os.environ[\"BLOCK_SIZE\"] = str(block_size)\n",
"os.environ[\"WARMUP_RATIO\"] = str(warmup_ratio)\n",
"os.environ[\"WEIGHT_DECAY\"] = str(weight_decay)\n",
"os.environ[\"GRADIENT_ACCUMULATION\"] = str(gradient_accumulation)\n",
"os.environ[\"USE_FP16\"] = str(use_fp16)\n",
"os.environ[\"USE_PEFT\"] = str(use_peft)\n",
"os.environ[\"USE_INT4\"] = str(use_int4)\n",
"os.environ[\"LORA_R\"] = str(lora_r)\n",
"os.environ[\"LORA_ALPHA\"] = str(lora_alpha)\n",
"os.environ[\"LORA_DROPOUT\"] = str(lora_dropout)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"id": "g3cd_ED_yXXt"
},
"outputs": [],
"source": [
"# Launch training. All values come from the environment variables exported in\n",
"# the config cell. Fixed-argument expansions are quoted so values containing\n",
"# spaces or shell metacharacters (e.g. a project name with a space) don't\n",
"# word-split; the $( ... && echo ... ) substitutions stay unquoted on purpose,\n",
"# because their output must split into separate flags (or vanish when False).\n",
"!autotrain llm \\\n",
"--train \\\n",
"--model \"${MODEL_NAME}\" \\\n",
"--project-name \"${PROJECT_NAME}\" \\\n",
"--data-path data/ \\\n",
"--text-column text \\\n",
"--lr \"${LEARNING_RATE}\" \\\n",
"--batch-size \"${BATCH_SIZE}\" \\\n",
"--epochs \"${NUM_EPOCHS}\" \\\n",
"--block-size \"${BLOCK_SIZE}\" \\\n",
"--warmup-ratio \"${WARMUP_RATIO}\" \\\n",
"--lora-r \"${LORA_R}\" \\\n",
"--lora-alpha \"${LORA_ALPHA}\" \\\n",
"--lora-dropout \"${LORA_DROPOUT}\" \\\n",
"--weight-decay \"${WEIGHT_DECAY}\" \\\n",
"--gradient-accumulation \"${GRADIENT_ACCUMULATION}\" \\\n",
"$( [[ \"$USE_FP16\" == \"True\" ]] && echo \"--fp16\" ) \\\n",
"$( [[ \"$USE_PEFT\" == \"True\" ]] && echo \"--use-peft\" ) \\\n",
"$( [[ \"$USE_INT4\" == \"True\" ]] && echo \"--use-int4\" ) \\\n",
"$( [[ \"$PUSH_TO_HUB\" == \"True\" ]] && echo \"--push-to-hub --token ${HF_TOKEN} --repo-id ${REPO_ID}\" )"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}