{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "id": "JvMRbVLEJlZT"
      },
      "outputs": [],
      "source": [
        "#@title 🤗 AutoTrain LLM\n",
        "#@markdown In order to use this colab\n",
        "#@markdown - upload train.csv to a folder named `data/`\n",
        "#@markdown - train.csv must contain a `text` column\n",
        "#@markdown - choose a project name if you wish\n",
        "#@markdown - change model if you wish, you can use most of the text-generation models from Hugging Face Hub\n",
        "#@markdown - add huggingface information (token and repo_id) if you wish to push trained model to huggingface hub\n",
        "#@markdown - update hyperparameters if you wish\n",
        "#@markdown - click `Runtime > Run all` or run each cell individually\n",
        "\n",
        "import os\n",
        "!pip install -U autotrain-advanced > install_logs.txt\n",
        "!autotrain setup --colab > setup_logs.txt"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "A2-_lkBS1WKA"
      },
      "outputs": [],
      "source": [
        "#@markdown ---\n",
        "#@markdown #### Project Config\n",
        "#@markdown Note: if you are using a restricted/private model, you need to enter your Hugging Face token in the next step.\n",
        "project_name = 'my_autotrain_llm' # @param {type:\"string\"}\n",
        "model_name = 'abhishek/llama-2-7b-hf-small-shards' # @param {type:\"string\"}\n",
        "\n",
        "#@markdown ---\n",
        "#@markdown #### Push to Hub?\n",
        "#@markdown Use these only if you want to push your trained model to a private repo in your Hugging Face Account\n",
        "#@markdown If you don't use these, the model will be saved in Google Colab and you are required to download it manually.\n",
        "#@markdown Please enter your Hugging Face write token. The trained model will be saved to your Hugging Face account.\n",
        "#@markdown You can find your token here: https://huggingface.co/settings/tokens\n",
        "push_to_hub = False # @param [\"False\", \"True\"] {type:\"raw\"}\n",
        "hf_token = \"hf_XXX\" #@param {type:\"string\"}\n",
        "repo_id = \"username/repo_name\" #@param {type:\"string\"}\n",
        "\n",
        "#@markdown ---\n",
        "#@markdown #### Hyperparameters\n",
        "learning_rate = 2e-4 # @param {type:\"number\"}\n",
        "num_epochs = 1 #@param {type:\"number\"}\n",
        "batch_size = 7 # @param {type:\"slider\", min:1, max:32, step:1}\n",
        "block_size = 1024 # @param {type:\"number\"}\n",
        "trainer = \"sft\" # @param [\"default\", \"sft\"] {type:\"string\"}\n",
        "warmup_ratio = 0.1 # @param {type:\"number\"}\n",
        "weight_decay = 0.01 # @param {type:\"number\"}\n",
        "gradient_accumulation = 4 # @param {type:\"number\"}\n",
        "use_fp16 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
        "use_peft = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
        "use_int4 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
        "lora_r = 16 #@param {type:\"number\"}\n",
        "lora_alpha = 32 #@param {type:\"number\"}\n",
        "lora_dropout = 0.05 #@param {type:\"number\"}\n",
        "\n",
        "os.environ[\"PROJECT_NAME\"] = project_name\n",
        "os.environ[\"MODEL_NAME\"] = model_name\n",
        "os.environ[\"PUSH_TO_HUB\"] = str(push_to_hub)\n",
        "os.environ[\"HF_TOKEN\"] = hf_token\n",
        "os.environ[\"REPO_ID\"] = repo_id\n",
        "os.environ[\"LEARNING_RATE\"] = str(learning_rate)\n",
        "os.environ[\"NUM_EPOCHS\"] = str(num_epochs)\n",
        "os.environ[\"BATCH_SIZE\"] = str(batch_size)\n",
        "os.environ[\"BLOCK_SIZE\"] = str(block_size)\n",
        "os.environ[\"TRAINER\"] = trainer\n",
        "os.environ[\"WARMUP_RATIO\"] = str(warmup_ratio)\n",
        "os.environ[\"WEIGHT_DECAY\"] = str(weight_decay)\n",
        "os.environ[\"GRADIENT_ACCUMULATION\"] = str(gradient_accumulation)\n",
        "os.environ[\"USE_FP16\"] = str(use_fp16)\n",
        "os.environ[\"USE_PEFT\"] = str(use_peft)\n",
        "os.environ[\"USE_INT4\"] = str(use_int4)\n",
        "os.environ[\"LORA_R\"] = str(lora_r)\n",
        "os.environ[\"LORA_ALPHA\"] = str(lora_alpha)\n",
        "os.environ[\"LORA_DROPOUT\"] = str(lora_dropout)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "id": "g3cd_ED_yXXt"
      },
      "outputs": [],
      "source": [
        "!autotrain llm \\\n",
        "--train \\\n",
        "--model ${MODEL_NAME} \\\n",
        "--project-name ${PROJECT_NAME} \\\n",
        "--data-path data/ \\\n",
        "--text-column text \\\n",
        "--lr ${LEARNING_RATE} \\\n",
        "--batch-size ${BATCH_SIZE} \\\n",
        "--epochs ${NUM_EPOCHS} \\\n",
        "--block-size ${BLOCK_SIZE} \\\n",
        "--trainer ${TRAINER} \\\n",
        "--warmup-ratio ${WARMUP_RATIO} \\\n",
        "--lora-r ${LORA_R} \\\n",
        "--lora-alpha ${LORA_ALPHA} \\\n",
        "--lora-dropout ${LORA_DROPOUT} \\\n",
        "--weight-decay ${WEIGHT_DECAY} \\\n",
        "--gradient-accumulation ${GRADIENT_ACCUMULATION} \\\n",
        "$( [[ \"$USE_FP16\" == \"True\" ]] && echo \"--fp16\" ) \\\n",
        "$( [[ \"$USE_PEFT\" == \"True\" ]] && echo \"--use-peft\" ) \\\n",
        "$( [[ \"$USE_INT4\" == \"True\" ]] && echo \"--use-int4\" ) \\\n",
        "$( [[ \"$PUSH_TO_HUB\" == \"True\" ]] && echo \"--push-to-hub --token ${HF_TOKEN} --repo-id ${REPO_ID}\" )"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.10.11"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}