Andyrasika committed on
Commit
71a6dfa
·
1 Parent(s): 345bf63

Upload AutoTrain_LLM.ipynb

Browse files
Files changed (1) hide show
  1. AutoTrain_LLM.ipynb +137 -0
AutoTrain_LLM.ipynb ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "id": "JvMRbVLEJlZT"
9
+ },
10
+ "outputs": [],
11
+ "source": [
12
+ "#@title 🤗 AutoTrain LLM\n",
13
+ "#@markdown In order to use this colab\n",
14
+ "#@markdown - upload train.csv to a folder named `data/`\n",
15
+ "#@markdown - train.csv must contain a `text` column\n",
16
+ "#@markdown - choose a project name if you wish\n",
17
+ "#@markdown - change model if you wish, you can use most of the text-generation models from Hugging Face Hub\n",
18
+ "#@markdown - add huggingface information (token and repo_id) if you wish to push trained model to huggingface hub\n",
19
+ "#@markdown - update hyperparameters if you wish\n",
20
+ "#@markdown - click `Runtime > Run all` or run each cell individually\n",
21
+ "\n",
22
+ "import os\n",
23
+ "!pip install -U autotrain-advanced > install_logs.txt\n",
24
+ "!autotrain setup --colab > setup_logs.txt"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": null,
30
+ "metadata": {
31
+ "id": "A2-_lkBS1WKA"
32
+ },
33
+ "outputs": [],
34
+ "source": [
35
# Project configuration cell: every value chosen below is exported as an
# environment variable at the bottom so the training cell can pass it to the
# `autotrain` CLI via ${VAR} shell interpolation.
import os  # local import so this cell also works standalone on a fresh kernel

#@markdown ---
#@markdown #### Project Config
#@markdown Note: if you are using a restricted/private model, you need to enter your Hugging Face token in the next step.
project_name = 'my_autotrain_llm' # @param {type:"string"}
model_name = 'abhishek/llama-2-7b-hf-small-shards' # @param {type:"string"}

#@markdown ---
#@markdown #### Push to Hub?
#@markdown Use these only if you want to push your trained model to a private repo in your Hugging Face Account
#@markdown If you don't use these, the model will be saved in Google Colab and you are required to download it manually.
#@markdown Please enter your Hugging Face write token. The trained model will be saved to your Hugging Face account.
#@markdown You can find your token here: https://huggingface.co/settings/tokens
push_to_hub = False # @param ["False", "True"] {type:"raw"}
# NOTE: placeholder only — enter a real token in the Colab form at run time;
# never save one into the notebook file.
hf_token = "hf_XXX" #@param {type:"string"}
repo_id = "username/repo_name" #@param {type:"string"}

#@markdown ---
#@markdown #### Hyperparameters
learning_rate = 2e-4 # @param {type:"number"}
num_epochs = 1 #@param {type:"number"}
batch_size = 7 # @param {type:"slider", min:1, max:32, step:1}
block_size = 1024 # @param {type:"number"}
trainer = "sft" # @param ["default", "sft"] {type:"raw"}
warmup_ratio = 0.1 # @param {type:"number"}
weight_decay = 0.01 # @param {type:"number"}
gradient_accumulation = 4 # @param {type:"number"}
use_fp16 = True # @param ["False", "True"] {type:"raw"}
use_peft = True # @param ["False", "True"] {type:"raw"}
use_int4 = True # @param ["False", "True"] {type:"raw"}
lora_r = 16 #@param {type:"number"}
lora_alpha = 32 #@param {type:"number"}
lora_dropout = 0.05 #@param {type:"number"}

# Export everything for the `!autotrain` invocation in the next cell.
# Values are stringified because environment variables are strings.
os.environ["PROJECT_NAME"] = project_name
os.environ["MODEL_NAME"] = model_name
os.environ["PUSH_TO_HUB"] = str(push_to_hub)
os.environ["HF_TOKEN"] = hf_token
os.environ["REPO_ID"] = repo_id
os.environ["LEARNING_RATE"] = str(learning_rate)
os.environ["NUM_EPOCHS"] = str(num_epochs)
os.environ["BATCH_SIZE"] = str(batch_size)
os.environ["BLOCK_SIZE"] = str(block_size)
# Fix: `trainer` was previously defined (with a #@param picker) but never
# exported, so the selected trainer could not reach the CLI.
os.environ["TRAINER"] = trainer
os.environ["WARMUP_RATIO"] = str(warmup_ratio)
os.environ["WEIGHT_DECAY"] = str(weight_decay)
os.environ["GRADIENT_ACCUMULATION"] = str(gradient_accumulation)
os.environ["USE_FP16"] = str(use_fp16)
os.environ["USE_PEFT"] = str(use_peft)
os.environ["USE_INT4"] = str(use_int4)
os.environ["LORA_R"] = str(lora_r)
os.environ["LORA_ALPHA"] = str(lora_alpha)
os.environ["LORA_DROPOUT"] = str(lora_dropout)
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": null,
91
+ "metadata": {
92
+ "collapsed": true,
93
+ "id": "g3cd_ED_yXXt"
94
+ },
95
+ "outputs": [],
96
+ "source": [
97
+ "!autotrain llm \\\n",
98
+ "--train \\\n",
99
+ "--model ${MODEL_NAME} \\\n",
100
+ "--project-name ${PROJECT_NAME} \\\n",
101
+ "--data-path data/ \\\n",
102
+ "--text-column text \\\n",
103
+ "--lr ${LEARNING_RATE} \\\n",
104
+ "--batch-size ${BATCH_SIZE} \\\n",
105
+ "--epochs ${NUM_EPOCHS} \\\n",
106
+ "--block-size ${BLOCK_SIZE} \\\n",
107
+ "--warmup-ratio ${WARMUP_RATIO} \\\n",
108
+ "--lora-r ${LORA_R} \\\n",
109
+ "--lora-alpha ${LORA_ALPHA} \\\n",
110
+ "--lora-dropout ${LORA_DROPOUT} \\\n",
111
+ "--weight-decay ${WEIGHT_DECAY} \\\n",
112
+ "--gradient-accumulation ${GRADIENT_ACCUMULATION} \\\n",
113
+ "$( [[ \"$USE_FP16\" == \"True\" ]] && echo \"--fp16\" ) \\\n",
114
+ "$( [[ \"$USE_PEFT\" == \"True\" ]] && echo \"--use-peft\" ) \\\n",
115
+ "$( [[ \"$USE_INT4\" == \"True\" ]] && echo \"--use-int4\" ) \\\n",
116
+ "$( [[ \"$PUSH_TO_HUB\" == \"True\" ]] && echo \"--push-to-hub --token ${HF_TOKEN} --repo-id ${REPO_ID}\" )"
117
+ ]
118
+ }
119
+ ],
120
+ "metadata": {
121
+ "accelerator": "GPU",
122
+ "colab": {
123
+ "gpuType": "T4",
124
+ "provenance": []
125
+ },
126
+ "kernelspec": {
127
+ "display_name": "Python 3",
128
+ "name": "python3"
129
+ },
130
+ "language_info": {
131
+ "name": "python",
132
+ "version": "3.10.11"
133
+ }
134
+ },
135
+ "nbformat": 4,
136
+ "nbformat_minor": 0
137
+ }