Andyrasika committed on
Commit
71a6dfa
·
1 Parent(s): 345bf63

Upload AutoTrain_LLM.ipynb

Browse files
Files changed (1) hide show
  1. AutoTrain_LLM.ipynb +137 -0
AutoTrain_LLM.ipynb ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "id": "JvMRbVLEJlZT"
9
+ },
10
+ "outputs": [],
11
+ "source": [
12
+ "#@title 🤗 AutoTrain LLM\n",
13
+ "#@markdown In order to use this colab\n",
14
+ "#@markdown - upload train.csv to a folder named `data/`\n",
15
+ "#@markdown - train.csv must contain a `text` column\n",
16
+ "#@markdown - choose a project name if you wish\n",
17
+ "#@markdown - change model if you wish, you can use most of the text-generation models from Hugging Face Hub\n",
18
+ "#@markdown - add huggingface information (token and repo_id) if you wish to push trained model to huggingface hub\n",
19
+ "#@markdown - update hyperparameters if you wish\n",
20
+ "#@markdown - click `Runtime > Run all` or run each cell individually\n",
21
+ "\n",
22
+ "import os\n",
23
+ "!pip install -U autotrain-advanced > install_logs.txt\n",
24
+ "!autotrain setup --colab > setup_logs.txt"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": null,
30
+ "metadata": {
31
+ "id": "A2-_lkBS1WKA"
32
+ },
33
+ "outputs": [],
34
+ "source": [
35
# Project configuration cell: every value chosen below is exported as an
# environment variable at the bottom so the training cell can pass it to the
# `autotrain` CLI via ${VAR} shell interpolation.
import os  # local import so this cell also works standalone on a fresh kernel

#@markdown ---
#@markdown #### Project Config
#@markdown Note: if you are using a restricted/private model, you need to enter your Hugging Face token in the next step.
project_name = 'my_autotrain_llm' # @param {type:"string"}
model_name = 'abhishek/llama-2-7b-hf-small-shards' # @param {type:"string"}

#@markdown ---
#@markdown #### Push to Hub?
#@markdown Use these only if you want to push your trained model to a private repo in your Hugging Face Account
#@markdown If you don't use these, the model will be saved in Google Colab and you are required to download it manually.
#@markdown Please enter your Hugging Face write token. The trained model will be saved to your Hugging Face account.
#@markdown You can find your token here: https://huggingface.co/settings/tokens
push_to_hub = False # @param ["False", "True"] {type:"raw"}
# NOTE: placeholder only — enter a real token in the Colab form at run time;
# never save one into the notebook file.
hf_token = "hf_XXX" #@param {type:"string"}
repo_id = "username/repo_name" #@param {type:"string"}

#@markdown ---
#@markdown #### Hyperparameters
learning_rate = 2e-4 # @param {type:"number"}
num_epochs = 1 #@param {type:"number"}
batch_size = 7 # @param {type:"slider", min:1, max:32, step:1}
block_size = 1024 # @param {type:"number"}
trainer = "sft" # @param ["default", "sft"] {type:"raw"}
warmup_ratio = 0.1 # @param {type:"number"}
weight_decay = 0.01 # @param {type:"number"}
gradient_accumulation = 4 # @param {type:"number"}
use_fp16 = True # @param ["False", "True"] {type:"raw"}
use_peft = True # @param ["False", "True"] {type:"raw"}
use_int4 = True # @param ["False", "True"] {type:"raw"}
lora_r = 16 #@param {type:"number"}
lora_alpha = 32 #@param {type:"number"}
lora_dropout = 0.05 #@param {type:"number"}

# Export everything for the `!autotrain` invocation in the next cell.
# Values are stringified because environment variables are strings.
os.environ["PROJECT_NAME"] = project_name
os.environ["MODEL_NAME"] = model_name
os.environ["PUSH_TO_HUB"] = str(push_to_hub)
os.environ["HF_TOKEN"] = hf_token
os.environ["REPO_ID"] = repo_id
os.environ["LEARNING_RATE"] = str(learning_rate)
os.environ["NUM_EPOCHS"] = str(num_epochs)
os.environ["BATCH_SIZE"] = str(batch_size)
os.environ["BLOCK_SIZE"] = str(block_size)
# Fix: `trainer` was previously defined (with a #@param picker) but never
# exported, so the selected trainer could not reach the CLI.
os.environ["TRAINER"] = trainer
os.environ["WARMUP_RATIO"] = str(warmup_ratio)
os.environ["WEIGHT_DECAY"] = str(weight_decay)
os.environ["GRADIENT_ACCUMULATION"] = str(gradient_accumulation)
os.environ["USE_FP16"] = str(use_fp16)
os.environ["USE_PEFT"] = str(use_peft)
os.environ["USE_INT4"] = str(use_int4)
os.environ["LORA_R"] = str(lora_r)
os.environ["LORA_ALPHA"] = str(lora_alpha)
os.environ["LORA_DROPOUT"] = str(lora_dropout)
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": null,
91
+ "metadata": {
92
+ "collapsed": true,
93
+ "id": "g3cd_ED_yXXt"
94
+ },
95
+ "outputs": [],
96
+ "source": [
97
+ "!autotrain llm \\\n",
98
+ "--train \\\n",
99
+ "--model ${MODEL_NAME} \\\n",
100
+ "--project-name ${PROJECT_NAME} \\\n",
101
+ "--data-path data/ \\\n",
102
+ "--text-column text \\\n",
103
+ "--lr ${LEARNING_RATE} \\\n",
104
+ "--batch-size ${BATCH_SIZE} \\\n",
105
+ "--epochs ${NUM_EPOCHS} \\\n",
106
+ "--block-size ${BLOCK_SIZE} \\\n",
107
+ "--warmup-ratio ${WARMUP_RATIO} \\\n",
108
+ "--lora-r ${LORA_R} \\\n",
109
+ "--lora-alpha ${LORA_ALPHA} \\\n",
110
+ "--lora-dropout ${LORA_DROPOUT} \\\n",
111
+ "--weight-decay ${WEIGHT_DECAY} \\\n",
112
+ "--gradient-accumulation ${GRADIENT_ACCUMULATION} \\\n",
113
+ "$( [[ \"$USE_FP16\" == \"True\" ]] && echo \"--fp16\" ) \\\n",
114
+ "$( [[ \"$USE_PEFT\" == \"True\" ]] && echo \"--use-peft\" ) \\\n",
115
+ "$( [[ \"$USE_INT4\" == \"True\" ]] && echo \"--use-int4\" ) \\\n",
116
+ "$( [[ \"$PUSH_TO_HUB\" == \"True\" ]] && echo \"--push-to-hub --token ${HF_TOKEN} --repo-id ${REPO_ID}\" )"
117
+ ]
118
+ }
119
+ ],
120
+ "metadata": {
121
+ "accelerator": "GPU",
122
+ "colab": {
123
+ "gpuType": "T4",
124
+ "provenance": []
125
+ },
126
+ "kernelspec": {
127
+ "display_name": "Python 3",
128
+ "name": "python3"
129
+ },
130
+ "language_info": {
131
+ "name": "python",
132
+ "version": "3.10.11"
133
+ }
134
+ },
135
+ "nbformat": 4,
136
+ "nbformat_minor": 0
137
+ }