Commit
·
71a6dfa
1
Parent(s):
345bf63
Upload AutoTrain_LLM.ipynb
Browse files- AutoTrain_LLM.ipynb +137 -0
AutoTrain_LLM.ipynb
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"metadata": {
|
7 |
+
"collapsed": true,
|
8 |
+
"id": "JvMRbVLEJlZT"
|
9 |
+
},
|
10 |
+
"outputs": [],
|
11 |
+
"source": [
|
12 |
+
"#@title 🤗 AutoTrain LLM\n",
|
13 |
+
"#@markdown In order to use this Colab:\n",
|
14 |
+
"#@markdown - upload train.csv to a folder named `data/`\n",
|
15 |
+
"#@markdown - train.csv must contain a `text` column\n",
|
16 |
+
"#@markdown - choose a project name if you wish\n",
|
17 |
+
"#@markdown - change model if you wish, you can use most of the text-generation models from Hugging Face Hub\n",
|
18 |
+
"#@markdown - add Hugging Face information (token and repo_id) if you wish to push the trained model to the Hugging Face Hub\n",
|
19 |
+
"#@markdown - update hyperparameters if you wish\n",
|
20 |
+
"#@markdown - click `Runtime > Run all` or run each cell individually\n",
|
21 |
+
"\n",
|
22 |
+
"import os\n",
|
23 |
+
"!pip install -U autotrain-advanced > install_logs.txt\n",
|
24 |
+
"!autotrain setup --colab > setup_logs.txt"
|
25 |
+
]
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"cell_type": "code",
|
29 |
+
"execution_count": null,
|
30 |
+
"metadata": {
|
31 |
+
"id": "A2-_lkBS1WKA"
|
32 |
+
},
|
33 |
+
"outputs": [],
|
34 |
+
"source": [
|
35 |
+
"#@markdown ---\n",
|
36 |
+
"#@markdown #### Project Config\n",
|
37 |
+
"#@markdown Note: if you are using a restricted/private model, you need to enter your Hugging Face token in the next step.\n",
|
38 |
+
"project_name = 'my_autotrain_llm' # @param {type:\"string\"}\n",
|
39 |
+
"model_name = 'abhishek/llama-2-7b-hf-small-shards' # @param {type:\"string\"}\n",
|
40 |
+
"\n",
|
41 |
+
"#@markdown ---\n",
|
42 |
+
"#@markdown #### Push to Hub?\n",
|
43 |
+
"#@markdown Use these only if you want to push your trained model to a private repo in your Hugging Face Account\n",
|
44 |
+
"#@markdown If you don't use these, the model will be saved in Google Colab and you will need to download it manually.\n",
|
45 |
+
"#@markdown Please enter your Hugging Face write token. The trained model will be saved to your Hugging Face account.\n",
|
46 |
+
"#@markdown You can find your token here: https://huggingface.co/settings/tokens\n",
|
47 |
+
"push_to_hub = False # @param [\"False\", \"True\"] {type:\"raw\"}\n",
|
48 |
+
"hf_token = \"hf_XXX\" #@param {type:\"string\"}\n",
|
49 |
+
"repo_id = \"username/repo_name\" #@param {type:\"string\"}\n",
|
50 |
+
"\n",
|
51 |
+
"#@markdown ---\n",
|
52 |
+
"#@markdown #### Hyperparameters\n",
|
53 |
+
"learning_rate = 2e-4 # @param {type:\"number\"}\n",
|
54 |
+
"num_epochs = 1 #@param {type:\"number\"}\n",
|
55 |
+
"batch_size = 7 # @param {type:\"slider\", min:1, max:32, step:1}\n",
|
56 |
+
"block_size = 1024 # @param {type:\"number\"}\n",
|
57 |
+
"trainer = \"sft\" # @param [\"default\", \"sft\"] {type:\"raw\"}\n",
|
58 |
+
"warmup_ratio = 0.1 # @param {type:\"number\"}\n",
|
59 |
+
"weight_decay = 0.01 # @param {type:\"number\"}\n",
|
60 |
+
"gradient_accumulation = 4 # @param {type:\"number\"}\n",
|
61 |
+
"use_fp16 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
|
62 |
+
"use_peft = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
|
63 |
+
"use_int4 = True # @param [\"False\", \"True\"] {type:\"raw\"}\n",
|
64 |
+
"lora_r = 16 #@param {type:\"number\"}\n",
|
65 |
+
"lora_alpha = 32 #@param {type:\"number\"}\n",
|
66 |
+
"lora_dropout = 0.05 #@param {type:\"number\"}\n",
|
67 |
+
"\n",
|
68 |
+
"os.environ[\"PROJECT_NAME\"] = project_name\n",
|
69 |
+
"os.environ[\"MODEL_NAME\"] = model_name\n",
|
70 |
+
"os.environ[\"PUSH_TO_HUB\"] = str(push_to_hub)\n",
|
71 |
+
"os.environ[\"HF_TOKEN\"] = hf_token\n",
|
72 |
+
"os.environ[\"REPO_ID\"] = repo_id\n",
|
73 |
+
"os.environ[\"LEARNING_RATE\"] = str(learning_rate)\n",
|
74 |
+
"os.environ[\"NUM_EPOCHS\"] = str(num_epochs)\n",
|
75 |
+
"os.environ[\"BATCH_SIZE\"] = str(batch_size)\n",
|
76 |
+
"os.environ[\"BLOCK_SIZE\"] = str(block_size)\n",
|
77 |
+
"os.environ[\"WARMUP_RATIO\"] = str(warmup_ratio)\n",
|
78 |
+
"os.environ[\"WEIGHT_DECAY\"] = str(weight_decay)\n",
|
79 |
+
"os.environ[\"GRADIENT_ACCUMULATION\"] = str(gradient_accumulation)\n",
|
80 |
+
"os.environ[\"USE_FP16\"] = str(use_fp16)\n",
|
81 |
+
"os.environ[\"USE_PEFT\"] = str(use_peft)\n",
|
82 |
+
"os.environ[\"USE_INT4\"] = str(use_int4)\n",
|
83 |
+
"os.environ[\"LORA_R\"] = str(lora_r)\n",
|
84 |
+
"os.environ[\"LORA_ALPHA\"] = str(lora_alpha)\n",
|
85 |
+
"os.environ[\"LORA_DROPOUT\"] = str(lora_dropout)\n"
|
86 |
+
]
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"cell_type": "code",
|
90 |
+
"execution_count": null,
|
91 |
+
"metadata": {
|
92 |
+
"collapsed": true,
|
93 |
+
"id": "g3cd_ED_yXXt"
|
94 |
+
},
|
95 |
+
"outputs": [],
|
96 |
+
"source": [
|
97 |
+
"!autotrain llm \\\n",
|
98 |
+
"--train \\\n",
|
99 |
+
"--model ${MODEL_NAME} \\\n",
|
100 |
+
"--project-name ${PROJECT_NAME} \\\n",
|
101 |
+
"--data-path data/ \\\n",
|
102 |
+
"--text-column text \\\n",
|
103 |
+
"--lr ${LEARNING_RATE} \\\n",
|
104 |
+
"--batch-size ${BATCH_SIZE} \\\n",
|
105 |
+
"--epochs ${NUM_EPOCHS} \\\n",
|
106 |
+
"--block-size ${BLOCK_SIZE} \\\n",
|
107 |
+
"--warmup-ratio ${WARMUP_RATIO} \\\n",
|
108 |
+
"--lora-r ${LORA_R} \\\n",
|
109 |
+
"--lora-alpha ${LORA_ALPHA} \\\n",
|
110 |
+
"--lora-dropout ${LORA_DROPOUT} \\\n",
|
111 |
+
"--weight-decay ${WEIGHT_DECAY} \\\n",
|
112 |
+
"--gradient-accumulation ${GRADIENT_ACCUMULATION} \\\n",
|
113 |
+
"$( [[ \"$USE_FP16\" == \"True\" ]] && echo \"--fp16\" ) \\\n",
|
114 |
+
"$( [[ \"$USE_PEFT\" == \"True\" ]] && echo \"--use-peft\" ) \\\n",
|
115 |
+
"$( [[ \"$USE_INT4\" == \"True\" ]] && echo \"--use-int4\" ) \\\n",
|
116 |
+
"$( [[ \"$PUSH_TO_HUB\" == \"True\" ]] && echo \"--push-to-hub --token ${HF_TOKEN} --repo-id ${REPO_ID}\" )"
|
117 |
+
]
|
118 |
+
}
|
119 |
+
],
|
120 |
+
"metadata": {
|
121 |
+
"accelerator": "GPU",
|
122 |
+
"colab": {
|
123 |
+
"gpuType": "T4",
|
124 |
+
"provenance": []
|
125 |
+
},
|
126 |
+
"kernelspec": {
|
127 |
+
"display_name": "Python 3",
|
128 |
+
"name": "python3"
|
129 |
+
},
|
130 |
+
"language_info": {
|
131 |
+
"name": "python",
|
132 |
+
"version": "3.10.11"
|
133 |
+
}
|
134 |
+
},
|
135 |
+
"nbformat": 4,
|
136 |
+
"nbformat_minor": 0
|
137 |
+
}
|