,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=[],\n",
"resume_from_checkpoint=None,\n",
"run_name=/content/artifacts/checkpoints,\n",
"save_on_each_node=False,\n",
"save_safetensors=False,\n",
"save_steps=50,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
"torch_compile_backend=None,\n",
"torch_compile_mode=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_cpu=False,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.02,\n",
"warmup_steps=100,\n",
"weight_decay=0.0,\n",
")"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from utils import parse_training_args\n",
"\n",
"training_args = parse_training_args(params)\n",
"training_args"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2ae3e5f9-e28e-457b-b6bf-a62a472241bf",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-20T04:34:09.671203Z",
"iopub.status.busy": "2023-10-20T04:34:09.670480Z",
"iopub.status.idle": "2023-10-20T04:34:09.673797Z",
"shell.execute_reply": "2023-10-20T04:34:09.673184Z"
},
"papermill": {
"duration": 1.114255,
"end_time": "2023-10-20T04:34:09.675322",
"exception": false,
"start_time": "2023-10-20T04:34:08.561067",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# data = data[\"train\"].train_test_split(test_size=0.1)\n",
"# data\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "5bc91439-6108-445c-8f85-e6558c9f0677",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-20T04:34:11.808129Z",
"iopub.status.busy": "2023-10-20T04:34:11.807696Z",
"iopub.status.idle": "2023-10-20T04:34:12.065605Z",
"shell.execute_reply": "2023-10-20T04:34:12.064728Z"
},
"papermill": {
"duration": 1.353165,
"end_time": "2023-10-20T04:34:12.067207",
"exception": false,
"start_time": "2023-10-20T04:34:10.714042",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"# Create the LoRA output directory in-process (same effect as `mkdir -p`).\n",
"# This avoids forking a shell, which is what triggers the tokenizers fork warning,\n",
"# and it does not break on paths containing spaces or shell metacharacters.\n",
"Path(trained_model_path_lora).mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "b33e407a-9d4f-49f6-a74b-b80db8cc3a8a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-20T04:34:14.147929Z",
"iopub.status.busy": "2023-10-20T04:34:14.147060Z",
"iopub.status.idle": "2023-10-20T08:45:44.939895Z",
"shell.execute_reply": "2023-10-20T08:45:44.939204Z"
},
"papermill": {
"duration": 15092.697329,
"end_time": "2023-10-20T08:45:45.801991",
"exception": false,
"start_time": "2023-10-20T04:34:13.104662",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [2391/2391 4:11:21, Epoch 2/3]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
"
\n",
" \n",
" \n",
" \n",
" 50 | \n",
" 1.069500 | \n",
"
\n",
" \n",
" 100 | \n",
" 0.515400 | \n",
"
\n",
" \n",
" 150 | \n",
" 0.274500 | \n",
"
\n",
" \n",
" 200 | \n",
" 0.173600 | \n",
"
\n",
" \n",
" 250 | \n",
" 0.118500 | \n",
"
\n",
" \n",
" 300 | \n",
" 0.084000 | \n",
"
\n",
" \n",
" 350 | \n",
" 0.065700 | \n",
"
\n",
" \n",
" 400 | \n",
" 0.054700 | \n",
"
\n",
" \n",
" 450 | \n",
" 0.048400 | \n",
"
\n",
" \n",
" 500 | \n",
" 0.044000 | \n",
"
\n",
" \n",
" 550 | \n",
" 0.039800 | \n",
"
\n",
" \n",
" 600 | \n",
" 0.039200 | \n",
"
\n",
" \n",
" 650 | \n",
" 0.038100 | \n",
"
\n",
" \n",
" 700 | \n",
" 0.034400 | \n",
"
\n",
" \n",
" 750 | \n",
" 0.034300 | \n",
"
\n",
" \n",
" 800 | \n",
" 0.032600 | \n",
"
\n",
" \n",
" 850 | \n",
" 0.027300 | \n",
"
\n",
" \n",
" 900 | \n",
" 0.026600 | \n",
"
\n",
" \n",
" 950 | \n",
" 0.027700 | \n",
"
\n",
" \n",
" 1000 | \n",
" 0.026800 | \n",
"
\n",
" \n",
" 1050 | \n",
" 0.026100 | \n",
"
\n",
" \n",
" 1100 | \n",
" 0.026900 | \n",
"
\n",
" \n",
" 1150 | \n",
" 0.026200 | \n",
"
\n",
" \n",
" 1200 | \n",
" 0.025400 | \n",
"
\n",
" \n",
" 1250 | \n",
" 0.023900 | \n",
"
\n",
" \n",
" 1300 | \n",
" 0.025000 | \n",
"
\n",
" \n",
" 1350 | \n",
" 0.024000 | \n",
"
\n",
" \n",
" 1400 | \n",
" 0.025500 | \n",
"
\n",
" \n",
" 1450 | \n",
" 0.024200 | \n",
"
\n",
" \n",
" 1500 | \n",
" 0.023000 | \n",
"
\n",
" \n",
" 1550 | \n",
" 0.024700 | \n",
"
\n",
" \n",
" 1600 | \n",
" 0.023400 | \n",
"
\n",
" \n",
" 1650 | \n",
" 0.019500 | \n",
"
\n",
" \n",
" 1700 | \n",
" 0.019600 | \n",
"
\n",
" \n",
" 1750 | \n",
" 0.020400 | \n",
"
\n",
" \n",
" 1800 | \n",
" 0.019600 | \n",
"
\n",
" \n",
" 1850 | \n",
" 0.019200 | \n",
"
\n",
" \n",
" 1900 | \n",
" 0.019600 | \n",
"
\n",
" \n",
" 1950 | \n",
" 0.018700 | \n",
"
\n",
" \n",
" 2000 | \n",
" 0.019400 | \n",
"
\n",
" \n",
" 2050 | \n",
" 0.020000 | \n",
"
\n",
" \n",
" 2100 | \n",
" 0.020300 | \n",
"
\n",
" \n",
" 2150 | \n",
" 0.019400 | \n",
"
\n",
" \n",
" 2200 | \n",
" 0.019400 | \n",
"
\n",
" \n",
" 2250 | \n",
" 0.019700 | \n",
"
\n",
" \n",
" 2300 | \n",
" 0.019300 | \n",
"
\n",
" \n",
" 2350 | \n",
" 0.019500 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=2391, training_loss=0.07075796158230988, metrics={'train_runtime': 15090.3717, 'train_samples_per_second': 0.634, 'train_steps_per_second': 0.158, 'total_flos': 3.0420974601928704e+17, 'train_loss': 0.07075796158230988, 'epoch': 3.0})"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers.trainer_utils import get_last_checkpoint\n",
"\n",
"# Causal-LM fine-tuning (mlm=False) on the `train` split; no eval dataset is passed.\n",
"trainer = transformers.Trainer(\n",
"    model=model,\n",
"    train_dataset=data[\"train\"],\n",
"    # eval_dataset=data[\"test\"],\n",
"    args=training_args,\n",
"    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n",
")\n",
"model.config.use_cache = False  # silence the warnings. Please re-enable for inference!\n",
"\n",
"# Look for checkpoints where the Trainer actually writes them.\n",
"checkpoint_path = Path(training_args.output_dir)\n",
"\n",
"# Resume only from a real `checkpoint-*` directory. Passing a bare True makes\n",
"# Trainer.train() raise when the output directory holds files but no checkpoint,\n",
"# so resolve the checkpoint path explicitly; None means \"start from scratch\".\n",
"resume_from_checkpoint = (\n",
"    get_last_checkpoint(str(checkpoint_path)) if checkpoint_path.is_dir() else None\n",
")\n",
"if resume_from_checkpoint:\n",
"    print(\"Resuming from checkpoint:\", resume_from_checkpoint)\n",
"trainer.train(resume_from_checkpoint=resume_from_checkpoint)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "172e47a7-400e-4f82-a5e3-38135ecf532f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-20T08:45:47.918275Z",
"iopub.status.busy": "2023-10-20T08:45:47.917652Z",
"iopub.status.idle": "2023-10-20T08:46:06.598970Z",
"shell.execute_reply": "2023-10-20T08:46:06.598308Z"
},
"papermill": {
"duration": 19.754456,
"end_time": "2023-10-20T08:46:06.600631",
"exception": false,
"start_time": "2023-10-20T08:45:46.846175",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"PeftModelForCausalLM(\n",
" (base_model): LoraModel(\n",
" (model): LlamaForCausalLM(\n",
" (model): LlamaModel(\n",
" (embed_tokens): ModulesToSaveWrapper(\n",
" (original_module): Embedding(32001, 4096)\n",
" (modules_to_save): ModuleDict(\n",
" (default): Embedding(32001, 4096)\n",
" )\n",
" )\n",
" (layers): ModuleList(\n",
" (0-31): 32 x LlamaDecoderLayer(\n",
" (self_attn): LlamaAttention(\n",
" (q_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (k_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (v_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (o_proj): Linear(\n",
" in_features=4096, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (rotary_emb): LlamaRotaryEmbedding()\n",
" )\n",
" (mlp): LlamaMLP(\n",
" (gate_proj): Linear(\n",
" in_features=4096, out_features=11008, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=11008, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (up_proj): Linear(\n",
" in_features=4096, out_features=11008, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=11008, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (down_proj): Linear(\n",
" in_features=11008, out_features=4096, bias=False\n",
" (lora_dropout): ModuleDict(\n",
" (default): Dropout(p=0.05, inplace=False)\n",
" )\n",
" (lora_A): ModuleDict(\n",
" (default): Linear(in_features=11008, out_features=16, bias=False)\n",
" )\n",
" (lora_B): ModuleDict(\n",
" (default): Linear(in_features=16, out_features=4096, bias=False)\n",
" )\n",
" (lora_embedding_A): ParameterDict()\n",
" (lora_embedding_B): ParameterDict()\n",
" )\n",
" (act_fn): SiLUActivation()\n",
" )\n",
" (input_layernorm): LlamaRMSNorm()\n",
" (post_attention_layernorm): LlamaRMSNorm()\n",
" )\n",
" )\n",
" (norm): LlamaRMSNorm()\n",
" )\n",
" (lm_head): ModulesToSaveWrapper(\n",
" (original_module): Linear(in_features=4096, out_features=32001, bias=False)\n",
" (modules_to_save): ModuleDict(\n",
" (default): Linear(in_features=4096, out_features=32001, bias=False)\n",
" )\n",
" )\n",
" )\n",
" )\n",
")"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.save_pretrained(trained_model_path_lora)\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "dea4e68e-57a7-48bd-bad9-f03dfe3f8a06",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-20T08:46:08.783717Z",
"iopub.status.busy": "2023-10-20T08:46:08.782966Z",
"iopub.status.idle": "2023-10-20T08:46:09.033621Z",
"shell.execute_reply": "2023-10-20T08:46:09.032713Z"
},
"papermill": {
"duration": 1.261451,
"end_time": "2023-10-20T08:46:09.035288",
"exception": false,
"start_time": "2023-10-20T08:46:07.773837",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 1.2G\r\n",
" 512 -rw-r--r-- 1 root 3003 88 Oct 20 08:45 README.md\r\n",
"1.0K -rw-r--r-- 1 root 3003 550 Oct 20 08:46 adapter_config.json\r\n",
"1.2G -rw-r--r-- 1 root 3003 1.2G Oct 20 08:45 adapter_model.bin\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -lash {trained_model_path_lora}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "09db36b7-ead6-4368-9bfb-13ba1ba800a5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-20T08:46:11.071508Z",
"iopub.status.busy": "2023-10-20T08:46:11.070823Z",
"iopub.status.idle": "2023-10-20T08:47:03.388936Z",
"shell.execute_reply": "2023-10-20T08:47:03.388264Z"
},
"papermill": {
"duration": 54.544709,
"end_time": "2023-10-20T08:47:04.578229",
"exception": false,
"start_time": "2023-10-20T08:46:10.033520",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"LlamaForCausalLM(\n",
" (model): LlamaModel(\n",
" (embed_tokens): Embedding(32001, 4096)\n",
" (layers): ModuleList(\n",
" (0-31): 32 x LlamaDecoderLayer(\n",
" (self_attn): LlamaAttention(\n",
" (q_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (k_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (v_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (o_proj): Linear(in_features=4096, out_features=4096, bias=False)\n",
" (rotary_emb): LlamaRotaryEmbedding()\n",
" )\n",
" (mlp): LlamaMLP(\n",
" (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (up_proj): Linear(in_features=4096, out_features=11008, bias=False)\n",
" (down_proj): Linear(in_features=11008, out_features=4096, bias=False)\n",
" (act_fn): SiLUActivation()\n",
" )\n",
" (input_layernorm): LlamaRMSNorm()\n",
" (post_attention_layernorm): LlamaRMSNorm()\n",
" )\n",
" )\n",
" (norm): LlamaRMSNorm()\n",
" )\n",
" (lm_head): Linear(in_features=4096, out_features=32001, bias=False)\n",
")"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = model.merge_and_unload().half()\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "270a9a72-3a12-4d83-aa7d-2d167cb28cb4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-20T08:47:06.891191Z",
"iopub.status.busy": "2023-10-20T08:47:06.890837Z",
"iopub.status.idle": "2023-10-20T08:47:07.137287Z",
"shell.execute_reply": "2023-10-20T08:47:07.136391Z"
},
"papermill": {
"duration": 1.395578,
"end_time": "2023-10-20T08:47:07.138853",
"exception": false,
"start_time": "2023-10-20T08:47:05.743275",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 0\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 20 04:34 checkpoints\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 20 04:34 lora\r\n",
"drwxr-xr-x 1 root 3003 0 Oct 20 04:29 src\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -l {trained_model_path}"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "260e9d79-6eb8-4516-bf8f-825a25606391",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-20T08:47:09.318793Z",
"iopub.status.busy": "2023-10-20T08:47:09.317959Z",
"iopub.status.idle": "2023-10-20T08:50:32.295853Z",
"shell.execute_reply": "2023-10-20T08:50:32.295187Z"
},
"papermill": {
"duration": 205.2148,
"end_time": "2023-10-20T08:50:33.467533",
"exception": false,
"start_time": "2023-10-20T08:47:08.252733",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"('/content/artifacts/tokenizer_config.json',\n",
" '/content/artifacts/special_tokens_map.json',\n",
" '/content/artifacts/tokenizer.model',\n",
" '/content/artifacts/added_tokens.json',\n",
" '/content/artifacts/tokenizer.json')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.save_pretrained(trained_model_path)\n",
"tokenizer.save_pretrained(trained_model_path)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "a575ff52-a6ff-4322-af8e-2629c0e110a0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-20T08:50:35.318692Z",
"iopub.status.busy": "2023-10-20T08:50:35.317875Z",
"iopub.status.idle": "2023-10-20T08:50:35.798065Z",
"shell.execute_reply": "2023-10-20T08:50:35.797213Z"
},
"papermill": {
"duration": 1.406588,
"end_time": "2023-10-20T08:50:35.799961",
"exception": false,
"start_time": "2023-10-20T08:50:34.393373",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 13G\r\n",
" 512 -rw-r--r-- 1 root 3003 21 Oct 20 08:50 added_tokens.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 20 04:34 checkpoints\r\n",
"1.0K -rw-r--r-- 1 root 3003 648 Oct 20 08:47 config.json\r\n",
" 512 -rw-r--r-- 1 root 3003 183 Oct 20 08:47 generation_config.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 20 04:34 lora\r\n",
"9.3G -rw-r--r-- 1 root 3003 9.3G Oct 20 08:47 pytorch_model-00001-of-00002.bin\r\n",
"3.3G -rw-r--r-- 1 root 3003 3.3G Oct 20 08:49 pytorch_model-00002-of-00002.bin\r\n",
" 24K -rw-r--r-- 1 root 3003 24K Oct 20 08:50 pytorch_model.bin.index.json\r\n",
"1.0K -rw-r--r-- 1 root 3003 552 Oct 20 08:50 special_tokens_map.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 20 04:29 src\r\n",
"1.8M -rw-r--r-- 1 root 3003 1.8M Oct 20 08:50 tokenizer.json\r\n",
"489K -rw-r--r-- 1 root 3003 489K Oct 20 08:50 tokenizer.model\r\n",
"1.5K -rw-r--r-- 1 root 3003 1.1K Oct 20 08:50 tokenizer_config.json\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 13G\r\n",
"2.0K -rw-r--r-- 1 root 3003 1.6K Oct 5 05:21 .gitattributes\r\n",
"7.0K -rw-r--r-- 1 root 3003 6.9K Oct 5 05:21 LICENSE.txt\r\n",
" 11K -rw-r--r-- 1 root 3003 11K Oct 5 05:21 README.md\r\n",
"1.2M -rw-r--r-- 1 root 3003 1.2M Oct 5 05:21 Responsible-Use-Guide.pdf\r\n",
"5.0K -rw-r--r-- 1 root 3003 4.7K Oct 5 05:21 USE_POLICY.md\r\n",
"1.0K -rw-r--r-- 1 root 3003 609 Oct 5 05:21 config.json\r\n",
" 512 -rw-r--r-- 1 root 3003 188 Oct 5 05:21 generation_config.json\r\n",
"9.3G -rw-r--r-- 1 root 3003 9.3G Oct 5 05:22 model-00001-of-00002.safetensors\r\n",
"3.3G -rw-r--r-- 1 root 3003 3.3G Oct 5 05:22 model-00002-of-00002.safetensors\r\n",
" 27K -rw-r--r-- 1 root 3003 27K Oct 5 05:21 model.safetensors.index.json\r\n",
" 512 -rw-r--r-- 1 root 3003 414 Oct 5 05:21 special_tokens_map.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 20 08:50 src\r\n",
"1.8M -rw-r--r-- 1 root 3003 1.8M Oct 5 05:21 tokenizer.json\r\n",
"489K -rw-r--r-- 1 root 3003 489K Oct 5 05:21 tokenizer.model\r\n",
"1.0K -rw-r--r-- 1 root 3003 776 Oct 5 05:21 tokenizer_config.json\r\n"
]
}
],
"source": [
"! ls -lash {trained_model_path}\n",
"! ls -lash {model_path}"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "6d90a920-fb22-4291-8466-411ff41e31be",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-20T08:50:37.854282Z",
"iopub.status.busy": "2023-10-20T08:50:37.853439Z",
"iopub.status.idle": "2023-10-20T08:50:38.103769Z",
"shell.execute_reply": "2023-10-20T08:50:38.102831Z"
},
"papermill": {
"duration": 1.245211,
"end_time": "2023-10-20T08:50:38.105374",
"exception": false,
"start_time": "2023-10-20T08:50:36.860163",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"total 13G\r\n",
" 512 -rw-r--r-- 1 root 3003 21 Oct 20 08:50 added_tokens.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 20 04:34 checkpoints\r\n",
"1.0K -rw-r--r-- 1 root 3003 648 Oct 20 08:47 config.json\r\n",
" 512 -rw-r--r-- 1 root 3003 183 Oct 20 08:47 generation_config.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 20 04:34 lora\r\n",
"9.3G -rw-r--r-- 1 root 3003 9.3G Oct 20 08:47 pytorch_model-00001-of-00002.bin\r\n",
"3.3G -rw-r--r-- 1 root 3003 3.3G Oct 20 08:49 pytorch_model-00002-of-00002.bin\r\n",
" 24K -rw-r--r-- 1 root 3003 24K Oct 20 08:50 pytorch_model.bin.index.json\r\n",
"1.0K -rw-r--r-- 1 root 3003 552 Oct 20 08:50 special_tokens_map.json\r\n",
" 0 drwxr-xr-x 1 root 3003 0 Oct 20 04:29 src\r\n",
"1.8M -rw-r--r-- 1 root 3003 1.8M Oct 20 08:50 tokenizer.json\r\n",
"489K -rw-r--r-- 1 root 3003 489K Oct 20 08:50 tokenizer.model\r\n",
"1.5K -rw-r--r-- 1 root 3003 1.1K Oct 20 08:50 tokenizer_config.json\r\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
}
],
"source": [
"! ls -lash {trained_model_path}"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "202a694a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-10-20T08:50:39.994565Z",
"iopub.status.busy": "2023-10-20T08:50:39.993719Z"
},
"papermill": {
"duration": null,
"end_time": null,
"exception": false,
"start_time": "2023-10-20T08:50:39.040596",
"status": "running"
},
"tags": []
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "79038243991f481395b9433f4dcf59b5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"pytorch_model-00001-of-00002.bin: 0%| | 0.00/9.98G [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f9a6e0650c66418d8b8a8370e9a956e0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"pytorch_model-00002-of-00002.bin: 0%| | 0.00/3.50G [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c4b11f630b62451e9f885874a6247781",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Upload 2 LFS files: 0%| | 0/2 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import shutil\n",
"\n",
"from huggingface_hub import HfApi\n",
"\n",
"# Make sure the output directory has a `tokenizer.model`: copy the base model's file\n",
"# only when the base has one and the output directory does not.\n",
"tokenizer_model_path_base = Path(model_path) / \"tokenizer.model\"\n",
"tokenizer_model_path_trained = Path(trained_model_path) / \"tokenizer.model\"\n",
"needs_tokenizer_copy = (\n",
"    tokenizer_model_path_base.exists() and not tokenizer_model_path_trained.exists()\n",
")\n",
"if needs_tokenizer_copy:\n",
"    shutil.copy(tokenizer_model_path_base, tokenizer_model_path_trained)\n",
"\n",
"# Publishing is opt-in: nothing is pushed unless `push_to_hub` names a repo in `params`.\n",
"repo_id = params.get(\"push_to_hub\")\n",
"if repo_id:\n",
"    model.push_to_hub(repo_id)\n",
"    tokenizer.push_to_hub(repo_id)\n",
"    hf_api = HfApi()\n",
"    logs_path = Path(\"/content/artifacts/src/train.ipynb\")\n",
"    # Extra files uploaded when present, in this order: the base model's\n",
"    # `tokenizer.model`, then the executed notebook used as a training log.\n",
"    for extra_file in (tokenizer_model_path_base, logs_path):\n",
"        if extra_file.exists():\n",
"            hf_api.upload_file(\n",
"                path_or_fileobj=extra_file,\n",
"                path_in_repo=extra_file.name,\n",
"                repo_id=repo_id,\n",
"            )\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1528b32-3e4d-463c-9473-97908d7dfcf5",
"metadata": {
"papermill": {
"duration": null,
"end_time": null,
"exception": null,
"start_time": null,
"status": "pending"
},
"tags": []
},
"outputs": [],
"source": [
"! nvidia-smi"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04651d47",
"metadata": {
"papermill": {
"duration": null,
"end_time": null,
"exception": null,
"start_time": null,
"status": "pending"
},
"tags": []
},
"outputs": [],
"source": [
"# Optional smoke test: runs only when `inference_prompt_template` is set in `params`.\n",
"inference_prompt = params.get(\"inference_prompt_template\")\n",
"if inference_prompt:\n",
"    # Reload the saved model from `trained_model_path` in 8-bit; this rebinds `model`.\n",
"    load_kwargs = dict(\n",
"        torch_dtype=torch.float16,\n",
"        device_map=\"auto\",\n",
"        trust_remote_code=True,\n",
"        load_in_8bit=True,\n",
"    )\n",
"    model = AutoModelForCausalLM.from_pretrained(trained_model_path, **load_kwargs)\n",
"    device = \"cuda\"\n",
"\n",
"    # Fill the template with the fields of the first training example.\n",
"    prompt = inference_prompt.format_map(data[\"train\"][0])\n",
"    model_inputs = tokenizer([prompt], return_tensors=\"pt\").to(device)\n",
"\n",
"    # NOTE(review): `temperature` only matters when sampling is enabled; confirm that\n",
"    # `do_sample` is turned on (e.g. through the saved generation config).\n",
"    generated_ids = model.generate(**model_inputs, max_new_tokens=300, temperature=0.6)\n",
"\n",
"    print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f5cc20fc-5aaf-4e4c-92f8-cce0ed0fe1ee",
"metadata": {
"papermill": {
"duration": null,
"end_time": null,
"exception": null,
"start_time": null,
"status": "pending"
},
"tags": []
},
"outputs": [],
"source": [
"# Last step: upload the executed notebook as the training log, but only when a target\n",
"# repo is configured via `push_to_hub` and the notebook file exists on disk.\n",
"repo_id = params.get(\"push_to_hub\")\n",
"if repo_id:\n",
"    hf_api = HfApi()\n",
"    logs_path = Path(\"/content/artifacts/src/train.ipynb\")\n",
"    upload_kwargs = dict(\n",
"        path_or_fileobj=logs_path,\n",
"        path_in_repo=logs_path.name,\n",
"        repo_id=repo_id,\n",
"    )\n",
"    if logs_path.exists():\n",
"        hf_api.upload_file(**upload_kwargs)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"papermill": {
"default_parameters": {},
"duration": null,
"end_time": null,
"environment_variables": {},
"exception": null,
"input_path": "/content/src/train.ipynb",
"output_path": "/content/artifacts/src/train.ipynb",
"parameters": {},
"start_time": "2023-10-20T04:29:06.977195",
"version": "2.4.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}