WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
INFO:__main__:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=IntervalStrategy.EPOCH,
eval_use_gather_object=False,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
gradient_checkpointing_kwargs=None,
greater_is_better=False,
group_by_length=False,
half_precision_backend=auto,
hub_always_push=False,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,
include_num_input_tokens_seen=False,
include_tokens_per_second=False,
jit_mode_eval=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=5e-05,
length_column_name=length,
load_best_model_at_end=True,
local_rank=0,
log_level=passive,
log_level_replica=warning,
log_on_each_node=True,
logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_eg/runs/Sep01_15-14-35_lmgpu-node-07,
logging_first_step=False,
logging_nan_inf_filter=True,
logging_steps=500,
logging_strategy=IntervalStrategy.EPOCH,
lr_scheduler_kwargs={},
lr_scheduler_type=SchedulerType.LINEAR,
max_grad_norm=1.0,
max_steps=-1,
metric_for_best_model=loss,
mp_parameters=,
neftune_noise_alpha=None,
no_cuda=False,
num_train_epochs=20.0,
optim=OptimizerNames.ADAMW_TORCH,
optim_args=None,
optim_target_modules=None,
output_dir=/home/iais_marenpielka/Bouthaina/res_nw_eg,
overwrite_output_dir=False,
past_index=-1,
per_device_eval_batch_size=8,
per_device_train_batch_size=8,
prediction_loss_only=False,
push_to_hub=True,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
remove_unused_columns=True,
report_to=[],
restore_callback_states_from_checkpoint=False,
resume_from_checkpoint=None,
run_name=/home/iais_marenpielka/Bouthaina/res_nw_eg,
save_on_each_node=False,
save_only_model=False,
save_safetensors=True,
save_steps=500,
save_strategy=IntervalStrategy.EPOCH,
save_total_limit=None,
seed=42,
skip_memory_metrics=True,
split_batches=None,
tf32=None,
torch_compile=False,
torch_compile_backend=None,
torch_compile_mode=None,
torch_empty_cache_steps=None,
torchdynamo=None,
tpu_metrics_debug=False,
tpu_num_cores=None,
use_cpu=False,
use_ipex=False,
use_legacy_prediction_loop=False,
use_mps_device=False,
warmup_ratio=0.0,
warmup_steps=500,
weight_decay=0.0,
)
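The dump above can be reproduced roughly as follows; this is a minimal sketch (assuming a recent transformers release, 4.41+), listing only the values that differ from the defaults in the log:

# Hedged sketch: approximate construction of the TrainingArguments printed above.
# Only non-default values from the dump are shown; everything else is left at its default.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="/home/iais_marenpielka/Bouthaina/res_nw_eg",
    do_train=True,
    do_eval=True,
    eval_strategy="epoch",           # eval_strategy=IntervalStrategy.EPOCH
    logging_strategy="epoch",        # logging_strategy=IntervalStrategy.EPOCH
    save_strategy="epoch",           # save_strategy=IntervalStrategy.EPOCH
    num_train_epochs=20.0,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=5e-05,
    warmup_steps=500,
    load_best_model_at_end=True,
    metric_for_best_model="loss",    # greater_is_better=False follows from "loss"
    seed=42,
    push_to_hub=True,
)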
INFO:datasets.builder:Using custom data configuration default-8c97581fc2299c6f
INFO:datasets.info:Loading Dataset Infos from /home/iais_marenpielka/Bouthaina/miniconda3/lib/python3.12/site-packages/datasets/packaged_modules/text
INFO:datasets.builder:Generating dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-8c97581fc2299c6f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
INFO:datasets.builder:Downloading and preparing dataset text/default to /home/iais_marenpielka/.cache/huggingface/datasets/text/default-8c97581fc2299c6f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101...
INFO:datasets.download.download_manager:Downloading took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min
INFO:datasets.builder:Generating train split
INFO:datasets.builder:Generating validation split
INFO:datasets.utils.info_utils:Unable to verify splits sizes.
INFO:datasets.builder:Dataset text downloaded and prepared to /home/iais_marenpielka/.cache/huggingface/datasets/text/default-8c97581fc2299c6f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101. Subsequent calls will reuse this data.
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-8c97581fc2299c6f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-c736ae48f9f531d9.arrow
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-8c97581fc2299c6f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-0ac9dd935c074d94.arrow
WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-8c97581fc2299c6f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-75a2a4e12a96c35b.arrow
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-8c97581fc2299c6f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-fac740983703b34b.arrow
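The block_size warning above comes from the usual fallback in run_clm-style scripts when the tokenizer reports a sentinel model_max_length. The exact script is not shown in this log; the sketch below mirrors that logic under the assumption that the model's context length (768 here) is used as the cap, and the function name is illustrative:

# Hedged sketch of the block_size fallback that produces the warning above.
import logging

logger = logging.getLogger(__name__)

def resolve_block_size(requested: int | None, model_max_length: int,
                       max_position_embeddings: int = 768) -> int:
    # No --block_size passed: fall back to the tokenizer limit, capped at the model context.
    if requested is None:
        if model_max_length > max_position_embeddings:
            logger.warning(
                "The tokenizer picked seems to have a very large `model_max_length` (%d). "
                "Using block_size=%d instead. You can change that default value by "
                "passing --block_size xxx.",
                model_max_length, max_position_embeddings,
            )
            return max_position_embeddings
        return model_max_length
    # Explicit --block_size: never exceed what the tokenizer supports.
    return min(requested, model_max_length)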
WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
INFO:root:Epoch 1.0: Train Loss = None, Eval Loss = None
INFO:absl:Using default tokenizer.
INFO:root:Epoch 2.0: Train Loss = 1.1362, Eval Loss = 0.9122783541679382
INFO:absl:Using default tokenizer.
INFO:root:Epoch 3.0: Train Loss = 0.7501, Eval Loss = 0.8436459302902222
INFO:absl:Using default tokenizer.
INFO:root:Epoch 4.0: Train Loss = 0.6462, Eval Loss = 0.8298296928405762
INFO:absl:Using default tokenizer.
INFO:root:Epoch 5.0: Train Loss = 0.5705, Eval Loss = 0.8327584862709045
INFO:absl:Using default tokenizer.
INFO:root:Epoch 6.0: Train Loss = 0.5087, Eval Loss = 0.8406437635421753
INFO:absl:Using default tokenizer.
INFO:root:Epoch 7.0: Train Loss = 0.4559, Eval Loss = 0.8647022843360901
INFO:absl:Using default tokenizer.
INFO:root:Epoch 8.0: Train Loss = 0.4104, Eval Loss = 0.8769952058792114
INFO:absl:Using default tokenizer.
INFO:__main__:*** Evaluate ***
INFO:absl:Using default tokenizer.
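With load_best_model_at_end=True and metric_for_best_model=loss, the checkpoint restored for the final evaluation should be the one with the lowest eval loss, which in this run is epoch 4 (0.8298). A small sketch of reading that off the logged values and converting the cross-entropy loss to perplexity (loss values copied from this log):

# Hedged sketch: best epoch by eval loss, plus the corresponding perplexity.
import math

eval_losses = {
    2: 0.9122783541679382,
    3: 0.8436459302902222,
    4: 0.8298296928405762,
    5: 0.8327584862709045,
    6: 0.8406437635421753,
    7: 0.8647022843360901,
    8: 0.8769952058792114,
}

best_epoch = min(eval_losses, key=eval_losses.get)
print(f"best epoch: {best_epoch}, eval loss: {eval_losses[best_epoch]:.4f}, "
      f"perplexity: {math.exp(eval_losses[best_epoch]):.2f}")
# -> best epoch: 4, eval loss: 0.8298, perplexity: 2.29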