You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Using pad_token, but it is not set yet.
Traceback (most recent call last):
  File "/mnt/storage/aatherton/hf_synth_trans/synth_translation.py", line 130, in <module>
    trainer.evaluate(max_length=max_length)
  File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 159, in evaluate
    return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 2972, in evaluate
    output = eval_loop(
             ^^^^^^^^^^
  File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 3151, in evaluation_loop
    for step, inputs in enumerate(dataloader):
  File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/accelerate/data_loader.py", line 384, in __iter__
    current_batch = next(dataloader_iter)
                    ^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 633, in __next__
    data = self._next_data()
           ^^^^^^^^^^^^^^^^^
  File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 677, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/data/data_collator.py", line 586, in __call__
    features = self.tokenizer.pad(
               ^^^^^^^^^^^^^^^^^^^
  File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 3059, in pad
    padding_strategy, _, max_length, _ = self._get_padding_truncation_strategies(
                                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 2507, in _get_padding_truncation_strategies
    raise ValueError(
ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`.
|
|
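The failure happens inside the data collator's call to `tokenizer.pad` during `trainer.evaluate`: GPT-2's tokenizer ships with no `pad_token`, so padding a batch is impossible. Below is a minimal sketch of the two fixes the error message itself suggests, assuming the tokenizer and model come from the stock `gpt2` checkpoint (the actual `synth_translation.py` isn't shown, so the names here are placeholders); either change must be made before the data collator and trainer are constructed.

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

# Assumed checkpoint; substitute whatever synth_translation.py actually loads.
checkpoint = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

# Option 1: reuse the EOS token as the padding token (no new embeddings needed).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id

# Option 2: add a dedicated [PAD] token, then resize the embeddings so the new id is valid.
# tokenizer.add_special_tokens({"pad_token": "[PAD]"})
# model.resize_token_embeddings(len(tokenizer))
# model.config.pad_token_id = tokenizer.pad_token_id
```

Option 1 is the lighter touch since it adds no new vocabulary; option 2 keeps padding distinct from EOS but requires resizing the model's token embeddings so the new `[PAD]` id exists.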