diff --git a/eng-nah-svo-cpt/merges.txt b/eng-nah-svo-cpt/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..fcecdc26f2eae6c453a0256e84a1245332d1307c --- /dev/null +++ b/eng-nah-svo-cpt/merges.txt @@ -0,0 +1,3 @@ +#version: 0.2 - Trained by `huggingface/tokenizers` +e n +f r diff --git a/eng-nah-svo-cpt/special_tokens_map.json b/eng-nah-svo-cpt/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0204ed10c186a4c7c68f55dff8f26087a45898d6 --- /dev/null +++ b/eng-nah-svo-cpt/special_tokens_map.json @@ -0,0 +1,5 @@ +{ + "bos_token": "<|endoftext|>", + "eos_token": "<|endoftext|>", + "unk_token": "<|endoftext|>" +} diff --git a/eng-nah-svo-cpt/tokenizer.json b/eng-nah-svo-cpt/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a7733f123f1c204c9e1c25b1292a721ac4f18214 --- /dev/null +++ b/eng-nah-svo-cpt/tokenizer.json @@ -0,0 +1,308 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": true + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "vocab": { + "<|endoftext|>": 0, + "!": 1, + "\"": 2, + "#": 3, + "$": 4, + "%": 5, + "&": 6, + "'": 7, + "(": 8, + ")": 9, + "*": 10, + "+": 11, + ",": 12, + "-": 13, + ".": 14, + "/": 15, + "0": 16, + "1": 17, + "2": 18, + "3": 19, + "4": 20, + "5": 21, + "6": 22, + "7": 23, 
+ "8": 24, + "9": 25, + ":": 26, + ";": 27, + "<": 28, + "=": 29, + ">": 30, + "?": 31, + "@": 32, + "A": 33, + "B": 34, + "C": 35, + "D": 36, + "E": 37, + "F": 38, + "G": 39, + "H": 40, + "I": 41, + "J": 42, + "K": 43, + "L": 44, + "M": 45, + "N": 46, + "O": 47, + "P": 48, + "Q": 49, + "R": 50, + "S": 51, + "T": 52, + "U": 53, + "V": 54, + "W": 55, + "X": 56, + "Y": 57, + "Z": 58, + "[": 59, + "\\": 60, + "]": 61, + "^": 62, + "_": 63, + "`": 64, + "a": 65, + "b": 66, + "c": 67, + "d": 68, + "e": 69, + "f": 70, + "g": 71, + "h": 72, + "i": 73, + "j": 74, + "k": 75, + "l": 76, + "m": 77, + "n": 78, + "o": 79, + "p": 80, + "q": 81, + "r": 82, + "s": 83, + "t": 84, + "u": 85, + "v": 86, + "w": 87, + "x": 88, + "y": 89, + "z": 90, + "{": 91, + "|": 92, + "}": 93, + "~": 94, + "¡": 95, + "¢": 96, + "£": 97, + "¤": 98, + "¥": 99, + "¦": 100, + "§": 101, + "¨": 102, + "©": 103, + "ª": 104, + "«": 105, + "¬": 106, + "®": 107, + "¯": 108, + "°": 109, + "±": 110, + "²": 111, + "³": 112, + "´": 113, + "µ": 114, + "¶": 115, + "·": 116, + "¸": 117, + "¹": 118, + "º": 119, + "»": 120, + "¼": 121, + "½": 122, + "¾": 123, + "¿": 124, + "À": 125, + "Á": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "È": 133, + "É": 134, + "Ê": 135, + "Ë": 136, + "Ì": 137, + "Í": 138, + "Î": 139, + "Ï": 140, + "Ð": 141, + "Ñ": 142, + "Ò": 143, + "Ó": 144, + "Ô": 145, + "Õ": 146, + "Ö": 147, + "×": 148, + "Ø": 149, + "Ù": 150, + "Ú": 151, + "Û": 152, + "Ü": 153, + "Ý": 154, + "Þ": 155, + "ß": 156, + "à": 157, + "á": 158, + "â": 159, + "ã": 160, + "ä": 161, + "å": 162, + "æ": 163, + "ç": 164, + "è": 165, + "é": 166, + "ê": 167, + "ë": 168, + "ì": 169, + "í": 170, + "î": 171, + "ï": 172, + "ð": 173, + "ñ": 174, + "ò": 175, + "ó": 176, + "ô": 177, + "õ": 178, + "ö": 179, + "÷": 180, + "ø": 181, + "ù": 182, + "ú": 183, + "û": 184, + "ü": 185, + "ý": 186, + "þ": 187, + "ÿ": 188, + "Ā": 189, + "ā": 190, + "Ă": 191, + "ă": 192, + "Ą": 193, + "ą": 194, + "Ć": 195, + "ć": 
196, + "Ĉ": 197, + "ĉ": 198, + "Ċ": 199, + "ċ": 200, + "Č": 201, + "č": 202, + "Ď": 203, + "ď": 204, + "Đ": 205, + "đ": 206, + "Ē": 207, + "ē": 208, + "Ĕ": 209, + "ĕ": 210, + "Ė": 211, + "ė": 212, + "Ę": 213, + "ę": 214, + "Ě": 215, + "ě": 216, + "Ĝ": 217, + "ĝ": 218, + "Ğ": 219, + "ğ": 220, + "Ġ": 221, + "ġ": 222, + "Ģ": 223, + "ģ": 224, + "Ĥ": 225, + "ĥ": 226, + "Ħ": 227, + "ħ": 228, + "Ĩ": 229, + "ĩ": 230, + "Ī": 231, + "ī": 232, + "Ĭ": 233, + "ĭ": 234, + "Į": 235, + "į": 236, + "İ": 237, + "ı": 238, + "IJ": 239, + "ij": 240, + "Ĵ": 241, + "ĵ": 242, + "Ķ": 243, + "ķ": 244, + "ĸ": 245, + "Ĺ": 246, + "ĺ": 247, + "Ļ": 248, + "ļ": 249, + "Ľ": 250, + "ľ": 251, + "Ŀ": 252, + "ŀ": 253, + "Ł": 254, + "ł": 255, + "Ń": 256, + "en": 257, + "fr": 258 + }, + "merges": [ + "e n", + "f r" + ] + } +} \ No newline at end of file diff --git a/eng-nah-svo-cpt/tokenizer_config.json b/eng-nah-svo-cpt/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..546efe6d18ae2ad7758d0c9ef51cacdb81c8dc9d --- /dev/null +++ b/eng-nah-svo-cpt/tokenizer_config.json @@ -0,0 +1,9 @@ +{ + "add_prefix_space": false, + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|endoftext|>", + "model_max_length": 1024, + "tokenizer_class": "GPT2Tokenizer", + "unk_token": "<|endoftext|>" +} diff --git a/eng-nah-svo-cpt/vocab.json b/eng-nah-svo-cpt/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..9cd5bf34d923be02a0eae563f9c595833f08bef3 --- /dev/null +++ b/eng-nah-svo-cpt/vocab.json @@ -0,0 +1 @@ 
+{"<|endoftext|>":0,"!":1,"\"":2,"#":3,"$":4,"%":5,"&":6,"'":7,"(":8,")":9,"*":10,"+":11,",":12,"-":13,".":14,"/":15,"0":16,"1":17,"2":18,"3":19,"4":20,"5":21,"6":22,"7":23,"8":24,"9":25,":":26,";":27,"<":28,"=":29,">":30,"?":31,"@":32,"A":33,"B":34,"C":35,"D":36,"E":37,"F":38,"G":39,"H":40,"I":41,"J":42,"K":43,"L":44,"M":45,"N":46,"O":47,"P":48,"Q":49,"R":50,"S":51,"T":52,"U":53,"V":54,"W":55,"X":56,"Y":57,"Z":58,"[":59,"\\":60,"]":61,"^":62,"_":63,"`":64,"a":65,"b":66,"c":67,"d":68,"e":69,"f":70,"g":71,"h":72,"i":73,"j":74,"k":75,"l":76,"m":77,"n":78,"o":79,"p":80,"q":81,"r":82,"s":83,"t":84,"u":85,"v":86,"w":87,"x":88,"y":89,"z":90,"{":91,"|":92,"}":93,"~":94,"¡":95,"¢":96,"£":97,"¤":98,"¥":99,"¦":100,"§":101,"¨":102,"©":103,"ª":104,"«":105,"¬":106,"®":107,"¯":108,"°":109,"±":110,"²":111,"³":112,"´":113,"µ":114,"¶":115,"·":116,"¸":117,"¹":118,"º":119,"»":120,"¼":121,"½":122,"¾":123,"¿":124,"À":125,"Á":126,"Â":127,"Ã":128,"Ä":129,"Å":130,"Æ":131,"Ç":132,"È":133,"É":134,"Ê":135,"Ë":136,"Ì":137,"Í":138,"Î":139,"Ï":140,"Ð":141,"Ñ":142,"Ò":143,"Ó":144,"Ô":145,"Õ":146,"Ö":147,"×":148,"Ø":149,"Ù":150,"Ú":151,"Û":152,"Ü":153,"Ý":154,"Þ":155,"ß":156,"à":157,"á":158,"â":159,"ã":160,"ä":161,"å":162,"æ":163,"ç":164,"è":165,"é":166,"ê":167,"ë":168,"ì":169,"í":170,"î":171,"ï":172,"ð":173,"ñ":174,"ò":175,"ó":176,"ô":177,"õ":178,"ö":179,"÷":180,"ø":181,"ù":182,"ú":183,"û":184,"ü":185,"ý":186,"þ":187,"ÿ":188,"Ā":189,"ā":190,"Ă":191,"ă":192,"Ą":193,"ą":194,"Ć":195,"ć":196,"Ĉ":197,"ĉ":198,"Ċ":199,"ċ":200,"Č":201,"č":202,"Ď":203,"ď":204,"Đ":205,"đ":206,"Ē":207,"ē":208,"Ĕ":209,"ĕ":210,"Ė":211,"ė":212,"Ę":213,"ę":214,"Ě":215,"ě":216,"Ĝ":217,"ĝ":218,"Ğ":219,"ğ":220,"Ġ":221,"ġ":222,"Ģ":223,"ģ":224,"Ĥ":225,"ĥ":226,"Ħ":227,"ħ":228,"Ĩ":229,"ĩ":230,"Ī":231,"ī":232,"Ĭ":233,"ĭ":234,"Į":235,"į":236,"İ":237,"ı":238,"IJ":239,"ij":240,"Ĵ":241,"ĵ":242,"Ķ":243,"ķ":244,"ĸ":245,"Ĺ":246,"ĺ":247,"Ļ":248,"ļ":249,"Ľ":250,"ľ":251,"Ŀ":252,"ŀ":253,"Ł":254,"ł":255,"Ń":256,"en":257,"fr":258} \ No newline at 
end of file diff --git a/eng-nah-svo-translation/README.md b/eng-nah-svo-translation/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ea189e3dfa21cee69cfcd5a4b4e1369eacb0e216 --- /dev/null +++ b/eng-nah-svo-translation/README.md @@ -0,0 +1,58 @@ +--- +license: apache-2.0 +base_model: Helsinki-NLP/opus-mt-en-fr +tags: +- translation +- generated_from_trainer +metrics: +- bleu +model-index: +- name: eng-nah-svo-translation + results: [] +--- + + + +# eng-nah-svo-translation + +This model is a fine-tuned version of [Helsinki-NLP/opus-mt-en-fr](https://huggingface.co/Helsinki-NLP/opus-mt-en-fr) on an unknown dataset. +It achieves the following results on the evaluation set: +- Loss: 0.7505 +- Bleu: 0.0 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-05 +- train_batch_size: 32 +- eval_batch_size: 64 +- seed: 42 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: linear +- num_epochs: 3 + +### Training results + + + +### Framework versions + +- Transformers 4.32.1 +- Pytorch 2.0.1 +- Datasets 2.14.5 +- Tokenizers 0.13.2 diff --git a/eng-nah-svo-translation/added_tokens.json b/eng-nah-svo-translation/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..20fcd9a180a63961398838e6227d85f489eb5b9e --- /dev/null +++ b/eng-nah-svo-translation/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 259 +} diff --git a/eng-nah-svo-translation/checkpoint-228/added_tokens.json b/eng-nah-svo-translation/checkpoint-228/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..20fcd9a180a63961398838e6227d85f489eb5b9e --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/added_tokens.json @@ -0,0 +1,3 @@ +{ + 
"[PAD]": 259 +} diff --git a/eng-nah-svo-translation/checkpoint-228/config.json b/eng-nah-svo-translation/checkpoint-228/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f521a43cea2ef81e380c41b63acd9b8fbc437f1 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/config.json @@ -0,0 +1,62 @@ +{ + "_name_or_path": "Helsinki-NLP/opus-mt-en-fr", + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "swish", + "add_bias_logits": false, + "add_final_layer_norm": false, + "architectures": [ + "MarianMTModel" + ], + "attention_dropout": 0.0, + "bad_words_ids": [ + [ + 59513 + ] + ], + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 59513, + "decoder_vocab_size": 59514, + "dropout": 0.1, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 0, + "forced_eos_token_id": 0, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "marian", + "normalize_before": false, + "normalize_embedding": false, + "num_beams": 4, + "num_hidden_layers": 6, + "pad_token_id": 59513, + "scale_embedding": true, + "share_encoder_decoder_embeddings": true, + "static_position_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "use_cache": true, + "vocab_size": 59514 +} diff --git a/eng-nah-svo-translation/checkpoint-228/generation_config.json b/eng-nah-svo-translation/checkpoint-228/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..28de9cc7167cb6cabfab1a0afbf0fabb0717e902 --- 
/dev/null +++ b/eng-nah-svo-translation/checkpoint-228/generation_config.json @@ -0,0 +1,16 @@ +{ + "bad_words_ids": [ + [ + 59513 + ] + ], + "bos_token_id": 0, + "decoder_start_token_id": 59513, + "eos_token_id": 0, + "forced_eos_token_id": 0, + "max_length": 512, + "num_beams": 4, + "pad_token_id": 59513, + "renormalize_logits": true, + "transformers_version": "4.32.1" +} diff --git a/eng-nah-svo-translation/checkpoint-228/merges.txt b/eng-nah-svo-translation/checkpoint-228/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..fcecdc26f2eae6c453a0256e84a1245332d1307c --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/merges.txt @@ -0,0 +1,3 @@ +#version: 0.2 - Trained by `huggingface/tokenizers` +e n +f r diff --git a/eng-nah-svo-translation/checkpoint-228/optimizer.pt b/eng-nah-svo-translation/checkpoint-228/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fcd7e909e83fd1aae22c4deceb4437f136e493d --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59bd8d6a5080deae89e2f3af738d6e660f46d2030518a0b4eaba82fdb6509383 +size 597088389 diff --git a/eng-nah-svo-translation/checkpoint-228/pytorch_model.bin b/eng-nah-svo-translation/checkpoint-228/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..912be8c6175f8cea05e246be9e393ea9e69fb9ad --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c630d8e9c4466cfe5d61bacf6d7fe1ac796efef22faf9503d8fc73e57d641f6 +size 298763205 diff --git a/eng-nah-svo-translation/checkpoint-228/rng_state.pth b/eng-nah-svo-translation/checkpoint-228/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ae3e509c15ab0734248d8d40387fcd30fa090b82 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/rng_state.pth @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:eda1b4b24b034e66ad0e57eb524b7947ffef44dfca7258280a3294b6d703040c +size 14575 diff --git a/eng-nah-svo-translation/checkpoint-228/scheduler.pt b/eng-nah-svo-translation/checkpoint-228/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f0b39c18555c3e2e3443a63b925b6b3b9e8ab66 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7284c22e55ce6e785eabdf4699947d28e1466fc1f71401e601c833543f0ecbd +size 627 diff --git a/eng-nah-svo-translation/checkpoint-228/special_tokens_map.json b/eng-nah-svo-translation/checkpoint-228/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..342a2f53d97d4a8fe1422d2567e97ba8c525d05d --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "<|endoftext|>", + "eos_token": "<|endoftext|>", + "pad_token": "[PAD]", + "unk_token": "<|endoftext|>" +} diff --git a/eng-nah-svo-translation/checkpoint-228/tokenizer.json b/eng-nah-svo-translation/checkpoint-228/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3e5da92f3a16a9f960b6c8e3e0f9508c7e607ca5 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/tokenizer.json @@ -0,0 +1,317 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 259, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": true + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + 
"trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "vocab": { + "<|endoftext|>": 0, + "!": 1, + "\"": 2, + "#": 3, + "$": 4, + "%": 5, + "&": 6, + "'": 7, + "(": 8, + ")": 9, + "*": 10, + "+": 11, + ",": 12, + "-": 13, + ".": 14, + "/": 15, + "0": 16, + "1": 17, + "2": 18, + "3": 19, + "4": 20, + "5": 21, + "6": 22, + "7": 23, + "8": 24, + "9": 25, + ":": 26, + ";": 27, + "<": 28, + "=": 29, + ">": 30, + "?": 31, + "@": 32, + "A": 33, + "B": 34, + "C": 35, + "D": 36, + "E": 37, + "F": 38, + "G": 39, + "H": 40, + "I": 41, + "J": 42, + "K": 43, + "L": 44, + "M": 45, + "N": 46, + "O": 47, + "P": 48, + "Q": 49, + "R": 50, + "S": 51, + "T": 52, + "U": 53, + "V": 54, + "W": 55, + "X": 56, + "Y": 57, + "Z": 58, + "[": 59, + "\\": 60, + "]": 61, + "^": 62, + "_": 63, + "`": 64, + "a": 65, + "b": 66, + "c": 67, + "d": 68, + "e": 69, + "f": 70, + "g": 71, + "h": 72, + "i": 73, + "j": 74, + "k": 75, + "l": 76, + "m": 77, + "n": 78, + "o": 79, + "p": 80, + "q": 81, + "r": 82, + "s": 83, + "t": 84, + "u": 85, + "v": 86, + "w": 87, + "x": 88, + "y": 89, + "z": 90, + "{": 91, + "|": 92, + "}": 93, + "~": 94, + "¡": 95, + "¢": 96, + "£": 97, + "¤": 98, + "¥": 99, + "¦": 100, + "§": 101, + "¨": 102, + "©": 103, + "ª": 104, + "«": 105, + "¬": 106, + "®": 107, + "¯": 108, + "°": 109, + "±": 110, + "²": 111, + "³": 112, + "´": 113, + "µ": 114, + "¶": 115, + "·": 116, + "¸": 117, + "¹": 118, + "º": 119, + "»": 120, + "¼": 121, + "½": 122, + "¾": 123, + "¿": 124, + "À": 125, + "Á": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "È": 133, + "É": 134, + "Ê": 135, + "Ë": 136, + "Ì": 137, + "Í": 138, + "Î": 139, + "Ï": 140, + "Ð": 141, + "Ñ": 142, + "Ò": 143, + "Ó": 144, + "Ô": 145, + "Õ": 146, + 
"Ö": 147, + "×": 148, + "Ø": 149, + "Ù": 150, + "Ú": 151, + "Û": 152, + "Ü": 153, + "Ý": 154, + "Þ": 155, + "ß": 156, + "à": 157, + "á": 158, + "â": 159, + "ã": 160, + "ä": 161, + "å": 162, + "æ": 163, + "ç": 164, + "è": 165, + "é": 166, + "ê": 167, + "ë": 168, + "ì": 169, + "í": 170, + "î": 171, + "ï": 172, + "ð": 173, + "ñ": 174, + "ò": 175, + "ó": 176, + "ô": 177, + "õ": 178, + "ö": 179, + "÷": 180, + "ø": 181, + "ù": 182, + "ú": 183, + "û": 184, + "ü": 185, + "ý": 186, + "þ": 187, + "ÿ": 188, + "Ā": 189, + "ā": 190, + "Ă": 191, + "ă": 192, + "Ą": 193, + "ą": 194, + "Ć": 195, + "ć": 196, + "Ĉ": 197, + "ĉ": 198, + "Ċ": 199, + "ċ": 200, + "Č": 201, + "č": 202, + "Ď": 203, + "ď": 204, + "Đ": 205, + "đ": 206, + "Ē": 207, + "ē": 208, + "Ĕ": 209, + "ĕ": 210, + "Ė": 211, + "ė": 212, + "Ę": 213, + "ę": 214, + "Ě": 215, + "ě": 216, + "Ĝ": 217, + "ĝ": 218, + "Ğ": 219, + "ğ": 220, + "Ġ": 221, + "ġ": 222, + "Ģ": 223, + "ģ": 224, + "Ĥ": 225, + "ĥ": 226, + "Ħ": 227, + "ħ": 228, + "Ĩ": 229, + "ĩ": 230, + "Ī": 231, + "ī": 232, + "Ĭ": 233, + "ĭ": 234, + "Į": 235, + "į": 236, + "İ": 237, + "ı": 238, + "IJ": 239, + "ij": 240, + "Ĵ": 241, + "ĵ": 242, + "Ķ": 243, + "ķ": 244, + "ĸ": 245, + "Ĺ": 246, + "ĺ": 247, + "Ļ": 248, + "ļ": 249, + "Ľ": 250, + "ľ": 251, + "Ŀ": 252, + "ŀ": 253, + "Ł": 254, + "ł": 255, + "Ń": 256, + "en": 257, + "fr": 258 + }, + "merges": [ + "e n", + "f r" + ] + } +} \ No newline at end of file diff --git a/eng-nah-svo-translation/checkpoint-228/tokenizer_config.json b/eng-nah-svo-translation/checkpoint-228/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..546efe6d18ae2ad7758d0c9ef51cacdb81c8dc9d --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/tokenizer_config.json @@ -0,0 +1,9 @@ +{ + "add_prefix_space": false, + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|endoftext|>", + "model_max_length": 1024, + "tokenizer_class": "GPT2Tokenizer", + "unk_token": "<|endoftext|>" +} 
diff --git a/eng-nah-svo-translation/checkpoint-228/trainer_state.json b/eng-nah-svo-translation/checkpoint-228/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5b8815d4d0a1ead69943d491ff585e2e38488a38 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/trainer_state.json @@ -0,0 +1,18 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 228, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [], + "logging_steps": 500, + "max_steps": 684, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 50445004308480.0, + "trial_name": null, + "trial_params": null +} diff --git a/eng-nah-svo-translation/checkpoint-228/training_args.bin b/eng-nah-svo-translation/checkpoint-228/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8acb0269dd29c299627557f2024516755d820699 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a721cf0d37771b1d7220d33b1f17366183f3d518b2432e040b875a92c5be520b +size 4219 diff --git a/eng-nah-svo-translation/checkpoint-228/vocab.json b/eng-nah-svo-translation/checkpoint-228/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..9cd5bf34d923be02a0eae563f9c595833f08bef3 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-228/vocab.json @@ -0,0 +1 @@ 
+{"<|endoftext|>":0,"!":1,"\"":2,"#":3,"$":4,"%":5,"&":6,"'":7,"(":8,")":9,"*":10,"+":11,",":12,"-":13,".":14,"/":15,"0":16,"1":17,"2":18,"3":19,"4":20,"5":21,"6":22,"7":23,"8":24,"9":25,":":26,";":27,"<":28,"=":29,">":30,"?":31,"@":32,"A":33,"B":34,"C":35,"D":36,"E":37,"F":38,"G":39,"H":40,"I":41,"J":42,"K":43,"L":44,"M":45,"N":46,"O":47,"P":48,"Q":49,"R":50,"S":51,"T":52,"U":53,"V":54,"W":55,"X":56,"Y":57,"Z":58,"[":59,"\\":60,"]":61,"^":62,"_":63,"`":64,"a":65,"b":66,"c":67,"d":68,"e":69,"f":70,"g":71,"h":72,"i":73,"j":74,"k":75,"l":76,"m":77,"n":78,"o":79,"p":80,"q":81,"r":82,"s":83,"t":84,"u":85,"v":86,"w":87,"x":88,"y":89,"z":90,"{":91,"|":92,"}":93,"~":94,"¡":95,"¢":96,"£":97,"¤":98,"¥":99,"¦":100,"§":101,"¨":102,"©":103,"ª":104,"«":105,"¬":106,"®":107,"¯":108,"°":109,"±":110,"²":111,"³":112,"´":113,"µ":114,"¶":115,"·":116,"¸":117,"¹":118,"º":119,"»":120,"¼":121,"½":122,"¾":123,"¿":124,"À":125,"Á":126,"Â":127,"Ã":128,"Ä":129,"Å":130,"Æ":131,"Ç":132,"È":133,"É":134,"Ê":135,"Ë":136,"Ì":137,"Í":138,"Î":139,"Ï":140,"Ð":141,"Ñ":142,"Ò":143,"Ó":144,"Ô":145,"Õ":146,"Ö":147,"×":148,"Ø":149,"Ù":150,"Ú":151,"Û":152,"Ü":153,"Ý":154,"Þ":155,"ß":156,"à":157,"á":158,"â":159,"ã":160,"ä":161,"å":162,"æ":163,"ç":164,"è":165,"é":166,"ê":167,"ë":168,"ì":169,"í":170,"î":171,"ï":172,"ð":173,"ñ":174,"ò":175,"ó":176,"ô":177,"õ":178,"ö":179,"÷":180,"ø":181,"ù":182,"ú":183,"û":184,"ü":185,"ý":186,"þ":187,"ÿ":188,"Ā":189,"ā":190,"Ă":191,"ă":192,"Ą":193,"ą":194,"Ć":195,"ć":196,"Ĉ":197,"ĉ":198,"Ċ":199,"ċ":200,"Č":201,"č":202,"Ď":203,"ď":204,"Đ":205,"đ":206,"Ē":207,"ē":208,"Ĕ":209,"ĕ":210,"Ė":211,"ė":212,"Ę":213,"ę":214,"Ě":215,"ě":216,"Ĝ":217,"ĝ":218,"Ğ":219,"ğ":220,"Ġ":221,"ġ":222,"Ģ":223,"ģ":224,"Ĥ":225,"ĥ":226,"Ħ":227,"ħ":228,"Ĩ":229,"ĩ":230,"Ī":231,"ī":232,"Ĭ":233,"ĭ":234,"Į":235,"į":236,"İ":237,"ı":238,"IJ":239,"ij":240,"Ĵ":241,"ĵ":242,"Ķ":243,"ķ":244,"ĸ":245,"Ĺ":246,"ĺ":247,"Ļ":248,"ļ":249,"Ľ":250,"ľ":251,"Ŀ":252,"ŀ":253,"Ł":254,"ł":255,"Ń":256,"en":257,"fr":258} \ No newline at 
end of file diff --git a/eng-nah-svo-translation/checkpoint-456/added_tokens.json b/eng-nah-svo-translation/checkpoint-456/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..20fcd9a180a63961398838e6227d85f489eb5b9e --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 259 +} diff --git a/eng-nah-svo-translation/checkpoint-456/config.json b/eng-nah-svo-translation/checkpoint-456/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f521a43cea2ef81e380c41b63acd9b8fbc437f1 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/config.json @@ -0,0 +1,62 @@ +{ + "_name_or_path": "Helsinki-NLP/opus-mt-en-fr", + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "swish", + "add_bias_logits": false, + "add_final_layer_norm": false, + "architectures": [ + "MarianMTModel" + ], + "attention_dropout": 0.0, + "bad_words_ids": [ + [ + 59513 + ] + ], + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 59513, + "decoder_vocab_size": 59514, + "dropout": 0.1, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 0, + "forced_eos_token_id": 0, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "marian", + "normalize_before": false, + "normalize_embedding": false, + "num_beams": 4, + "num_hidden_layers": 6, + "pad_token_id": 59513, + "scale_embedding": true, + "share_encoder_decoder_embeddings": true, + "static_position_embeddings": true, + 
"torch_dtype": "float32", + "transformers_version": "4.32.1", + "use_cache": true, + "vocab_size": 59514 +} diff --git a/eng-nah-svo-translation/checkpoint-456/generation_config.json b/eng-nah-svo-translation/checkpoint-456/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..28de9cc7167cb6cabfab1a0afbf0fabb0717e902 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/generation_config.json @@ -0,0 +1,16 @@ +{ + "bad_words_ids": [ + [ + 59513 + ] + ], + "bos_token_id": 0, + "decoder_start_token_id": 59513, + "eos_token_id": 0, + "forced_eos_token_id": 0, + "max_length": 512, + "num_beams": 4, + "pad_token_id": 59513, + "renormalize_logits": true, + "transformers_version": "4.32.1" +} diff --git a/eng-nah-svo-translation/checkpoint-456/merges.txt b/eng-nah-svo-translation/checkpoint-456/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..fcecdc26f2eae6c453a0256e84a1245332d1307c --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/merges.txt @@ -0,0 +1,3 @@ +#version: 0.2 - Trained by `huggingface/tokenizers` +e n +f r diff --git a/eng-nah-svo-translation/checkpoint-456/optimizer.pt b/eng-nah-svo-translation/checkpoint-456/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..460b6440ba2a6db5ded743984433b78c26884cfc --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e43e1b1fcf506facd572f3acb3d20cfee480bceb3b99287aefb50fee0d941268 +size 597088389 diff --git a/eng-nah-svo-translation/checkpoint-456/pytorch_model.bin b/eng-nah-svo-translation/checkpoint-456/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b7dae2da9d6bada1c36af99e00b4a8db251dc9b5 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0a52afa6f3c380dd31b380fb49419dd696f4b16855b1a1ccf159d762c25153e7 +size 298763205 diff --git a/eng-nah-svo-translation/checkpoint-456/rng_state.pth b/eng-nah-svo-translation/checkpoint-456/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ae871141153cf0640a815d241d6a9443654d18cd --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2ef10ecbc4871818388bdbe84c0f41c64eab21669f3ca20fb8f5968e9369edc +size 14575 diff --git a/eng-nah-svo-translation/checkpoint-456/scheduler.pt b/eng-nah-svo-translation/checkpoint-456/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa562aebf9ce014df7943e6bdf5b32a5d14d27ce --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:242c2ba24c312c127ca25ea21b3ead4104a88a52230715c43e68a6909537e9d3 +size 627 diff --git a/eng-nah-svo-translation/checkpoint-456/special_tokens_map.json b/eng-nah-svo-translation/checkpoint-456/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..342a2f53d97d4a8fe1422d2567e97ba8c525d05d --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "<|endoftext|>", + "eos_token": "<|endoftext|>", + "pad_token": "[PAD]", + "unk_token": "<|endoftext|>" +} diff --git a/eng-nah-svo-translation/checkpoint-456/tokenizer.json b/eng-nah-svo-translation/checkpoint-456/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3e5da92f3a16a9f960b6c8e3e0f9508c7e607ca5 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/tokenizer.json @@ -0,0 +1,317 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": 
false, + "special": true + }, + { + "id": 259, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": true + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "vocab": { + "<|endoftext|>": 0, + "!": 1, + "\"": 2, + "#": 3, + "$": 4, + "%": 5, + "&": 6, + "'": 7, + "(": 8, + ")": 9, + "*": 10, + "+": 11, + ",": 12, + "-": 13, + ".": 14, + "/": 15, + "0": 16, + "1": 17, + "2": 18, + "3": 19, + "4": 20, + "5": 21, + "6": 22, + "7": 23, + "8": 24, + "9": 25, + ":": 26, + ";": 27, + "<": 28, + "=": 29, + ">": 30, + "?": 31, + "@": 32, + "A": 33, + "B": 34, + "C": 35, + "D": 36, + "E": 37, + "F": 38, + "G": 39, + "H": 40, + "I": 41, + "J": 42, + "K": 43, + "L": 44, + "M": 45, + "N": 46, + "O": 47, + "P": 48, + "Q": 49, + "R": 50, + "S": 51, + "T": 52, + "U": 53, + "V": 54, + "W": 55, + "X": 56, + "Y": 57, + "Z": 58, + "[": 59, + "\\": 60, + "]": 61, + "^": 62, + "_": 63, + "`": 64, + "a": 65, + "b": 66, + "c": 67, + "d": 68, + "e": 69, + "f": 70, + "g": 71, + "h": 72, + "i": 73, + "j": 74, + "k": 75, + "l": 76, + "m": 77, + "n": 78, + "o": 79, + "p": 80, + "q": 81, + "r": 82, + "s": 83, + "t": 84, + "u": 85, + "v": 86, + "w": 87, + "x": 88, + "y": 89, + "z": 90, + "{": 91, + "|": 92, + "}": 93, + "~": 94, + "¡": 95, + "¢": 96, + "£": 97, + "¤": 98, + "¥": 99, + "¦": 100, + "§": 101, + "¨": 102, + "©": 103, + "ª": 104, + "«": 105, + "¬": 106, + "®": 107, + "¯": 108, + "°": 109, + "±": 110, + "²": 111, + "³": 112, + "´": 
113, + "µ": 114, + "¶": 115, + "·": 116, + "¸": 117, + "¹": 118, + "º": 119, + "»": 120, + "¼": 121, + "½": 122, + "¾": 123, + "¿": 124, + "À": 125, + "Á": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "È": 133, + "É": 134, + "Ê": 135, + "Ë": 136, + "Ì": 137, + "Í": 138, + "Î": 139, + "Ï": 140, + "Ð": 141, + "Ñ": 142, + "Ò": 143, + "Ó": 144, + "Ô": 145, + "Õ": 146, + "Ö": 147, + "×": 148, + "Ø": 149, + "Ù": 150, + "Ú": 151, + "Û": 152, + "Ü": 153, + "Ý": 154, + "Þ": 155, + "ß": 156, + "à": 157, + "á": 158, + "â": 159, + "ã": 160, + "ä": 161, + "å": 162, + "æ": 163, + "ç": 164, + "è": 165, + "é": 166, + "ê": 167, + "ë": 168, + "ì": 169, + "í": 170, + "î": 171, + "ï": 172, + "ð": 173, + "ñ": 174, + "ò": 175, + "ó": 176, + "ô": 177, + "õ": 178, + "ö": 179, + "÷": 180, + "ø": 181, + "ù": 182, + "ú": 183, + "û": 184, + "ü": 185, + "ý": 186, + "þ": 187, + "ÿ": 188, + "Ā": 189, + "ā": 190, + "Ă": 191, + "ă": 192, + "Ą": 193, + "ą": 194, + "Ć": 195, + "ć": 196, + "Ĉ": 197, + "ĉ": 198, + "Ċ": 199, + "ċ": 200, + "Č": 201, + "č": 202, + "Ď": 203, + "ď": 204, + "Đ": 205, + "đ": 206, + "Ē": 207, + "ē": 208, + "Ĕ": 209, + "ĕ": 210, + "Ė": 211, + "ė": 212, + "Ę": 213, + "ę": 214, + "Ě": 215, + "ě": 216, + "Ĝ": 217, + "ĝ": 218, + "Ğ": 219, + "ğ": 220, + "Ġ": 221, + "ġ": 222, + "Ģ": 223, + "ģ": 224, + "Ĥ": 225, + "ĥ": 226, + "Ħ": 227, + "ħ": 228, + "Ĩ": 229, + "ĩ": 230, + "Ī": 231, + "ī": 232, + "Ĭ": 233, + "ĭ": 234, + "Į": 235, + "į": 236, + "İ": 237, + "ı": 238, + "IJ": 239, + "ij": 240, + "Ĵ": 241, + "ĵ": 242, + "Ķ": 243, + "ķ": 244, + "ĸ": 245, + "Ĺ": 246, + "ĺ": 247, + "Ļ": 248, + "ļ": 249, + "Ľ": 250, + "ľ": 251, + "Ŀ": 252, + "ŀ": 253, + "Ł": 254, + "ł": 255, + "Ń": 256, + "en": 257, + "fr": 258 + }, + "merges": [ + "e n", + "f r" + ] + } +} \ No newline at end of file diff --git a/eng-nah-svo-translation/checkpoint-456/tokenizer_config.json b/eng-nah-svo-translation/checkpoint-456/tokenizer_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..546efe6d18ae2ad7758d0c9ef51cacdb81c8dc9d --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/tokenizer_config.json @@ -0,0 +1,9 @@ +{ + "add_prefix_space": false, + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|endoftext|>", + "model_max_length": 1024, + "tokenizer_class": "GPT2Tokenizer", + "unk_token": "<|endoftext|>" +} diff --git a/eng-nah-svo-translation/checkpoint-456/trainer_state.json b/eng-nah-svo-translation/checkpoint-456/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1d8cdbc8c30e3bac7473146876de9199049ac3b0 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/trainer_state.json @@ -0,0 +1,18 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 456, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [], + "logging_steps": 500, + "max_steps": 684, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 100891067940864.0, + "trial_name": null, + "trial_params": null +} diff --git a/eng-nah-svo-translation/checkpoint-456/training_args.bin b/eng-nah-svo-translation/checkpoint-456/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8acb0269dd29c299627557f2024516755d820699 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a721cf0d37771b1d7220d33b1f17366183f3d518b2432e040b875a92c5be520b +size 4219 diff --git a/eng-nah-svo-translation/checkpoint-456/vocab.json b/eng-nah-svo-translation/checkpoint-456/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..9cd5bf34d923be02a0eae563f9c595833f08bef3 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-456/vocab.json @@ -0,0 +1 @@ 
+{"<|endoftext|>":0,"!":1,"\"":2,"#":3,"$":4,"%":5,"&":6,"'":7,"(":8,")":9,"*":10,"+":11,",":12,"-":13,".":14,"/":15,"0":16,"1":17,"2":18,"3":19,"4":20,"5":21,"6":22,"7":23,"8":24,"9":25,":":26,";":27,"<":28,"=":29,">":30,"?":31,"@":32,"A":33,"B":34,"C":35,"D":36,"E":37,"F":38,"G":39,"H":40,"I":41,"J":42,"K":43,"L":44,"M":45,"N":46,"O":47,"P":48,"Q":49,"R":50,"S":51,"T":52,"U":53,"V":54,"W":55,"X":56,"Y":57,"Z":58,"[":59,"\\":60,"]":61,"^":62,"_":63,"`":64,"a":65,"b":66,"c":67,"d":68,"e":69,"f":70,"g":71,"h":72,"i":73,"j":74,"k":75,"l":76,"m":77,"n":78,"o":79,"p":80,"q":81,"r":82,"s":83,"t":84,"u":85,"v":86,"w":87,"x":88,"y":89,"z":90,"{":91,"|":92,"}":93,"~":94,"¡":95,"¢":96,"£":97,"¤":98,"¥":99,"¦":100,"§":101,"¨":102,"©":103,"ª":104,"«":105,"¬":106,"®":107,"¯":108,"°":109,"±":110,"²":111,"³":112,"´":113,"µ":114,"¶":115,"·":116,"¸":117,"¹":118,"º":119,"»":120,"¼":121,"½":122,"¾":123,"¿":124,"À":125,"Á":126,"Â":127,"Ã":128,"Ä":129,"Å":130,"Æ":131,"Ç":132,"È":133,"É":134,"Ê":135,"Ë":136,"Ì":137,"Í":138,"Î":139,"Ï":140,"Ð":141,"Ñ":142,"Ò":143,"Ó":144,"Ô":145,"Õ":146,"Ö":147,"×":148,"Ø":149,"Ù":150,"Ú":151,"Û":152,"Ü":153,"Ý":154,"Þ":155,"ß":156,"à":157,"á":158,"â":159,"ã":160,"ä":161,"å":162,"æ":163,"ç":164,"è":165,"é":166,"ê":167,"ë":168,"ì":169,"í":170,"î":171,"ï":172,"ð":173,"ñ":174,"ò":175,"ó":176,"ô":177,"õ":178,"ö":179,"÷":180,"ø":181,"ù":182,"ú":183,"û":184,"ü":185,"ý":186,"þ":187,"ÿ":188,"Ā":189,"ā":190,"Ă":191,"ă":192,"Ą":193,"ą":194,"Ć":195,"ć":196,"Ĉ":197,"ĉ":198,"Ċ":199,"ċ":200,"Č":201,"č":202,"Ď":203,"ď":204,"Đ":205,"đ":206,"Ē":207,"ē":208,"Ĕ":209,"ĕ":210,"Ė":211,"ė":212,"Ę":213,"ę":214,"Ě":215,"ě":216,"Ĝ":217,"ĝ":218,"Ğ":219,"ğ":220,"Ġ":221,"ġ":222,"Ģ":223,"ģ":224,"Ĥ":225,"ĥ":226,"Ħ":227,"ħ":228,"Ĩ":229,"ĩ":230,"Ī":231,"ī":232,"Ĭ":233,"ĭ":234,"Į":235,"į":236,"İ":237,"ı":238,"IJ":239,"ij":240,"Ĵ":241,"ĵ":242,"Ķ":243,"ķ":244,"ĸ":245,"Ĺ":246,"ĺ":247,"Ļ":248,"ļ":249,"Ľ":250,"ľ":251,"Ŀ":252,"ŀ":253,"Ł":254,"ł":255,"Ń":256,"en":257,"fr":258} \ No newline at 
end of file diff --git a/eng-nah-svo-translation/checkpoint-684/added_tokens.json b/eng-nah-svo-translation/checkpoint-684/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..20fcd9a180a63961398838e6227d85f489eb5b9e --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/added_tokens.json @@ -0,0 +1,3 @@ +{ + "[PAD]": 259 +} diff --git a/eng-nah-svo-translation/checkpoint-684/config.json b/eng-nah-svo-translation/checkpoint-684/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f521a43cea2ef81e380c41b63acd9b8fbc437f1 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/config.json @@ -0,0 +1,62 @@ +{ + "_name_or_path": "Helsinki-NLP/opus-mt-en-fr", + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "swish", + "add_bias_logits": false, + "add_final_layer_norm": false, + "architectures": [ + "MarianMTModel" + ], + "attention_dropout": 0.0, + "bad_words_ids": [ + [ + 59513 + ] + ], + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 59513, + "decoder_vocab_size": 59514, + "dropout": 0.1, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 0, + "forced_eos_token_id": 0, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "marian", + "normalize_before": false, + "normalize_embedding": false, + "num_beams": 4, + "num_hidden_layers": 6, + "pad_token_id": 59513, + "scale_embedding": true, + "share_encoder_decoder_embeddings": true, + "static_position_embeddings": true, + 
"torch_dtype": "float32", + "transformers_version": "4.32.1", + "use_cache": true, + "vocab_size": 59514 +} diff --git a/eng-nah-svo-translation/checkpoint-684/generation_config.json b/eng-nah-svo-translation/checkpoint-684/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..28de9cc7167cb6cabfab1a0afbf0fabb0717e902 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/generation_config.json @@ -0,0 +1,16 @@ +{ + "bad_words_ids": [ + [ + 59513 + ] + ], + "bos_token_id": 0, + "decoder_start_token_id": 59513, + "eos_token_id": 0, + "forced_eos_token_id": 0, + "max_length": 512, + "num_beams": 4, + "pad_token_id": 59513, + "renormalize_logits": true, + "transformers_version": "4.32.1" +} diff --git a/eng-nah-svo-translation/checkpoint-684/merges.txt b/eng-nah-svo-translation/checkpoint-684/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..fcecdc26f2eae6c453a0256e84a1245332d1307c --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/merges.txt @@ -0,0 +1,3 @@ +#version: 0.2 - Trained by `huggingface/tokenizers` +e n +f r diff --git a/eng-nah-svo-translation/checkpoint-684/optimizer.pt b/eng-nah-svo-translation/checkpoint-684/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff140757113b0f0370694155a3b4f83070d8a48e --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac5f706f0d09f4867580538158e03c320797c1671d0b3f38cad52bfa2874d1d +size 597088389 diff --git a/eng-nah-svo-translation/checkpoint-684/pytorch_model.bin b/eng-nah-svo-translation/checkpoint-684/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..df3990d715023bce55319036d40aafb96142da7d --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8668eed40a7cfcfc0587457eb0a6d8466a902ba9586f114dd71080246ca1b412 +size 298763205 diff --git a/eng-nah-svo-translation/checkpoint-684/rng_state.pth b/eng-nah-svo-translation/checkpoint-684/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d37fecc488e1be2a295c579c96c54aa62cfe154f --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04fe1caeb610a60aa4e79505acaadaac660c70ac1894dca9fc23e4540a43c20c +size 14575 diff --git a/eng-nah-svo-translation/checkpoint-684/scheduler.pt b/eng-nah-svo-translation/checkpoint-684/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0fcdeeb06eb209b7098b483fbdf5dab042fd1e3 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:901a1df7fc3c05c8382adf9ed1f028dd9e296bda0dbc80b55a9441d0c011fdb9 +size 627 diff --git a/eng-nah-svo-translation/checkpoint-684/special_tokens_map.json b/eng-nah-svo-translation/checkpoint-684/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..342a2f53d97d4a8fe1422d2567e97ba8c525d05d --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "<|endoftext|>", + "eos_token": "<|endoftext|>", + "pad_token": "[PAD]", + "unk_token": "<|endoftext|>" +} diff --git a/eng-nah-svo-translation/checkpoint-684/tokenizer.json b/eng-nah-svo-translation/checkpoint-684/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3e5da92f3a16a9f960b6c8e3e0f9508c7e607ca5 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/tokenizer.json @@ -0,0 +1,317 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": 
false, + "special": true + }, + { + "id": 259, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": true + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "vocab": { + "<|endoftext|>": 0, + "!": 1, + "\"": 2, + "#": 3, + "$": 4, + "%": 5, + "&": 6, + "'": 7, + "(": 8, + ")": 9, + "*": 10, + "+": 11, + ",": 12, + "-": 13, + ".": 14, + "/": 15, + "0": 16, + "1": 17, + "2": 18, + "3": 19, + "4": 20, + "5": 21, + "6": 22, + "7": 23, + "8": 24, + "9": 25, + ":": 26, + ";": 27, + "<": 28, + "=": 29, + ">": 30, + "?": 31, + "@": 32, + "A": 33, + "B": 34, + "C": 35, + "D": 36, + "E": 37, + "F": 38, + "G": 39, + "H": 40, + "I": 41, + "J": 42, + "K": 43, + "L": 44, + "M": 45, + "N": 46, + "O": 47, + "P": 48, + "Q": 49, + "R": 50, + "S": 51, + "T": 52, + "U": 53, + "V": 54, + "W": 55, + "X": 56, + "Y": 57, + "Z": 58, + "[": 59, + "\\": 60, + "]": 61, + "^": 62, + "_": 63, + "`": 64, + "a": 65, + "b": 66, + "c": 67, + "d": 68, + "e": 69, + "f": 70, + "g": 71, + "h": 72, + "i": 73, + "j": 74, + "k": 75, + "l": 76, + "m": 77, + "n": 78, + "o": 79, + "p": 80, + "q": 81, + "r": 82, + "s": 83, + "t": 84, + "u": 85, + "v": 86, + "w": 87, + "x": 88, + "y": 89, + "z": 90, + "{": 91, + "|": 92, + "}": 93, + "~": 94, + "¡": 95, + "¢": 96, + "£": 97, + "¤": 98, + "¥": 99, + "¦": 100, + "§": 101, + "¨": 102, + "©": 103, + "ª": 104, + "«": 105, + "¬": 106, + "®": 107, + "¯": 108, + "°": 109, + "±": 110, + "²": 111, + "³": 112, + "´": 
113, + "µ": 114, + "¶": 115, + "·": 116, + "¸": 117, + "¹": 118, + "º": 119, + "»": 120, + "¼": 121, + "½": 122, + "¾": 123, + "¿": 124, + "À": 125, + "Á": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "È": 133, + "É": 134, + "Ê": 135, + "Ë": 136, + "Ì": 137, + "Í": 138, + "Î": 139, + "Ï": 140, + "Ð": 141, + "Ñ": 142, + "Ò": 143, + "Ó": 144, + "Ô": 145, + "Õ": 146, + "Ö": 147, + "×": 148, + "Ø": 149, + "Ù": 150, + "Ú": 151, + "Û": 152, + "Ü": 153, + "Ý": 154, + "Þ": 155, + "ß": 156, + "à": 157, + "á": 158, + "â": 159, + "ã": 160, + "ä": 161, + "å": 162, + "æ": 163, + "ç": 164, + "è": 165, + "é": 166, + "ê": 167, + "ë": 168, + "ì": 169, + "í": 170, + "î": 171, + "ï": 172, + "ð": 173, + "ñ": 174, + "ò": 175, + "ó": 176, + "ô": 177, + "õ": 178, + "ö": 179, + "÷": 180, + "ø": 181, + "ù": 182, + "ú": 183, + "û": 184, + "ü": 185, + "ý": 186, + "þ": 187, + "ÿ": 188, + "Ā": 189, + "ā": 190, + "Ă": 191, + "ă": 192, + "Ą": 193, + "ą": 194, + "Ć": 195, + "ć": 196, + "Ĉ": 197, + "ĉ": 198, + "Ċ": 199, + "ċ": 200, + "Č": 201, + "č": 202, + "Ď": 203, + "ď": 204, + "Đ": 205, + "đ": 206, + "Ē": 207, + "ē": 208, + "Ĕ": 209, + "ĕ": 210, + "Ė": 211, + "ė": 212, + "Ę": 213, + "ę": 214, + "Ě": 215, + "ě": 216, + "Ĝ": 217, + "ĝ": 218, + "Ğ": 219, + "ğ": 220, + "Ġ": 221, + "ġ": 222, + "Ģ": 223, + "ģ": 224, + "Ĥ": 225, + "ĥ": 226, + "Ħ": 227, + "ħ": 228, + "Ĩ": 229, + "ĩ": 230, + "Ī": 231, + "ī": 232, + "Ĭ": 233, + "ĭ": 234, + "Į": 235, + "į": 236, + "İ": 237, + "ı": 238, + "IJ": 239, + "ij": 240, + "Ĵ": 241, + "ĵ": 242, + "Ķ": 243, + "ķ": 244, + "ĸ": 245, + "Ĺ": 246, + "ĺ": 247, + "Ļ": 248, + "ļ": 249, + "Ľ": 250, + "ľ": 251, + "Ŀ": 252, + "ŀ": 253, + "Ł": 254, + "ł": 255, + "Ń": 256, + "en": 257, + "fr": 258 + }, + "merges": [ + "e n", + "f r" + ] + } +} \ No newline at end of file diff --git a/eng-nah-svo-translation/checkpoint-684/tokenizer_config.json b/eng-nah-svo-translation/checkpoint-684/tokenizer_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..546efe6d18ae2ad7758d0c9ef51cacdb81c8dc9d --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/tokenizer_config.json @@ -0,0 +1,9 @@ +{ + "add_prefix_space": false, + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|endoftext|>", + "model_max_length": 1024, + "tokenizer_class": "GPT2Tokenizer", + "unk_token": "<|endoftext|>" +} diff --git a/eng-nah-svo-translation/checkpoint-684/trainer_state.json b/eng-nah-svo-translation/checkpoint-684/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..98bb69cb3a4caabe5c57c71d4c322fcb6577cba9 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/trainer_state.json @@ -0,0 +1,25 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 684, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 2.19, + "learning_rate": 5.380116959064328e-06, + "loss": 1.2688, + "step": 500 + } + ], + "logging_steps": 500, + "max_steps": 684, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 151058529386496.0, + "trial_name": null, + "trial_params": null +} diff --git a/eng-nah-svo-translation/checkpoint-684/training_args.bin b/eng-nah-svo-translation/checkpoint-684/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8acb0269dd29c299627557f2024516755d820699 --- /dev/null +++ b/eng-nah-svo-translation/checkpoint-684/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a721cf0d37771b1d7220d33b1f17366183f3d518b2432e040b875a92c5be520b +size 4219 diff --git a/eng-nah-svo-translation/checkpoint-684/vocab.json b/eng-nah-svo-translation/checkpoint-684/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..9cd5bf34d923be02a0eae563f9c595833f08bef3 --- /dev/null +++ 
b/eng-nah-svo-translation/checkpoint-684/vocab.json @@ -0,0 +1 @@ +{"<|endoftext|>":0,"!":1,"\"":2,"#":3,"$":4,"%":5,"&":6,"'":7,"(":8,")":9,"*":10,"+":11,",":12,"-":13,".":14,"/":15,"0":16,"1":17,"2":18,"3":19,"4":20,"5":21,"6":22,"7":23,"8":24,"9":25,":":26,";":27,"<":28,"=":29,">":30,"?":31,"@":32,"A":33,"B":34,"C":35,"D":36,"E":37,"F":38,"G":39,"H":40,"I":41,"J":42,"K":43,"L":44,"M":45,"N":46,"O":47,"P":48,"Q":49,"R":50,"S":51,"T":52,"U":53,"V":54,"W":55,"X":56,"Y":57,"Z":58,"[":59,"\\":60,"]":61,"^":62,"_":63,"`":64,"a":65,"b":66,"c":67,"d":68,"e":69,"f":70,"g":71,"h":72,"i":73,"j":74,"k":75,"l":76,"m":77,"n":78,"o":79,"p":80,"q":81,"r":82,"s":83,"t":84,"u":85,"v":86,"w":87,"x":88,"y":89,"z":90,"{":91,"|":92,"}":93,"~":94,"¡":95,"¢":96,"£":97,"¤":98,"¥":99,"¦":100,"§":101,"¨":102,"©":103,"ª":104,"«":105,"¬":106,"®":107,"¯":108,"°":109,"±":110,"²":111,"³":112,"´":113,"µ":114,"¶":115,"·":116,"¸":117,"¹":118,"º":119,"»":120,"¼":121,"½":122,"¾":123,"¿":124,"À":125,"Á":126,"Â":127,"Ã":128,"Ä":129,"Å":130,"Æ":131,"Ç":132,"È":133,"É":134,"Ê":135,"Ë":136,"Ì":137,"Í":138,"Î":139,"Ï":140,"Ð":141,"Ñ":142,"Ò":143,"Ó":144,"Ô":145,"Õ":146,"Ö":147,"×":148,"Ø":149,"Ù":150,"Ú":151,"Û":152,"Ü":153,"Ý":154,"Þ":155,"ß":156,"à":157,"á":158,"â":159,"ã":160,"ä":161,"å":162,"æ":163,"ç":164,"è":165,"é":166,"ê":167,"ë":168,"ì":169,"í":170,"î":171,"ï":172,"ð":173,"ñ":174,"ò":175,"ó":176,"ô":177,"õ":178,"ö":179,"÷":180,"ø":181,"ù":182,"ú":183,"û":184,"ü":185,"ý":186,"þ":187,"ÿ":188,"Ā":189,"ā":190,"Ă":191,"ă":192,"Ą":193,"ą":194,"Ć":195,"ć":196,"Ĉ":197,"ĉ":198,"Ċ":199,"ċ":200,"Č":201,"č":202,"Ď":203,"ď":204,"Đ":205,"đ":206,"Ē":207,"ē":208,"Ĕ":209,"ĕ":210,"Ė":211,"ė":212,"Ę":213,"ę":214,"Ě":215,"ě":216,"Ĝ":217,"ĝ":218,"Ğ":219,"ğ":220,"Ġ":221,"ġ":222,"Ģ":223,"ģ":224,"Ĥ":225,"ĥ":226,"Ħ":227,"ħ":228,"Ĩ":229,"ĩ":230,"Ī":231,"ī":232,"Ĭ":233,"ĭ":234,"Į":235,"į":236,"İ":237,"ı":238,"IJ":239,"ij":240,"Ĵ":241,"ĵ":242,"Ķ":243,"ķ":244,"ĸ":245,"Ĺ":246,"ĺ":247,"Ļ":248,"ļ":249,"Ľ":250,"ľ":251,"Ŀ":252,"ŀ
":253,"Ł":254,"ł":255,"Ń":256,"en":257,"fr":258} \ No newline at end of file diff --git a/eng-nah-svo-translation/config.json b/eng-nah-svo-translation/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f521a43cea2ef81e380c41b63acd9b8fbc437f1 --- /dev/null +++ b/eng-nah-svo-translation/config.json @@ -0,0 +1,62 @@ +{ + "_name_or_path": "Helsinki-NLP/opus-mt-en-fr", + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "swish", + "add_bias_logits": false, + "add_final_layer_norm": false, + "architectures": [ + "MarianMTModel" + ], + "attention_dropout": 0.0, + "bad_words_ids": [ + [ + 59513 + ] + ], + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 512, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "decoder_start_token_id": 59513, + "decoder_vocab_size": 59514, + "dropout": 0.1, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "eos_token_id": 0, + "forced_eos_token_id": 0, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": 512, + "max_position_embeddings": 512, + "model_type": "marian", + "normalize_before": false, + "normalize_embedding": false, + "num_beams": 4, + "num_hidden_layers": 6, + "pad_token_id": 59513, + "scale_embedding": true, + "share_encoder_decoder_embeddings": true, + "static_position_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.32.1", + "use_cache": true, + "vocab_size": 59514 +} diff --git a/eng-nah-svo-translation/generation_config.json b/eng-nah-svo-translation/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..28de9cc7167cb6cabfab1a0afbf0fabb0717e902 --- /dev/null +++ 
b/eng-nah-svo-translation/generation_config.json @@ -0,0 +1,16 @@ +{ + "bad_words_ids": [ + [ + 59513 + ] + ], + "bos_token_id": 0, + "decoder_start_token_id": 59513, + "eos_token_id": 0, + "forced_eos_token_id": 0, + "max_length": 512, + "num_beams": 4, + "pad_token_id": 59513, + "renormalize_logits": true, + "transformers_version": "4.32.1" +} diff --git a/eng-nah-svo-translation/merges.txt b/eng-nah-svo-translation/merges.txt new file mode 100644 index 0000000000000000000000000000000000000000..fcecdc26f2eae6c453a0256e84a1245332d1307c --- /dev/null +++ b/eng-nah-svo-translation/merges.txt @@ -0,0 +1,3 @@ +#version: 0.2 - Trained by `huggingface/tokenizers` +e n +f r diff --git a/eng-nah-svo-translation/pytorch_model.bin b/eng-nah-svo-translation/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..df3990d715023bce55319036d40aafb96142da7d --- /dev/null +++ b/eng-nah-svo-translation/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8668eed40a7cfcfc0587457eb0a6d8466a902ba9586f114dd71080246ca1b412 +size 298763205 diff --git a/eng-nah-svo-translation/special_tokens_map.json b/eng-nah-svo-translation/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..342a2f53d97d4a8fe1422d2567e97ba8c525d05d --- /dev/null +++ b/eng-nah-svo-translation/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "<|endoftext|>", + "eos_token": "<|endoftext|>", + "pad_token": "[PAD]", + "unk_token": "<|endoftext|>" +} diff --git a/eng-nah-svo-translation/tokenizer.json b/eng-nah-svo-translation/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3e5da92f3a16a9f960b6c8e3e0f9508c7e607ca5 --- /dev/null +++ b/eng-nah-svo-translation/tokenizer.json @@ -0,0 +1,317 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": 
false, + "normalized": false, + "special": true + }, + { + "id": 259, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": true + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "vocab": { + "<|endoftext|>": 0, + "!": 1, + "\"": 2, + "#": 3, + "$": 4, + "%": 5, + "&": 6, + "'": 7, + "(": 8, + ")": 9, + "*": 10, + "+": 11, + ",": 12, + "-": 13, + ".": 14, + "/": 15, + "0": 16, + "1": 17, + "2": 18, + "3": 19, + "4": 20, + "5": 21, + "6": 22, + "7": 23, + "8": 24, + "9": 25, + ":": 26, + ";": 27, + "<": 28, + "=": 29, + ">": 30, + "?": 31, + "@": 32, + "A": 33, + "B": 34, + "C": 35, + "D": 36, + "E": 37, + "F": 38, + "G": 39, + "H": 40, + "I": 41, + "J": 42, + "K": 43, + "L": 44, + "M": 45, + "N": 46, + "O": 47, + "P": 48, + "Q": 49, + "R": 50, + "S": 51, + "T": 52, + "U": 53, + "V": 54, + "W": 55, + "X": 56, + "Y": 57, + "Z": 58, + "[": 59, + "\\": 60, + "]": 61, + "^": 62, + "_": 63, + "`": 64, + "a": 65, + "b": 66, + "c": 67, + "d": 68, + "e": 69, + "f": 70, + "g": 71, + "h": 72, + "i": 73, + "j": 74, + "k": 75, + "l": 76, + "m": 77, + "n": 78, + "o": 79, + "p": 80, + "q": 81, + "r": 82, + "s": 83, + "t": 84, + "u": 85, + "v": 86, + "w": 87, + "x": 88, + "y": 89, + "z": 90, + "{": 91, + "|": 92, + "}": 93, + "~": 94, + "¡": 95, + "¢": 96, + "£": 97, + "¤": 98, + "¥": 99, + "¦": 100, + "§": 101, + "¨": 102, + "©": 103, + "ª": 104, + "«": 105, + "¬": 106, + "®": 107, + "¯": 108, + "°": 109, + "±": 110, + "²": 
111, + "³": 112, + "´": 113, + "µ": 114, + "¶": 115, + "·": 116, + "¸": 117, + "¹": 118, + "º": 119, + "»": 120, + "¼": 121, + "½": 122, + "¾": 123, + "¿": 124, + "À": 125, + "Á": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "È": 133, + "É": 134, + "Ê": 135, + "Ë": 136, + "Ì": 137, + "Í": 138, + "Î": 139, + "Ï": 140, + "Ð": 141, + "Ñ": 142, + "Ò": 143, + "Ó": 144, + "Ô": 145, + "Õ": 146, + "Ö": 147, + "×": 148, + "Ø": 149, + "Ù": 150, + "Ú": 151, + "Û": 152, + "Ü": 153, + "Ý": 154, + "Þ": 155, + "ß": 156, + "à": 157, + "á": 158, + "â": 159, + "ã": 160, + "ä": 161, + "å": 162, + "æ": 163, + "ç": 164, + "è": 165, + "é": 166, + "ê": 167, + "ë": 168, + "ì": 169, + "í": 170, + "î": 171, + "ï": 172, + "ð": 173, + "ñ": 174, + "ò": 175, + "ó": 176, + "ô": 177, + "õ": 178, + "ö": 179, + "÷": 180, + "ø": 181, + "ù": 182, + "ú": 183, + "û": 184, + "ü": 185, + "ý": 186, + "þ": 187, + "ÿ": 188, + "Ā": 189, + "ā": 190, + "Ă": 191, + "ă": 192, + "Ą": 193, + "ą": 194, + "Ć": 195, + "ć": 196, + "Ĉ": 197, + "ĉ": 198, + "Ċ": 199, + "ċ": 200, + "Č": 201, + "č": 202, + "Ď": 203, + "ď": 204, + "Đ": 205, + "đ": 206, + "Ē": 207, + "ē": 208, + "Ĕ": 209, + "ĕ": 210, + "Ė": 211, + "ė": 212, + "Ę": 213, + "ę": 214, + "Ě": 215, + "ě": 216, + "Ĝ": 217, + "ĝ": 218, + "Ğ": 219, + "ğ": 220, + "Ġ": 221, + "ġ": 222, + "Ģ": 223, + "ģ": 224, + "Ĥ": 225, + "ĥ": 226, + "Ħ": 227, + "ħ": 228, + "Ĩ": 229, + "ĩ": 230, + "Ī": 231, + "ī": 232, + "Ĭ": 233, + "ĭ": 234, + "Į": 235, + "į": 236, + "İ": 237, + "ı": 238, + "IJ": 239, + "ij": 240, + "Ĵ": 241, + "ĵ": 242, + "Ķ": 243, + "ķ": 244, + "ĸ": 245, + "Ĺ": 246, + "ĺ": 247, + "Ļ": 248, + "ļ": 249, + "Ľ": 250, + "ľ": 251, + "Ŀ": 252, + "ŀ": 253, + "Ł": 254, + "ł": 255, + "Ń": 256, + "en": 257, + "fr": 258 + }, + "merges": [ + "e n", + "f r" + ] + } +} \ No newline at end of file diff --git a/eng-nah-svo-translation/tokenizer_config.json b/eng-nah-svo-translation/tokenizer_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..546efe6d18ae2ad7758d0c9ef51cacdb81c8dc9d --- /dev/null +++ b/eng-nah-svo-translation/tokenizer_config.json @@ -0,0 +1,9 @@ +{ + "add_prefix_space": false, + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|endoftext|>", + "model_max_length": 1024, + "tokenizer_class": "GPT2Tokenizer", + "unk_token": "<|endoftext|>" +} diff --git a/eng-nah-svo-translation/training_args.bin b/eng-nah-svo-translation/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8acb0269dd29c299627557f2024516755d820699 --- /dev/null +++ b/eng-nah-svo-translation/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a721cf0d37771b1d7220d33b1f17366183f3d518b2432e040b875a92c5be520b +size 4219 diff --git a/eng-nah-svo-translation/vocab.json b/eng-nah-svo-translation/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..9cd5bf34d923be02a0eae563f9c595833f08bef3 --- /dev/null +++ b/eng-nah-svo-translation/vocab.json @@ -0,0 +1 @@ 
+{"<|endoftext|>":0,"!":1,"\"":2,"#":3,"$":4,"%":5,"&":6,"'":7,"(":8,")":9,"*":10,"+":11,",":12,"-":13,".":14,"/":15,"0":16,"1":17,"2":18,"3":19,"4":20,"5":21,"6":22,"7":23,"8":24,"9":25,":":26,";":27,"<":28,"=":29,">":30,"?":31,"@":32,"A":33,"B":34,"C":35,"D":36,"E":37,"F":38,"G":39,"H":40,"I":41,"J":42,"K":43,"L":44,"M":45,"N":46,"O":47,"P":48,"Q":49,"R":50,"S":51,"T":52,"U":53,"V":54,"W":55,"X":56,"Y":57,"Z":58,"[":59,"\\":60,"]":61,"^":62,"_":63,"`":64,"a":65,"b":66,"c":67,"d":68,"e":69,"f":70,"g":71,"h":72,"i":73,"j":74,"k":75,"l":76,"m":77,"n":78,"o":79,"p":80,"q":81,"r":82,"s":83,"t":84,"u":85,"v":86,"w":87,"x":88,"y":89,"z":90,"{":91,"|":92,"}":93,"~":94,"¡":95,"¢":96,"£":97,"¤":98,"¥":99,"¦":100,"§":101,"¨":102,"©":103,"ª":104,"«":105,"¬":106,"®":107,"¯":108,"°":109,"±":110,"²":111,"³":112,"´":113,"µ":114,"¶":115,"·":116,"¸":117,"¹":118,"º":119,"»":120,"¼":121,"½":122,"¾":123,"¿":124,"À":125,"Á":126,"Â":127,"Ã":128,"Ä":129,"Å":130,"Æ":131,"Ç":132,"È":133,"É":134,"Ê":135,"Ë":136,"Ì":137,"Í":138,"Î":139,"Ï":140,"Ð":141,"Ñ":142,"Ò":143,"Ó":144,"Ô":145,"Õ":146,"Ö":147,"×":148,"Ø":149,"Ù":150,"Ú":151,"Û":152,"Ü":153,"Ý":154,"Þ":155,"ß":156,"à":157,"á":158,"â":159,"ã":160,"ä":161,"å":162,"æ":163,"ç":164,"è":165,"é":166,"ê":167,"ë":168,"ì":169,"í":170,"î":171,"ï":172,"ð":173,"ñ":174,"ò":175,"ó":176,"ô":177,"õ":178,"ö":179,"÷":180,"ø":181,"ù":182,"ú":183,"û":184,"ü":185,"ý":186,"þ":187,"ÿ":188,"Ā":189,"ā":190,"Ă":191,"ă":192,"Ą":193,"ą":194,"Ć":195,"ć":196,"Ĉ":197,"ĉ":198,"Ċ":199,"ċ":200,"Č":201,"č":202,"Ď":203,"ď":204,"Đ":205,"đ":206,"Ē":207,"ē":208,"Ĕ":209,"ĕ":210,"Ė":211,"ė":212,"Ę":213,"ę":214,"Ě":215,"ě":216,"Ĝ":217,"ĝ":218,"Ğ":219,"ğ":220,"Ġ":221,"ġ":222,"Ģ":223,"ģ":224,"Ĥ":225,"ĥ":226,"Ħ":227,"ħ":228,"Ĩ":229,"ĩ":230,"Ī":231,"ī":232,"Ĭ":233,"ĭ":234,"Į":235,"į":236,"İ":237,"ı":238,"IJ":239,"ij":240,"Ĵ":241,"ĵ":242,"Ķ":243,"ķ":244,"ĸ":245,"Ĺ":246,"ĺ":247,"Ļ":248,"ļ":249,"Ľ":250,"ľ":251,"Ŀ":252,"ŀ":253,"Ł":254,"ł":255,"Ń":256,"en":257,"fr":258} \ No newline at 
end of file diff --git a/myerrors_1551.out b/myerrors_1551.out new file mode 100644 index 0000000000000000000000000000000000000000..ab65a5a7401763e92b51f249db9d4702764cdce0 --- /dev/null +++ b/myerrors_1551.out @@ -0,0 +1,34 @@ +You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding. +Using pad_token, but it is not set yet. +Traceback (most recent call last): + File "/mnt/storage/aatherton/hf_synth_trans/synth_translation.py", line 130, in + trainer.evaluate(max_length=max_length) + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 159, in evaluate + return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 2972, in evaluate + output = eval_loop( + ^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 3151, in evaluation_loop + for step, inputs in enumerate(dataloader): + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/accelerate/data_loader.py", line 384, in __iter__ + current_batch = next(dataloader_iter) + ^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 633, in __next__ + data = self._next_data() + ^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 677, in _next_data + data = self._dataset_fetcher.fetch(index) # may raise StopIteration + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch + return self.collate_fn(data) + ^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/data/data_collator.py", line 586, in __call__ + features = self.tokenizer.pad( + ^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 3059, in pad + padding_strategy, _, max_length, _ = self._get_padding_truncation_strategies( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 2507, in _get_padding_truncation_strategies + raise ValueError( +ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`. 
diff --git a/myerrors_1552.out b/myerrors_1552.out new file mode 100644 index 0000000000000000000000000000000000000000..f6f885aa01e57e2d5410aa3d681f4a024427cb0c --- /dev/null +++ b/myerrors_1552.out @@ -0,0 +1,29 @@ + Map: 0%| | 0/7292 [00:00 + trainer.evaluate(max_length=max_length) + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 159, in evaluate + return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 2972, in evaluate + output = eval_loop( + ^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 3161, in evaluation_loop + loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 282, in prediction_step + generated_tokens = self.model.generate(**inputs, **gen_kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 1402, in generate + self._validate_model_class() + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 1197, in _validate_model_class + raise TypeError(exception_message) +TypeError: The current model class (BertModel) is not compatible with `.generate()`, as 
it doesn't have a language model head. Please use one of the following classes instead: {'BertLMHeadModel'} diff --git a/myerrors_1553.out b/myerrors_1553.out new file mode 100644 index 0000000000000000000000000000000000000000..f0089959e2078c121c14d02c46798e99bb2f5bc7 --- /dev/null +++ b/myerrors_1553.out @@ -0,0 +1,25 @@ + Map: 0%| | 0/1001 [00:00 + trainer.evaluate(max_length=max_length) + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 159, in evaluate + return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 2972, in evaluate + output = eval_loop( + ^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 3161, in evaluation_loop + loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 282, in prediction_step + generated_tokens = self.model.generate(**inputs, **gen_kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 1402, in generate + self._validate_model_class() + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 1197, in _validate_model_class + raise 
TypeError(exception_message) +TypeError: The current model class (BertModel) is not compatible with `.generate()`, as it doesn't have a language model head. Please use one of the following classes instead: {'BertLMHeadModel'} diff --git a/myerrors_1554.out b/myerrors_1554.out new file mode 100644 index 0000000000000000000000000000000000000000..4c8a4883571c0064f7dd045ef0e867085b2f6fa5 --- /dev/null +++ b/myerrors_1554.out @@ -0,0 +1,165 @@ + Map: 0%| | 0/1001 [00:00 + trainer.evaluate(max_length=max_length) + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 159, in evaluate + return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 2972, in evaluate + output = eval_loop( + ^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 3161, in evaluation_loop + loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 282, in prediction_step + generated_tokens = self.model.generate(**inputs, **gen_kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 1596, in generate + return self.greedy_search( + ^^^^^^^^^^^^^^^^^^^ + File 
"/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 2444, in greedy_search + outputs = self( + ^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 1235, in forward + outputs = self.bert( + ^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 1022, in forward + encoder_outputs = self.encoder( + ^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 612, in forward + layer_outputs = layer_module( + ^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward + self_attention_outputs = self.attention( + ^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward + self_outputs = self.self( + ^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 286, in forward + mixed_query_layer = self.query(hidden_states) + ^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/linear.py", line 114, in forward + return F.linear(input, self.weight, self.bias) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)` +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [96,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [97,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [98,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [99,0,0] Assertion `srcIndex < srcSelectDimSize` failed. 
+/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [100,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [101,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [102,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [103,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [104,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [105,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [106,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [107,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [108,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [109,0,0] Assertion `srcIndex < srcSelectDimSize` failed. 
+/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [110,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [111,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [112,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [113,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [114,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [115,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [116,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [117,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [118,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [119,0,0] Assertion `srcIndex < srcSelectDimSize` failed. 
+/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [120,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [121,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [122,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [123,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [124,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [125,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [126,0,0] Assertion `srcIndex < srcSelectDimSize` failed. +/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [127,0,0] Assertion `srcIndex < srcSelectDimSize` failed. diff --git a/myerrors_1555.out b/myerrors_1555.out new file mode 100644 index 0000000000000000000000000000000000000000..85e68c9fc635ed6872b05f9a4dbb9af44f37d515 --- /dev/null +++ b/myerrors_1555.out @@ -0,0 +1,46 @@ +You're using a GPT2TokenizerFast tokenizer. 
Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding. + 0%| | 0/16 [00:00 + model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 482, in from_pretrained + config, kwargs = AutoConfig.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/auto/configuration_auto.py", line 1007, in from_pretrained + config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/configuration_utils.py", line 620, in get_config_dict + config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/configuration_utils.py", line 675, in _get_config_dict + resolved_config_file = cached_file( + ^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/utils/hub.py", line 479, in cached_file + raise EnvironmentError( +OSError: aatherton2024/eng-nah-svo-cpt does not appear to have a file named config.json. Checkout 'https://huggingface.co/aatherton2024/eng-nah-svo-cpt/main' for available files. 
diff --git a/myoutput_1551.out b/myoutput_1551.out new file mode 100644 index 0000000000000000000000000000000000000000..922e54b747c5a72439d13ea889cb145b1f78b10a --- /dev/null +++ b/myoutput_1551.out @@ -0,0 +1 @@ +evaluate1 diff --git a/myoutput_1552.out b/myoutput_1552.out new file mode 100644 index 0000000000000000000000000000000000000000..922e54b747c5a72439d13ea889cb145b1f78b10a --- /dev/null +++ b/myoutput_1552.out @@ -0,0 +1 @@ +evaluate1 diff --git a/myoutput_1553.out b/myoutput_1553.out new file mode 100644 index 0000000000000000000000000000000000000000..922e54b747c5a72439d13ea889cb145b1f78b10a --- /dev/null +++ b/myoutput_1553.out @@ -0,0 +1 @@ +evaluate1 diff --git a/myoutput_1554.out b/myoutput_1554.out new file mode 100644 index 0000000000000000000000000000000000000000..922e54b747c5a72439d13ea889cb145b1f78b10a --- /dev/null +++ b/myoutput_1554.out @@ -0,0 +1 @@ +evaluate1 diff --git a/myoutput_1555.out b/myoutput_1555.out new file mode 100644 index 0000000000000000000000000000000000000000..1c5fb65ab4a88e7713dc7837f56a50a607d60f5b --- /dev/null +++ b/myoutput_1555.out @@ -0,0 +1,5 @@ +evaluate1 +trainer train 1 +{'loss': 1.2688, 'learning_rate': 5.380116959064328e-06, 'epoch': 2.19} +{'train_runtime': 56.3764, 'train_samples_per_second': 388.035, 'train_steps_per_second': 12.133, 'train_loss': 1.1421493842587833, 'epoch': 3.0} +evaluate 2 diff --git a/run.sh b/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..0cf9fbd94ebac03dc25a8049099b59a27e69bcf6 --- /dev/null +++ b/run.sh @@ -0,0 +1,9 @@ +#!/bin/sh +#SBATCH -c 1 +#SBATCH -t 0-12:00 +#SBATCH -p dl +#SBATCH --mem=10G +#SBATCH -o myoutput_%j.out +#SBATCH -e myerrors_%j.out +#SBATCH --gres=gpu:1 +python synth_translation.py \ No newline at end of file diff --git a/synth_translation.py b/synth_translation.py new file mode 100644 index 0000000000000000000000000000000000000000..71e2b1dfa953bf77deb3eebfe48bb8d233d78099 --- /dev/null +++ b/synth_translation.py @@ -0,0 +1,261 @@ 
import transformers
import numpy as np
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import DataCollatorForSeq2Seq
import evaluate
from transformers import Seq2SeqTrainingArguments
from transformers import Seq2SeqTrainer
from torch.utils.data import DataLoader
from transformers import pipeline
from accelerate import Accelerator
from transformers import get_scheduler
from huggingface_hub import Repository, get_full_repo_name
from tqdm.auto import tqdm
import torch
from torch import Tensor
# NOTE: `from transformers import AdamW` removed — it is deprecated (and deleted
# in recent transformers releases); torch.optim.AdamW is the supported equivalent.

# Load the parallel dataset and point at the continued-pretraining tokenizer repo.
raw_datasets = load_dataset("aatherton2024/eng-nah-svo")
model_checkpoint = "aatherton2024/eng-nah-svo-cpt"

# One-time tokenizer training path, kept disabled; flip to True to retrain the
# BPE tokenizer from the raw corpus and push it to the hub.
if False:
    def get_training_corpus(raw_datasets):
        # Yield 1000-example slices so train_new_from_iterator streams the corpus.
        return (
            raw_datasets["train"][i : i + 1000]
            for i in range(0, len(raw_datasets["train"]), 1000)
        )

    training_corpus = get_training_corpus(raw_datasets)
    old_tokenizer = AutoTokenizer.from_pretrained("gpt2")
    tokenizer = old_tokenizer.train_new_from_iterator(training_corpus, 52000)

    tokenizer.save_pretrained("eng-nah-svo-cpt")
    tokenizer.push_to_hub("eng-nah-svo-cpt")

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
# The GPT-2-style tokenizer ships without a pad token; DataCollatorForSeq2Seq
# needs one (see the ValueError in myerrors_*: "Asking to pad but the tokenizer
# does not have a padding token"). Only add it if it is actually missing.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Constants.
max_length = 128

# Tokenize source ("en") and target ("fr") columns in one call; truncation keeps
# every sequence within max_length.
def preprocess_function(examples):
    inputs = examples["en"]
    targets = examples["fr"]
    model_inputs = tokenizer(
        inputs, text_target=targets, max_length=max_length, truncation=True
    )
    return model_inputs

# Apply preprocessing in one go to all splits of the dataset.
tokenized_datasets = raw_datasets.map(
    preprocess_function,
    batched=True,
    remove_columns=raw_datasets["train"].column_names,
)

# Model choice for this problem.
if False:
    model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
else:
    from transformers import BertConfig, BertLMHeadModel
    from transformers import AutoModel
    # config = BertConfig(tokenizer.vocab_size, hidden_size=300,
    # num_hidden_layers=2, num_attention_heads=2, is_decoder=True,
    # add_cross_attention=True)
    # model = BertLMHeadModel(config)
    #model = AutoModel.from_pretrained("bert-base-cased")
    model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-fr")

# BUGFIX: the tokenizer vocab may be larger than the pretrained model's
# embedding table (we may have added '[PAD]' above, and this tokenizer was not
# trained with this model). Without resizing, embedding lookups index past the
# table and crash with the CUDA "srcIndex < srcSelectDimSize" assertion seen in
# myerrors_1554.out.
model.resize_token_embeddings(len(tokenizer))

# Data collator takes tokenizer and the model to deal with padding for dynamic batching.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Using BLEU (sacrebleu) as our metric for this problem.
metric = evaluate.load("sacrebleu")

# Decode predictions/labels and return the corpus BLEU score.
def compute_metrics(eval_preds):
    preds, labels = eval_preds
    # In case the model returns more than the prediction logits.
    if isinstance(preds, tuple):
        preds = preds[0]

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

    # Replace -100s in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Some simple post-processing; sacrebleu expects a list of references per prediction.
    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [[label.strip()] for label in decoded_labels]

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    return {"bleu": result["score"]}

### We now enter the fine-tuning phase of our model structure ###

# Seq2seq training arguments; predict_with_generate makes evaluate() call
# model.generate() so BLEU is computed on generated text.
args = Seq2SeqTrainingArguments(
    "eng-nah-svo-translation",
    evaluation_strategy="no",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=3,
    predict_with_generate=True,
    fp16=False,
    push_to_hub=True,
)

# Pass all information to trainer.
trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

print("evaluate1")
trainer.evaluate(max_length=max_length)
print("trainer train 1")
trainer.train()
print("evaluate 2")
trainer.evaluate(max_length=max_length)
trainer.push_to_hub(tags="translation", commit_message="Training complete")


# --- Manual training loop with accelerate (second phase) ---

tokenized_datasets.set_format("torch")
train_dataloader = DataLoader(
    tokenized_datasets["train"],
    shuffle=True,
    collate_fn=data_collator,
    batch_size=8,
)
eval_dataloader = DataLoader(
    tokenized_datasets["test"], collate_fn=data_collator, batch_size=8
)

model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
# Same resize rationale as above: the freshly loaded checkpoint must match the
# (possibly extended) tokenizer vocab before any forward pass.
model.resize_token_embeddings(len(tokenizer))

# torch.optim.AdamW replaces the deprecated transformers.AdamW.
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)


accelerator = Accelerator()
model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
    model, optimizer, train_dataloader, eval_dataloader
)


num_train_epochs = 3
num_update_steps_per_epoch = len(train_dataloader)
num_training_steps = num_train_epochs * num_update_steps_per_epoch

lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)


model_name = "model"

output_dir = "./output"
repo = Repository("/mnt/storage/aatherton/hf_eng_fra_trans", clone_from="aatherton2024/hf_eng_fra_trans")


# Move tensors to CPU, decode, and shape references for sacrebleu.
def postprocess(predictions, labels):
    predictions = predictions.cpu().numpy()
    labels = labels.cpu().numpy()

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)

    # Replace -100 in the labels as we can't decode them.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Some simple post-processing.
    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [[label.strip()] for label in decoded_labels]
    return decoded_preds, decoded_labels


progress_bar = tqdm(range(num_training_steps))

for epoch in range(num_train_epochs):
    # Training
    model.train()
    for batch in train_dataloader:
        outputs = model(**batch)
        loss = outputs.loss
        accelerator.backward(loss)

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)

    # Evaluation
    model.eval()
    for batch in tqdm(eval_dataloader):
        with torch.no_grad():
            generated_tokens = accelerator.unwrap_model(model).generate(
                batch["input_ids"],
                attention_mask=batch["attention_mask"],
                max_length=128,
            )
        labels = batch["labels"]

        # Necessary to pad predictions and labels for being gathered.
        generated_tokens = accelerator.pad_across_processes(
            generated_tokens, dim=1, pad_index=tokenizer.pad_token_id
        )
        labels = accelerator.pad_across_processes(labels, dim=1, pad_index=-100)

        predictions_gathered = accelerator.gather(generated_tokens)
        labels_gathered = accelerator.gather(labels)

        decoded_preds, decoded_labels = postprocess(predictions_gathered, labels_gathered)
        metric.add_batch(predictions=decoded_preds, references=decoded_labels)

    results = metric.compute()
    print(f"epoch {epoch}, BLEU score: {results['score']:.2f}")

    # Save and upload (main process only pushes; blocking=False overlaps upload
    # with the next epoch).
    accelerator.wait_for_everyone()
    unwrapped_model = accelerator.unwrap_model(model)
    unwrapped_model.save_pretrained(output_dir, save_function=accelerator.save)
    if accelerator.is_main_process:
        tokenizer.save_pretrained(output_dir)
        repo.push_to_hub(
            commit_message=f"Training in progress epoch {epoch}", blocking=False
        )


# Replace this with your own checkpoint
model_checkpoint = "aatherton2024/hf_eng_fra_reproduction"
translator = pipeline("translation", model=model_checkpoint)
translator("Default to expanded threads")
translator(
    "Unable to import %1 using the OFX importer plugin. This file is not the correct format."
)