{
  "output_root": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38",
  "files": "/gpfs/projects/bsc88/corpus-utils-lm/23-12-2020-72f8c7e/output/model-ready_output/2020-12-23-1900-daf4-ab38/train_valid_test_split_output/2020-12-23-1905-daf4-a0e0/train.txt",
  "vocab_name": "roberta-ca",
  "clean_text": true,
  "handle_chinese_chars": true,
  "strip_accents": false,
  "lowercase": false,
  "vocab_size": 52000,
  "limit_alphabet": 1000,
  "show_progress": true,
  "min_frequency": 2,
  "extra_tokens": [],
  "reserve_tokens": 0,
  "tokenizer": "bbpe",
  "commit_hash": "daf4d660ec8a4b28d2bc29b3063779100ab85796\n"
}