diff --git "a/logs/l40_p2_eval.txt" "b/logs/l40_p2_eval.txt"
new file mode 100644
--- /dev/null
+++ "b/logs/l40_p2_eval.txt"
@@ -0,0 +1,523 @@
+Submitting job: /common/home/users/d/dh.huang.2023/code/logical-reasoning/scripts/eval-mgtv.sh
+Current Directory:
+/common/home/users/d/dh.huang.2023/code/logical-reasoning
+Sun Jul 14 10:45:51 2024
++-----------------------------------------------------------------------------------------+
+| NVIDIA-SMI 550.90.07              Driver Version: 550.90.07      CUDA Version: 12.4     |
+|-----------------------------------------+------------------------+----------------------+
+| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
+| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
+|                                         |                        |               MIG M. |
+|=========================================+========================+======================|
+|   0  NVIDIA L40                     On  |   00000000:81:00.0 Off |                    0 |
+| N/A   31C    P8             35W /  300W |       1MiB /  46068MiB |      0%      Default |
+|                                         |                        |                  N/A |
++-----------------------------------------+------------------------+----------------------+
+
++-----------------------------------------------------------------------------------------+
+| Processes:                                                                              |
+|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |
+|        ID   ID                                                               Usage      |
+|=========================================================================================|
+|  No running processes found                                                             |
++-----------------------------------------------------------------------------------------+
+Linux lagoon 4.18.0-553.5.1.el8_10.x86_64 #1 SMP Thu Jun 6 09:41:19 UTC 2024 x86_64 x86_64 x86_64 GNU/Linux
+NAME="Rocky Linux"
+VERSION="8.10 (Green Obsidian)"
+ID="rocky"
+ID_LIKE="rhel centos fedora"
+VERSION_ID="8.10"
+PLATFORM_ID="platform:el8"
+PRETTY_NAME="Rocky Linux 8.10 (Green Obsidian)"
+ANSI_COLOR="0;32"
+LOGO="fedora-logo-icon"
+CPE_NAME="cpe:/o:rocky:rocky:8:GA"
+HOME_URL="https://rockylinux.org/"
+BUG_REPORT_URL="https://bugs.rockylinux.org/"
+SUPPORT_END="2029-05-31"
+ROCKY_SUPPORT_PRODUCT="Rocky-Linux-8"
+ROCKY_SUPPORT_PRODUCT_VERSION="8.10"
+REDHAT_SUPPORT_PRODUCT="Rocky Linux"
+REDHAT_SUPPORT_PRODUCT_VERSION="8.10"
+Architecture:        x86_64
+CPU op-mode(s):      32-bit, 64-bit
+Byte Order:          Little Endian
+CPU(s):              128
+On-line CPU(s) list: 0-127
+Thread(s) per core:  2
+Core(s) per socket:  64
+Socket(s):           1
+NUMA node(s):        1
+Vendor ID:           AuthenticAMD
+CPU family:          25
+Model:               1
+Model name:          AMD EPYC 7763 64-Core Processor
+Stepping:            1
+CPU MHz:             3088.886
+CPU max MHz:         3529.0520
+CPU min MHz:         1500.0000
+BogoMIPS:            4891.15
+Virtualization:      AMD-V
+L1d cache:           32K
+L1i cache:           32K
+L2 cache:            512K
+L3 cache:            32768K
+NUMA node0 CPU(s):   0-127
+Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 invpcid_single hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd amd_ppin brs arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
+MemTotal:       527669148 kB
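[Annotation: the preamble above records the hardware the job ran on: one idle NVIDIA L40 with ~46 GB of VRAM, comfortably above the ~15 GB a 7B bfloat16 model needs for weights alone. The same sanity check can be done programmatically before loading the model; a minimal sketch, assuming only that torch is installed in the llm-perf-bench env:

    # Confirm the GPU the job preamble reports is visible to PyTorch.
    import torch

    assert torch.cuda.is_available(), "no CUDA device visible"
    props = torch.cuda.get_device_properties(0)
    print(torch.cuda.get_device_name(0))          # e.g. "NVIDIA L40"
    print(f"{props.total_memory / 2**30:.1f} GiB total")
    print(f"CUDA runtime: {torch.version.cuda}")  # driver reports CUDA 12.4 above
]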
+Eval internlm/internlm2_5-7b-chat-1m with llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full
+[INFO|tokenization_utils_base.py:2108] 2024-07-14 10:46:04,406 >> loading file ./tokenizer.model from cache at /common/scratch/users/d/dh.huang.2023/transformers/hub/models--internlm--internlm2_5-7b-chat-1m/snapshots/8d1a709a04d71440ef3df6ebbe204672f411c8b6/./tokenizer.model
+[INFO|tokenization_utils_base.py:2108] 2024-07-14 10:46:04,406 >> loading file added_tokens.json from cache at None
+[INFO|tokenization_utils_base.py:2108] 2024-07-14 10:46:04,406 >> loading file special_tokens_map.json from cache at /common/scratch/users/d/dh.huang.2023/transformers/hub/models--internlm--internlm2_5-7b-chat-1m/snapshots/8d1a709a04d71440ef3df6ebbe204672f411c8b6/special_tokens_map.json
+[INFO|tokenization_utils_base.py:2108] 2024-07-14 10:46:04,406 >> loading file tokenizer_config.json from cache at /common/scratch/users/d/dh.huang.2023/transformers/hub/models--internlm--internlm2_5-7b-chat-1m/snapshots/8d1a709a04d71440ef3df6ebbe204672f411c8b6/tokenizer_config.json
+[INFO|tokenization_utils_base.py:2108] 2024-07-14 10:46:04,406 >> loading file tokenizer.json from cache at None
+/common/home/users/d/dh.huang.2023/.conda/envs/llm-perf-bench/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
+  warnings.warn(
+[INFO|configuration_utils.py:733] 2024-07-14 10:46:06,110 >> loading configuration file config.json from cache at /common/scratch/users/d/dh.huang.2023/transformers/hub/models--internlm--internlm2_5-7b-chat-1m/snapshots/8d1a709a04d71440ef3df6ebbe204672f411c8b6/config.json
+[INFO|configuration_utils.py:733] 2024-07-14 10:46:06,613 >> loading configuration file config.json from cache at /common/scratch/users/d/dh.huang.2023/transformers/hub/models--internlm--internlm2_5-7b-chat-1m/snapshots/8d1a709a04d71440ef3df6ebbe204672f411c8b6/config.json
+[INFO|configuration_utils.py:796] 2024-07-14 10:46:06,613 >> Model config InternLM2Config {
+  "_name_or_path": "internlm/internlm2_5-7b-chat-1m",
+  "architectures": [
+    "InternLM2ForCausalLM"
+  ],
+  "attn_implementation": "eager",
+  "auto_map": {
+    "AutoConfig": "internlm/internlm2_5-7b-chat-1m--configuration_internlm2.InternLM2Config",
+    "AutoModel": "internlm/internlm2_5-7b-chat-1m--modeling_internlm2.InternLM2ForCausalLM",
+    "AutoModelForCausalLM": "internlm/internlm2_5-7b-chat-1m--modeling_internlm2.InternLM2ForCausalLM"
+  },
+  "bias": false,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 262144,
+  "model_type": "internlm2",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pad_token_id": 2,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 2.5,
+    "type": "dynamic"
+  },
+  "rope_theta": 50000000,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.41.2",
+  "use_cache": true,
+  "vocab_size": 92544
+}
+
+[INFO|modeling_utils.py:3474] 2024-07-14 10:46:06,944 >> loading weights file model.safetensors from cache at /common/scratch/users/d/dh.huang.2023/transformers/hub/models--internlm--internlm2_5-7b-chat-1m/snapshots/8d1a709a04d71440ef3df6ebbe204672f411c8b6/model.safetensors.index.json
+[INFO|modeling_utils.py:1519] 2024-07-14 10:46:06,945 >> Instantiating InternLM2ForCausalLM model under default dtype torch.bfloat16.
+[INFO|configuration_utils.py:962] 2024-07-14 10:46:06,946 >> Generate config GenerationConfig {
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 2
+}
+
+Loading checkpoint shards:   0%|          | 0/8 [00:00… remaining shard-loading progress bars truncated in capture]
+[INFO|modeling_utils.py:…] >> All model checkpoint weights were used when initializing InternLM2ForCausalLM.
+[INFO|modeling_utils.py:4288] 2024-07-14 10:46:16,485 >> All the weights of InternLM2ForCausalLM were initialized from the model checkpoint at internlm/internlm2_5-7b-chat-1m.
+If your task is similar to the task the model of the checkpoint was trained on, you can already use InternLM2ForCausalLM for predictions without further training.
+[INFO|configuration_utils.py:917] 2024-07-14 10:46:16,739 >> loading configuration file generation_config.json from cache at /common/scratch/users/d/dh.huang.2023/transformers/hub/models--internlm--internlm2_5-7b-chat-1m/snapshots/8d1a709a04d71440ef3df6ebbe204672f411c8b6/generation_config.json
+[INFO|configuration_utils.py:962] 2024-07-14 10:46:16,739 >> Generate config GenerationConfig {
+  "bos_token_id": 1,
+  "eos_token_id": [
+    2,
+    92542
+  ],
+  "pad_token_id": 2
+}
+
+Map:   0%|          | 0/25000 [00:00… tokenization progress over 25000 examples truncated in capture]
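[Annotation: the block above is the first model load of the run: base internlm2_5-7b-chat-1m weights instantiated in bfloat16 via the repo's remote code, evaluated with the LoRA adapter named on the "Eval ... with ..." line, then tokenization of the eval set. llama-factory drives this internally; a hand-rolled equivalent of the load step, as a hedged sketch in plain transformers/peft (device_map is an assumption, not shown in the log):

    # Load the base model and attach the llama-factory LoRA adapter.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel

    base = "internlm/internlm2_5-7b-chat-1m"
    adapter = "llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full"

    tokenizer = AutoTokenizer.from_pretrained(base, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        base,
        torch_dtype=torch.bfloat16,  # matches "default dtype torch.bfloat16" above
        trust_remote_code=True,      # InternLM2 ships its own modeling code
        device_map="auto",
    )
    model = PeftModel.from_pretrained(model, adapter)
    model.eval()
]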
+[The tokenizer/configuration/model-loading block above repeats verbatim, apart from timestamps, five more times (10:53, 11:01, 11:08, 11:16 and 11:24), once per evaluation pass. Each reload is followed by a generation pass whose progress bar begins at "0%|          | 0/3000" before the capture truncates; the log ends mid-bar on the final pass.]
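[Annotation: each truncated "0/3000" bar is a generation pass over the 3,000-item eval split, using the GenerationConfig printed above (note the two stop tokens, 2 and 92542). A hedged sketch of what such a pass typically looks like; batch size, max_new_tokens and the left-padding choice are assumptions, not taken from the log:

    # One eval pass: batched generation over the 3000-item split.
    import torch

    @torch.no_grad()
    def run_eval(model, tokenizer, prompts, batch_size=16, max_new_tokens=512):
        tokenizer.padding_side = "left"  # decoder-only models: pad on the left
        outputs = []
        for i in range(0, len(prompts), batch_size):
            batch = tokenizer(prompts[i:i + batch_size],
                              return_tensors="pt", padding=True).to(model.device)
            generated = model.generate(
                **batch,
                max_new_tokens=max_new_tokens,
                eos_token_id=[2, 92542],  # from the logged GenerationConfig
                pad_token_id=2,
            )
            # Keep only the newly generated tokens, not the echoed prompt.
            completions = generated[:, batch["input_ids"].shape[1]:]
            outputs.extend(
                tokenizer.batch_decode(completions, skip_special_tokens=True))
        return outputs

Decoded completions would then be scored against the reference answers; the scoring step does not appear in this capture.]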