getting RuntimeError: weight model.layers.0.self_attn.rotary_emb.inv_freq does not exist

#10
by amangrover - opened

I am new to ML and trying to deploy this model on Amazon SageMaker using the deploy script. Can someone guide me?
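For context, by "the deploy script" I mean something along the lines of the standard SageMaker Hugging Face LLM container example below. This is only a sketch of the general pattern; the container version, IAM role, instance type, GPU count, and timeout are placeholders, not necessarily what I am actually using.

# Rough sketch of a SageMaker deploy script for a TGI-hosted model.
# The version string, instance type, and env values are placeholders/assumptions.
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

role = sagemaker.get_execution_role()  # assumes a SageMaker execution role is configured
image_uri = get_huggingface_llm_image_uri("huggingface", version="0.9.3")  # TGI container; version is a guess

model = HuggingFaceModel(
    role=role,
    image_uri=image_uri,
    env={
        "HF_MODEL_ID": "Phind/Phind-CodeLlama-34B-v2",
        "SM_NUM_GPUS": "4",  # shard across the GPUs of the instance (placeholder)
        "MAX_INPUT_LENGTH": "4096",
        "MAX_TOTAL_TOKENS": "8192",
    },
)

predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.g5.12xlarge",  # placeholder instance type
    container_startup_health_check_timeout=600,
)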

I am seeing this error: RuntimeError: weight model.layers.0.self_attn.rotary_emb.inv_freq does not exist

Traceback (most recent call last):
  File "/opt/conda/bin/text-generation-server", line 8, in <module>
    sys.exit(app())
  File "/opt/conda/lib/python3.9/site-packages/typer/main.py", line 311, in __call__
    return get_command(self)(*args, **kwargs)
  File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 1130, in __call__
    return self.main(*args, **kwargs)
  File "/opt/conda/lib/python3.9/site-packages/typer/core.py", line 778, in main
    return _main(
  File "/opt/conda/lib/python3.9/site-packages/typer/core.py", line 216, in _main
    rv = self.invoke(ctx)
  File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 1657, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/opt/conda/lib/python3.9/site-packages/click/core.py", line 760, in invoke
    return __callback(*args, **kwargs)
  File "/opt/conda/lib/python3.9/site-packages/typer/main.py", line 683, in wrapper
    return callback(**use_params)  # type: ignore
  File "/opt/conda/lib/python3.9/site-packages/text_generation_server/cli.py", line 78, in serve
    server.serve(
  File "/opt/conda/lib/python3.9/site-packages/text_generation_server/server.py", line 175, in serve
    asyncio.run(
  File "/opt/conda/lib/python3.9/asyncio/runners.py", line 44, in run
    return loop.run_until_complete(main)
  File "/opt/conda/lib/python3.9/asyncio/base_events.py", line 634, in run_until_complete
    self.run_forever()
  File "/opt/conda/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
    self._run_once()
  File "/opt/conda/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
    handle._run()
  File "/opt/conda/lib/python3.9/asyncio/events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/server.py", line 142, in serve_inner
model = get_model(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/init.py", line 185, in get_model
return FlashLlama(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/flash_llama.py", line 65, in init
model = FlashLlamaForCausalLM(config, weights)
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 452, in init
self.model = FlashLlamaModel(config, weights)
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 390, in init
[
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 391, in
FlashLlamaLayer(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 326, in init
self.self_attn = FlashLlamaAttention(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 183, in init
self.rotary_emb = PositionRotaryEmbedding.load(
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/layers.py", line 395, in load
inv_freq = weights.get_tensor(f"{prefix}.inv_freq")
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/weights.py", line 62, in get_tensor
filename, tensor_name = self.get_filename(tensor_name)
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/weights.py", line 49, in get_filename
raise RuntimeError(f"weight {tensor_name} does not exist")
File "/opt/conda/lib/python3.9/site-packages/text_generation_server/server.py", line 142, in serve_inner model = get_model( File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/init.py", line 185, in get_model return FlashLlama( File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/flash_llama.py", line 65, in init model = FlashLlamaForCausalLM(config, weights) File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 452, in init self.model = FlashLlamaModel(config, weights) File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 390, in init [ File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 391, in FlashLlamaLayer( File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 326, in init self.self_attn = FlashLlamaAttention( File "/opt/conda/lib/python3.9/site-packages/text_generation_server/models/custom_modeling/flash_llama_modeling.py", line 183, in init self.rotary_emb = PositionRotaryEmbedding.load( File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/layers.py", line 395, in load inv_freq = weights.get_tensor(f"{prefix}.inv_freq") File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/weights.py", line 62, in get_tensor filename, tensor_name = self.get_filename(tensor_name) File "/opt/conda/lib/python3.9/site-packages/text_generation_server/utils/weights.py", line 49, in get_filename raise RuntimeError(f"weight {tensor_name} does not exist")

2023-09-07T01:02:08.580-04:00

Copy
RuntimeError: weight model.layers.0.self_attn.rotary_emb.inv_freq does not exist

I'm sorry, but without code examples it's a little difficult to diagnose what your issue is. The trouble lies in not knowing why it is trying to load model.layers.0.self_attn.rotary_emb.inv_freq.

These are the weights that are in the checkpoints and that can be run: "transformer.word_embeddings", "transformer.word_embeddings_layernorm", "model.embed_tokens.weight", "model.layers.0.self_attn.q_proj.weight", "model.layers.0.self_attn.k_proj.weight", "model.layers.0.self_attn.v_proj.weight", "model.layers.0.self_attn.o_proj.weight", "model.layers.0.mlp.gate_proj.weight", "model.layers.0.mlp.up_proj.weight", "model.layers.0.mlp.down_proj.weight", "model.layers.0.input_layernorm.weight" (the same layer weights repeat with the index going up to 49 instead of 0), plus "model.norm.weight", "lm_head", "transformer.h", and "transformer.ln_f". I'm not sure if there are more, but I'm not getting any errors from not loading them.

That being said, unless you're doing something similar to me, where I'm running a specific quantization and need to set a custom device_map, you should be able to simply run:

model_path = "Phind/Phind-CodeLlama-34B-v2"
model = LlamaForCausalLM.from_pretrained(model_path, quantization_config=bnb_config, device_map="auto")

Also, there's no guarantee that the device_map is actually your problem, but given that the failure happens while it's setting up the layers, I'm guessing it's trying to map them improperly.
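For completeness, here is a self-contained sketch of that load. The 4-bit BitsAndBytesConfig values are just an example of the kind of quantization setup I mean, not something the error above tells us you need; drop quantization_config entirely to load in full precision.

# Sketch: loading the checkpoint directly with transformers.
# The BitsAndBytesConfig values below are example assumptions, not a
# known-good setup for this model.
import torch
from transformers import AutoTokenizer, LlamaForCausalLM, BitsAndBytesConfig

model_path = "Phind/Phind-CodeLlama-34B-v2"

# Example 4-bit quantization config (assumption; adjust or remove as needed).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate spread layers across available devices
)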
