semran1 committed
Commit ddefc34 · verified · Parent: 8798b9e

Upload folder using huggingface_hub

Files changed (2):
1. config.json (+1 -1)
2. modeling_qllama.py (+1 -1)
config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "_name_or_path": "semran1/test3-quantized",
   "architectures": [
-    "LlamaForCausalLM"
+    "QLlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
modeling_qllama.py CHANGED
@@ -750,7 +750,7 @@ class LlamaModel(LlamaPreTrainedModel):
 class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
 
 
-class LlamaForCausalLM(LlamaPreTrainedModel, GenerationMixin):
+class QLlamaForCausalLM(LlamaPreTrainedModel, GenerationMixin):
     _tied_weights_keys = ["lm_head.weight"]
     _tp_plan = {"lm_head": "colwise_rep"}
     _pp_plan = {"lm_head": (["hidden_states"], ["logits"])}
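
The class rename mirrors the config change above: for the checkpoint to load, the name listed under "architectures" must resolve to a class that modeling_qllama.py actually defines. A small illustrative check (not part of this commit), assuming both files sit in the working directory and transformers is installed so the module imports cleanly:

# Illustrative consistency check (hypothetical helper, not part of the repo):
# confirm every entry in config.json's "architectures" is defined in modeling_qllama.py.
import importlib
import json

with open("config.json") as f:
    cfg = json.load(f)

qllama = importlib.import_module("modeling_qllama")  # assumes the working dir is on sys.path
for arch in cfg.get("architectures", []):
    assert hasattr(qllama, arch), f"{arch} is not defined in modeling_qllama.py"
print("config.json architectures match classes in modeling_qllama.py")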