Upload folder using huggingface_hub
- config.json +1 -1
- modeling_qllama.py +1 -1
config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "_name_or_path": "semran1/test3-quantized",
   "architectures": [
-    "
+    "QLlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
modeling_qllama.py CHANGED
@@ -750,7 +750,7 @@ class LlamaModel(LlamaPreTrainedModel):
 class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
 
 
-class
+class QLlamaForCausalLM(LlamaPreTrainedModel, GenerationMixin):
     _tied_weights_keys = ["lm_head.weight"]
     _tp_plan = {"lm_head": "colwise_rep"}
     _pp_plan = {"lm_head": (["hidden_states"], ["logits"])}
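The net effect of this commit is that the checkpoint now advertises the custom QLlamaForCausalLM architecture implemented in modeling_qllama.py. A minimal loading sketch follows; it assumes the repo also maps the class for transformers' remote-code mechanism (an "auto_map" entry, which is not visible in this diff), and it takes the repo id from the config's "_name_or_path" field:

# Sketch only: loading a Hub checkpoint whose architecture lives in a custom
# modeling file. Assumes config.json exposes QLlamaForCausalLM via auto_map
# so trust_remote_code=True can resolve it; only the "architectures" change
# is shown in the diff above.
from transformers import AutoConfig, AutoModelForCausalLM

model_id = "semran1/test3-quantized"  # from "_name_or_path" in config.json

config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
print(config.architectures)  # expected after this commit: ["QLlamaForCausalLM"]

model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)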