Upload folder using huggingface_hub
- config.json +1 -1
- modeling_qllama.py +1 -1
config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "_name_or_path": "semran1/test3-quantized",
   "architectures": [
-    "
+    "QLlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
modeling_qllama.py CHANGED
@@ -750,7 +750,7 @@ class LlamaModel(LlamaPreTrainedModel):
 class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
 
 
-class
+class QLlamaForCausalLM(LlamaPreTrainedModel, GenerationMixin):
     _tied_weights_keys = ["lm_head.weight"]
     _tp_plan = {"lm_head": "colwise_rep"}
     _pp_plan = {"lm_head": (["hidden_states"], ["logits"])}
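The net effect of this commit is that the checkpoint now advertises the custom QLlamaForCausalLM architecture implemented in modeling_qllama.py. A minimal loading sketch follows; it assumes the repo also maps the class for transformers' remote-code mechanism (an "auto_map" entry, which is not visible in this diff), and it takes the repo id from the config's "_name_or_path" field:

# Sketch only: loading a Hub checkpoint whose architecture lives in a custom
# modeling file. Assumes config.json exposes QLlamaForCausalLM via auto_map
# so trust_remote_code=True can resolve it; only the "architectures" change
# is shown in the diff above.
from transformers import AutoConfig, AutoModelForCausalLM

model_id = "semran1/test3-quantized"  # from "_name_or_path" in config.json

config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
print(config.architectures)  # expected after this commit: ["QLlamaForCausalLM"]

model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)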