haijunlv committed
Commit 28c9941 · verified · 1 Parent(s): 011ff6d

Update modeling_internlm3.py (#18)


- Update modeling_internlm3.py (94cd46f35e87e1b3b2b82df73230bdb5275cd652)
- Update tokenization_internlm3.py (0f3d7019880c0b6f7a9d35b392d21cbfca07478b)

modeling_internlm3.py CHANGED

@@ -793,7 +793,7 @@ class InternLM3Model(InternLM3PreTrainedModel):
     Args:
         config: InternLM3Config
     """
-
+    _auto_class = "AutoModel"
     def __init__(self, config: InternLM3Config):
         super().__init__(config)
         self.padding_idx = config.pad_token_id

@@ -1070,6 +1070,7 @@ class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...


 class InternLM3ForCausalLM(InternLM3PreTrainedModel, GenerationMixin):
+    _auto_class = "AutoModelForCausalLM"
     _tied_weights_keys = ["lm_head.weight"]
     _tp_plan = {"lm_head": "colwise_rep"}

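For context, setting `_auto_class` on the class has the same effect as registering the custom model through the public API: when the attribute is set, `save_pretrained` records an `auto_map` entry so the `Auto*` factories can resolve the remote-code class. A minimal sketch of the equivalent call (the import path assumes the file ships alongside the checkpoint):

from modeling_internlm3 import InternLM3ForCausalLM

# Equivalent to the class attribute added in the diff above: makes
# save_pretrained() write an auto_map entry into config.json so that
# AutoModelForCausalLM.from_pretrained(..., trust_remote_code=True)
# can locate this custom class.
InternLM3ForCausalLM.register_for_auto_class("AutoModelForCausalLM")
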
tokenization_internlm3.py CHANGED

@@ -67,7 +67,7 @@ class InternLM3Tokenizer(PreTrainedTokenizer):
             Whether or not to add an initial space to the input. This allows to treat the leading word just as any
             other word. Again, this should be set with `from_slow=True` to make sure it's taken into account.
     """
-
+    _auto_class = "AutoTokenizer"
     vocab_files_names = VOCAB_FILES_NAMES
     model_input_names = ["input_ids", "attention_mask"]
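
With both `_auto_class` attributes in place, the checkpoint should load through the standard `Auto*` entry points. A minimal usage sketch, assuming a hypothetical repo id that is not part of this commit:

from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "internlm/internlm3-8b-instruct"  # hypothetical repo id, for illustration only

# trust_remote_code=True lets the Auto* classes follow the auto_map entries
# to the custom InternLM3 tokenizer and model classes shipped with the repo.
tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True)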