KoichiYasuoka committed on
Commit
b6cd422
·
1 Parent(s): fdfb365

model improved for transformers 4.42

Browse files
config.json CHANGED
@@ -3,22 +3,11 @@
3
  "MistralForTokenClassification"
4
  ],
5
  "attention_dropout": 0.0,
6
- "auto_map": {
7
- "AutoModelForTokenClassification": "upos.MistralForTokenClassification"
8
- },
9
  "bos_token_id": 1,
10
  "custom_pipelines": {
11
  "upos": {
12
  "impl": "upos.BellmanFordTokenClassificationPipeline",
13
  "pt": "AutoModelForTokenClassification"
14
- },
15
- "token-classification": {
16
- "impl": "upos.RawTokenClassificationPipeline",
17
- "pt": "AutoModelForTokenClassification"
18
- },
19
- "ner": {
20
- "impl": "upos.RawTokenClassificationPipeline",
21
- "pt": "AutoModelForTokenClassification"
22
  }
23
  },
24
  "eos_token_id": 2,
@@ -159,9 +148,9 @@
159
  "rope_theta": 10000.0,
160
  "sliding_window": 4096,
161
  "tie_word_embeddings": false,
162
- "torch_dtype": "float32",
163
  "tokenizer_class": "LlamaTokenizerFast",
164
- "transformers_version": "4.41.2",
 
165
  "use_cache": true,
166
  "use_transformers_inputs": true,
167
  "vocab_size": 48000
 
3
  "MistralForTokenClassification"
4
  ],
5
  "attention_dropout": 0.0,
 
 
 
6
  "bos_token_id": 1,
7
  "custom_pipelines": {
8
  "upos": {
9
  "impl": "upos.BellmanFordTokenClassificationPipeline",
10
  "pt": "AutoModelForTokenClassification"
 
 
 
 
 
 
 
 
11
  }
12
  },
13
  "eos_token_id": 2,
 
148
  "rope_theta": 10000.0,
149
  "sliding_window": 4096,
150
  "tie_word_embeddings": false,
 
151
  "tokenizer_class": "LlamaTokenizerFast",
152
+ "torch_dtype": "float32",
153
+ "transformers_version": "4.42.4",
154
  "use_cache": true,
155
  "use_transformers_inputs": true,
156
  "vocab_size": 48000
maker.sh CHANGED
@@ -7,43 +7,7 @@ cat << 'EOF' > $TMP
7
  #! /usr/bin/env deepspeed
8
  src="KoichiYasuoka/RakutenAI-7B-upos"
9
  tgt="KoichiYasuoka/RakutenAI-7B-char-upos"
10
- from transformers import LlamaTokenizerFast,MistralModel,MistralPreTrainedModel,AutoConfig,DataCollatorForTokenClassification,TrainingArguments,Trainer
11
- from transformers.modeling_outputs import TokenClassifierOutput
12
-
13
- class MistralForTokenClassification(MistralPreTrainedModel):
14
- def __init__(self,config):
15
- from torch import nn
16
- super().__init__(config)
17
- self.num_labels=config.num_labels
18
- self.model=MistralModel(config)
19
- if hasattr(config,"classifier_dropout") and config.classifier_dropout is not None:
20
- classifier_dropout=config.classifier_dropout
21
- elif hasattr(config,"hidden_dropout") and config.hidden_dropout is not None:
22
- classifier_dropout=config.hidden_dropout
23
- else:
24
- classifier_dropout=0.1
25
- self.dropout=nn.Dropout(classifier_dropout)
26
- self.classifier=nn.Linear(config.hidden_size,config.num_labels)
27
- self.post_init()
28
- def get_input_embeddings(self):
29
- return self.model.embed_tokens
30
- def set_input_embeddings(self,value):
31
- self.model.embed_tokens=value
32
- def forward(self,input_ids=None,past_key_values=None,attention_mask=None,position_ids=None,inputs_embeds=None,labels=None,use_cache=None,output_attentions=None,output_hidden_states=None,return_dict=None):
33
- return_dict=return_dict if return_dict is not None else self.config.use_return_dict
34
- transformer_outputs=self.model(input_ids,past_key_values=past_key_values,attention_mask=attention_mask,position_ids=position_ids,inputs_embeds=inputs_embeds,use_cache=use_cache,output_attentions=output_attentions,output_hidden_states=output_hidden_states,return_dict=return_dict)
35
- hidden_states=transformer_outputs[0]
36
- hidden_states=self.dropout(hidden_states)
37
- logits=self.classifier(hidden_states)
38
- loss=None
39
- if labels is not None:
40
- from torch import nn
41
- loss_fct=nn.CrossEntropyLoss()
42
- loss=loss_fct(logits.view(-1,self.num_labels),labels.view(-1))
43
- if not return_dict:
44
- output=(logits,)+transformer_outputs[1:]
45
- return ((loss,)+output) if loss is not None else output
46
- return TokenClassifierOutput(loss=loss,logits=logits,hidden_states=transformer_outputs.hidden_states,attentions=transformer_outputs.attentions)
47
 
48
  class UPOSFileDataset(object):
49
  def __init__(self,conllu,tokenizer):
 
7
  #! /usr/bin/env deepspeed
8
  src="KoichiYasuoka/RakutenAI-7B-upos"
9
  tgt="KoichiYasuoka/RakutenAI-7B-char-upos"
10
+ from transformers import LlamaTokenizerFast,MistralForTokenClassification,AutoConfig,DataCollatorForTokenClassification,TrainingArguments,Trainer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  class UPOSFileDataset(object):
13
  def __init__(self,conllu,tokenizer):
pytorch_model-00001-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9390f326afba489dfb25938fe3e8f2f8d1d7227d1eb8b33d92a24069dced0b6f
3
  size 4913773120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99e57c2c6739ec6e5cf7442d46b02f67ab16997b35987abc10e4dfc0476ffecc
3
  size 4913773120
pytorch_model-00002-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:138623c84c3e5d85b51ae8017f08b39958a38f3dfb3978e16dc8d50d87209476
3
  size 4999825256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea8b58c17ad22f5fbf33b7b887d6572a55b1e2f8e1fbc4d202cd4979f6504e83
3
  size 4999825256
pytorch_model-00003-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:708976207133843ca3ae6c243cca9d5f54e8e089c695dcd910234e78149fb472
3
  size 4999825316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff9c56ec56c78cab1dc779aed4488eec481789f1c02cb6f38b7ee0f19f110a20
3
  size 4999825316
pytorch_model-00004-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50c0d1273e3cc6e45ccdc865e3d78bac97353b6181bb640a6664f5e831f80d8c
3
  size 4832018324
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d735a8bc7052b2432f8d3b3b2facc38f3a479c26795921ba2b82756a3af77080
3
  size 4832018324
pytorch_model-00005-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b0c0e4fdb9b4893ca0ab5ff41d3cff0162f54a20823b13369720e68308fca53
3
  size 4999825320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16cbd68930b892b79b995dc7749719655ce3d19ceea5d3fa4233200c8ce7c4c2
3
  size 4999825320
pytorch_model-00006-of-00006.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:076923a6b328a83f9c8030cfa6fbd91d8f30d9f6a9fedda4ba069152e0f16a81
3
  size 3960601264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a86e2932a529df7ecbc9fa0870bf79967830a33e97f4b023b891519663b591f
3
  size 3960601264
pytorch_model.bin.index.json CHANGED
@@ -3,8 +3,6 @@
3
  "total_size": 28705767664
4
  },
5
  "weight_map": {
6
- "classifier.bias": "pytorch_model-00006-of-00006.bin",
7
- "classifier.weight": "pytorch_model-00006-of-00006.bin",
8
  "model.embed_tokens.weight": "pytorch_model-00001-of-00006.bin",
9
  "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
10
  "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
@@ -294,6 +292,8 @@
294
  "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
295
  "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
296
  "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
297
- "model.norm.weight": "pytorch_model-00006-of-00006.bin"
 
 
298
  }
299
  }
 
3
  "total_size": 28705767664
4
  },
5
  "weight_map": {
 
 
6
  "model.embed_tokens.weight": "pytorch_model-00001-of-00006.bin",
7
  "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00006.bin",
8
  "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00006.bin",
 
292
  "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00006.bin",
293
  "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00006.bin",
294
  "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00006.bin",
295
+ "model.norm.weight": "pytorch_model-00006-of-00006.bin",
296
+ "score.bias": "pytorch_model-00006-of-00006.bin",
297
+ "score.weight": "pytorch_model-00006-of-00006.bin"
298
  }
299
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
+ "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
upos.py CHANGED
@@ -1,5 +1,4 @@
1
- from transformers import TokenClassificationPipeline,MistralModel,MistralPreTrainedModel
2
- from transformers.modeling_outputs import TokenClassifierOutput
3
 
4
  class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline):
5
  def __init__(self,**kwargs):
@@ -40,41 +39,3 @@ class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline):
40
  t["text"]=model_outputs["sentence"][t["start"]:t["end"]]
41
  return w
42
 
43
- class RawTokenClassificationPipeline(TokenClassificationPipeline):
44
- def check_model_type(self,supported_models):
45
- pass
46
-
47
- class MistralForTokenClassification(MistralPreTrainedModel):
48
- def __init__(self,config):
49
- from torch import nn
50
- super().__init__(config)
51
- self.num_labels=config.num_labels
52
- self.model=MistralModel(config)
53
- if hasattr(config,"classifier_dropout") and config.classifier_dropout is not None:
54
- classifier_dropout=config.classifier_dropout
55
- elif hasattr(config,"hidden_dropout") and config.hidden_dropout is not None:
56
- classifier_dropout=config.hidden_dropout
57
- else:
58
- classifier_dropout=0.1
59
- self.dropout=nn.Dropout(classifier_dropout)
60
- self.classifier=nn.Linear(config.hidden_size,config.num_labels)
61
- self.post_init()
62
- def get_input_embeddings(self):
63
- return self.model.embed_tokens
64
- def set_input_embeddings(self,value):
65
- self.model.embed_tokens=value
66
- def forward(self,input_ids=None,past_key_values=None,attention_mask=None,position_ids=None,inputs_embeds=None,labels=None,use_cache=None,output_attentions=None,output_hidden_states=None,return_dict=None):
67
- return_dict=return_dict if return_dict is not None else self.config.use_return_dict
68
- transformer_outputs=self.model(input_ids,past_key_values=past_key_values,attention_mask=attention_mask,position_ids=position_ids,inputs_embeds=inputs_embeds,use_cache=use_cache,output_attentions=output_attentions,output_hidden_states=output_hidden_states,return_dict=return_dict)
69
- hidden_states=transformer_outputs[0]
70
- hidden_states=self.dropout(hidden_states)
71
- logits=self.classifier(hidden_states)
72
- loss=None
73
- if labels is not None:
74
- from torch import nn
75
- loss_fct=nn.CrossEntropyLoss()
76
- loss=loss_fct(logits.view(-1,self.num_labels),labels.view(-1))
77
- if not return_dict:
78
- output=(logits,)+transformer_outputs[2:]
79
- return ((loss,)+output) if loss is not None else output
80
- return TokenClassifierOutput(loss=loss,logits=logits,hidden_states=transformer_outputs.hidden_states,attentions=transformer_outputs.attentions)
 
1
+ from transformers import TokenClassificationPipeline
 
2
 
3
  class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline):
4
  def __init__(self,**kwargs):
 
39
  t["text"]=model_outputs["sentence"][t["start"]:t["end"]]
40
  return w
41