Krish Patel committed
Commit 948df4b · 1 Parent(s): 4adafc2

Changed ML Model

final.py CHANGED
@@ -12,7 +12,7 @@ dotenv.load_dotenv()
 nlp = spacy.load("en_core_web_sm")
 
 # Load the trained ML model
-model_path = "./results/checkpoint-5030" # Replace with the actual path to your model
+model_path = "./results/checkpoint-753" # Replace with the actual path to your model
 tokenizer = AutoTokenizer.from_pretrained('microsoft/deberta-v3-small')
 model = AutoModelForSequenceClassification.from_pretrained(model_path)
 model.eval()
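For reviewers who want to exercise the updated path, a minimal inference sketch built around the lines this hunk touches; the `classify` helper and its argmax return value are illustrative assumptions rather than code from final.py, and the checkpoint directory must be the one added in this commit.

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint directory introduced by this commit.
model_path = "./results/checkpoint-753"

# As in final.py: tokenizer from the hub repo, fine-tuned weights/config
# from the local checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-small")
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.eval()

def classify(text: str) -> int:
    # Illustrative helper: returns the argmax class id for one input string.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits
    return int(logits.argmax(dim=-1).item())

print(classify("example input sentence"))
```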
results/checkpoint-5030/optimizer.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cddba7c9ed0694f75f418657613b8400183c22b1e86f0d5fac90de0153d72e5f
-size 1135260474
results/checkpoint-5030/trainer_state.json DELETED
@@ -1,143 +0,0 @@
-{
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 5.0,
-  "eval_steps": 500,
-  "global_step": 5030,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.4970178926441352,
-      "grad_norm": 11.328213691711426,
-      "learning_rate": 5e-05,
-      "loss": 0.3471,
-      "step": 500
-    },
-    {
-      "epoch": 0.9940357852882704,
-      "grad_norm": 0.29149460792541504,
-      "learning_rate": 4.448123620309051e-05,
-      "loss": 0.1462,
-      "step": 1000
-    },
-    {
-      "epoch": 1.0,
-      "eval_loss": 0.14880910515785217,
-      "eval_runtime": 32.5193,
-      "eval_samples_per_second": 61.871,
-      "eval_steps_per_second": 7.749,
-      "step": 1006
-    },
-    {
-      "epoch": 1.4910536779324055,
-      "grad_norm": 0.04432953894138336,
-      "learning_rate": 3.896247240618102e-05,
-      "loss": 0.0738,
-      "step": 1500
-    },
-    {
-      "epoch": 1.9880715705765408,
-      "grad_norm": 0.004722778219729662,
-      "learning_rate": 3.3443708609271526e-05,
-      "loss": 0.0599,
-      "step": 2000
-    },
-    {
-      "epoch": 2.0,
-      "eval_loss": 0.17704755067825317,
-      "eval_runtime": 32.4526,
-      "eval_samples_per_second": 61.998,
-      "eval_steps_per_second": 7.765,
-      "step": 2012
-    },
-    {
-      "epoch": 2.485089463220676,
-      "grad_norm": 0.0014285552315413952,
-      "learning_rate": 2.792494481236203e-05,
-      "loss": 0.0176,
-      "step": 2500
-    },
-    {
-      "epoch": 2.982107355864811,
-      "grad_norm": 0.0008603875176049769,
-      "learning_rate": 2.240618101545254e-05,
-      "loss": 0.026,
-      "step": 3000
-    },
-    {
-      "epoch": 3.0,
-      "eval_loss": 0.16322186589241028,
-      "eval_runtime": 32.2403,
-      "eval_samples_per_second": 62.406,
-      "eval_steps_per_second": 7.816,
-      "step": 3018
-    },
-    {
-      "epoch": 3.4791252485089466,
-      "grad_norm": 0.000587798363994807,
-      "learning_rate": 1.688741721854305e-05,
-      "loss": 0.0042,
-      "step": 3500
-    },
-    {
-      "epoch": 3.9761431411530817,
-      "grad_norm": 0.00033068188349716365,
-      "learning_rate": 1.1368653421633555e-05,
-      "loss": 0.0012,
-      "step": 4000
-    },
-    {
-      "epoch": 4.0,
-      "eval_loss": 0.20389850437641144,
-      "eval_runtime": 33.2829,
-      "eval_samples_per_second": 60.452,
-      "eval_steps_per_second": 7.571,
-      "step": 4024
-    },
-    {
-      "epoch": 4.473161033797217,
-      "grad_norm": 0.0048806252889335155,
-      "learning_rate": 5.8498896247240626e-06,
-      "loss": 0.0013,
-      "step": 4500
-    },
-    {
-      "epoch": 4.970178926441352,
-      "grad_norm": 0.00042022508569061756,
-      "learning_rate": 3.3112582781456954e-07,
-      "loss": 0.0006,
-      "step": 5000
-    },
-    {
-      "epoch": 5.0,
-      "eval_loss": 0.19458653032779694,
-      "eval_runtime": 33.1006,
-      "eval_samples_per_second": 60.784,
-      "eval_steps_per_second": 7.613,
-      "step": 5030
-    }
-  ],
-  "logging_steps": 500,
-  "max_steps": 5030,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
-  "save_steps": 500,
-  "stateful_callbacks": {
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": true
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 1332007138928640.0,
-  "train_batch_size": 8,
-  "trial_name": null,
-  "trial_params": null
-}
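To compare the 5-epoch run deleted here with the 3-epoch run added further down, a small sketch that reads a trainer_state.json and prints its per-epoch eval_loss entries; the path is an assumption, and the deleted file is only readable on the parent revision 4adafc2.

```python
import json

# Assumed path: use results/checkpoint-753/trainer_state.json for the new run,
# or check out parent commit 4adafc2 to read results/checkpoint-5030/trainer_state.json.
state_path = "./results/checkpoint-753/trainer_state.json"

with open(state_path) as f:
    state = json.load(f)

# Evaluation entries in log_history carry "eval_loss"; training entries carry "loss".
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"epoch {entry['epoch']:.2f}  step {entry['step']:>5}  eval_loss {entry['eval_loss']:.4f}")
```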
results/{checkpoint-5030 → checkpoint-753}/config.json RENAMED
@@ -1,25 +1,25 @@
 {
-  "_name_or_path": "microsoft/deberta-v3-small",
+  "_name_or_path": "microsoft/deberta-v3-xsmall",
   "architectures": [
     "DebertaV2ForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
+  "hidden_size": 384,
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
+  "intermediate_size": 1536,
   "layer_norm_eps": 1e-07,
   "max_position_embeddings": 512,
   "max_relative_positions": -1,
   "model_type": "deberta-v2",
   "norm_rel_ebd": "layer_norm",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 6,
+  "num_attention_heads": 6,
+  "num_hidden_layers": 12,
   "pad_token_id": 0,
   "pooler_dropout": 0,
   "pooler_hidden_act": "gelu",
-  "pooler_hidden_size": 768,
+  "pooler_hidden_size": 384,
   "pos_att_type": [
     "p2c",
     "c2p"
results/{checkpoint-5030 → checkpoint-753}/model.safetensors RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f34f9b72aa96cb0927c5cfcdad25c0281212e297d61dd14dcacdb68138c40840
-size 567598552
+oid sha256:5c8bc472032aa1625a83fa5a61358b394aa47e8936084fd5d5fc53d39b4819e7
+size 283347432
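The config hunk above and the roughly halved model.safetensors size both reflect the switch from deberta-v3-small to deberta-v3-xsmall. A quick verification sketch that inspects the renamed checkpoint; it only assumes the checkpoint directory from this commit is present locally.

```python
from transformers import AutoConfig, AutoModelForSequenceClassification

# Read the config.json shown in the hunk above from the renamed checkpoint directory.
cfg = AutoConfig.from_pretrained("./results/checkpoint-753")
print(cfg._name_or_path, cfg.hidden_size, cfg.num_hidden_layers, cfg.num_attention_heads)
# Expected per this diff: microsoft/deberta-v3-xsmall, 384, 12, 6

# Optional: instantiate the model to count parameters (loads the full weights).
model = AutoModelForSequenceClassification.from_pretrained("./results/checkpoint-753")
print(f"{model.num_parameters():,} parameters")
```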
results/checkpoint-753/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d54a2486861a93c63c9d3f1ad129317a5ec061c153cc35f88750193eb19c8db
+size 566814714
results/{checkpoint-5030 → checkpoint-753}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d0c9d10259d2c7407ae8f630db471aed45598cb19d4fec8b8a17555906525a5
+oid sha256:bab711e45afdac9084a8d3228aa5d84f0234c10b8536782c428a3e5241e763c0
 size 14244
results/{checkpoint-5030 → checkpoint-753}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f0b07a36064ffcbc9c9cdc658bf6076e72b04ada218a099af03a6b74a3518d1
+oid sha256:2254eb2782bb8f96d8221a7f05be58b9aa6b59a9ac623c10f2d2cc29c6abdd07
 size 1064
results/checkpoint-753/trainer_state.json ADDED
@@ -0,0 +1,64 @@
+{
+  "best_metric": 0.13373112678527832,
+  "best_model_checkpoint": "./results\\checkpoint-503",
+  "epoch": 2.99403578528827,
+  "eval_steps": 500,
+  "global_step": 753,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.9980119284294234,
+      "eval_loss": 0.16927649080753326,
+      "eval_runtime": 34.3209,
+      "eval_samples_per_second": 58.623,
+      "eval_steps_per_second": 3.671,
+      "step": 251
+    },
+    {
+      "epoch": 1.9880715705765408,
+      "grad_norm": 3.436805248260498,
+      "learning_rate": 2.53479125248509e-05,
+      "loss": 0.2895,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.13373112678527832,
+      "eval_runtime": 32.7048,
+      "eval_samples_per_second": 61.52,
+      "eval_steps_per_second": 3.853,
+      "step": 503
+    },
+    {
+      "epoch": 2.99403578528827,
+      "eval_loss": 0.1674525886774063,
+      "eval_runtime": 33.2196,
+      "eval_samples_per_second": 60.567,
+      "eval_steps_per_second": 3.793,
+      "step": 753
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 753,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 198349894207488.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}
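The exact training arguments live only in the binary training_args.bin, so the sketch below reconstructs plausible TrainingArguments from the values recorded in this trainer_state.json (3 epochs, batch size 16, per-epoch evaluation, best checkpoint at step 503); every value is an inference, not the author's confirmed configuration.

```python
from transformers import TrainingArguments

# Values inferred from the trainer_state.json added in this commit; learning rate,
# warmup, weight decay, etc. are not recoverable from the state file and are omitted.
args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,              # "num_train_epochs": 3
    per_device_train_batch_size=16,  # "train_batch_size": 16
    logging_steps=500,               # "logging_steps": 500
    eval_strategy="epoch",           # named evaluation_strategy on older transformers releases
    save_strategy="epoch",           # checkpoints at steps 251/503/753 suggest per-epoch saves
    load_best_model_at_end=True,     # "best_model_checkpoint": "./results\\checkpoint-503"
    metric_for_best_model="eval_loss",
)
```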
results/{checkpoint-5030 → checkpoint-753}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e34c99e352dd9e22706f7f1143f42ff1385e64d6b188ee3ed83ab034094c017
+oid sha256:5d020540fc118248e604cd22f9ec20b7acb4023a8953f7fb309148a6a3c3deb8
 size 5240