dh-mc committed on
Commit
863a809
·
1 Parent(s): c905041

finalized results: openai, qwen2.5 3/7b, internlm 7b/7b-1m

Browse files
data/Qwen2.5-3B-Instruct_metrics.csv CHANGED
@@ -1,12 +1,12 @@
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.float16_lf,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
3
- 0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.float16_lf,0.689,0.7450174119748659,0.689,0.709114466474576,0.9986666666666667
4
- 0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.float16_lf,0.6556666666666666,0.7590430811422313,0.6556666666666666,0.6934194398116857,1.0
5
- 0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.float16_lf,0.6963333333333334,0.7550938479315918,0.6963333333333334,0.71844324172961,1.0
6
- 0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.float16_lf,0.6853333333333333,0.7542524799326954,0.6853333333333333,0.7128732915785243,1.0
7
- 1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.float16_lf,0.6846666666666666,0.7564071354272528,0.6846666666666666,0.7125676758538035,1.0
8
- 1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.float16_lf,0.6896666666666667,0.7690917466956201,0.6896666666666667,0.720231747443145,1.0
9
- 1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.float16_lf,0.7256666666666667,0.7753705482689578,0.7256666666666667,0.7440390153124937,1.0
10
- 1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.float16_lf,0.708,0.7659638403826392,0.708,0.7293997518219294,1.0
11
- 1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.float16_lf,0.7056666666666667,0.7717562122699148,0.7056666666666667,0.729817759784445,1.0
12
- 2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.float16_lf,0.7003333333333334,0.7698824212888824,0.7003333333333334,0.726563613830647,1.0
 
1
  epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.bfloat16_lf,0.569,0.6886829973126811,0.569,0.5333701103243736,1.0
3
+ 0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.bfloat16_lf,0.6833333333333333,0.7269965624622317,0.6833333333333333,0.6985990460224034,0.999
4
+ 0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.bfloat16_lf,0.6766666666666666,0.7583682510610537,0.6766666666666666,0.705917900971524,1.0
5
+ 0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.bfloat16_lf,0.7023333333333334,0.7500816082620184,0.7023333333333334,0.7196546370690564,1.0
6
+ 0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.bfloat16_lf,0.71,0.7658586215313521,0.71,0.7318979017034846,1.0
7
+ 1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.bfloat16_lf,0.7053333333333334,0.7645466069416816,0.7053333333333334,0.7278931369071717,1.0
8
+ 1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.bfloat16_lf,0.6943333333333334,0.7711644797484947,0.6943333333333334,0.7225635970673485,1.0
9
+ 1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.bfloat16_lf,0.7303333333333333,0.7695138336135122,0.7303333333333333,0.7445711153936881,1.0
10
+ 1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.bfloat16_lf,0.7273333333333334,0.7726772149368513,0.7273333333333334,0.7426310656072148,1.0
11
+ 1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.bfloat16_lf,0.7093333333333334,0.7726776026356509,0.7093333333333334,0.7321516443823387,1.0
12
+ 2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.bfloat16_lf,0.7166666666666667,0.7741275713911147,0.7166666666666667,0.7370173522943904,1.0
data/Qwen2.5-3B-Instruct_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/Qwen2.5-3B-Instruct_shots_metrics.csv CHANGED
@@ -1,8 +1,8 @@
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
- 0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
3
- 5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.639,0.7226431221398603,0.639,0.641568790114368,0.9973333333333333
4
- 10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.625,0.7164154004131771,0.625,0.6402584852791593,0.995
5
- 20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4666666666666667,0.6987641430848737,0.46666666666666673,0.5265074036660548,0.9316666666666666
6
- 30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.475,0.6880994914236809,0.475,0.5310948082593374,0.904
7
- 40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.584,0.7065303262365236,0.584,0.6214992664375876,0.7173333333333334
8
- 50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6093333333333333,0.7120506480394511,0.6093333333333333,0.6451959368825358,0.574
 
1
  shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5783333333333334,0.6938704799615603,0.5783333333333334,0.5482371104670698,1.0
3
+ 5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.6446666666666667,0.7230280501918229,0.6446666666666667,0.6455439085887453,0.9973333333333333
4
+ 10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.6356666666666667,0.717399441576705,0.6356666666666667,0.647050125518008,0.995
5
+ 20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4806666666666667,0.6978154586535756,0.4806666666666667,0.5325218737400426,0.9316666666666666
6
+ 30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.48833333333333334,0.6902162639713183,0.48833333333333334,0.5393146850625054,0.904
7
+ 40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.595,0.7060453498136213,0.595,0.6271468055875201,0.7173333333333334
8
+ 50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6186666666666667,0.7088242726720394,0.6186666666666667,0.6483835468519816,0.574
data/internlm2_5-7b-chat-1m_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/internlm2_5-7b-chat_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/openai_results.csv CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/01a_internlm2_5-7b-chat-1m_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f01d281a0aca1ecf7f4709236483cd8df03723dd37db2c919d60c1ce794bd86
3
- size 2394865
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd1d720ee03fe5f6243b6be4af970d687f70dcc2455babd96d74f578db99d7e
3
+ size 2394646
notebooks/01a_internlm2_5-7b-chat_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98790c1668d147990a375ab0c467d196eaed8a9b8d65a25fa448a4fa0971657d
3
- size 6121512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a1898e0248db2b32153ab90e358130da2c3219b15bbecaff2b1cbb544e2ae46
3
+ size 6121033
notebooks/02c_Qwen2.5-3B-Instruct_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72aa616f3daa58fb0b1622cf2031ee49b69d8fbad5e1776f195cd9312642d413
3
- size 7147148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:093c8cb58c4b539d898a812c9f67f37e8c4a612f7528c49635b5103dcfe7e372
3
+ size 7235857
notebooks/02e_Qwen2.5-1.5B-Instruct_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:663af79c7319b3f22b0612e2a38d541edb9f1f88d1300d2a5c291dd3d9ad9873
3
- size 6228358
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ea5ef736646432a76a0862b4ace6760d5571910206959b14067652d858ef67f
3
+ size 631284
notebooks/04b_OpenAI-Models_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e521c17eacedb18f9d479e87577c3870e7ab15155678967fd456fc0a79e96d6
3
- size 4856810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:990e48b9542a39813f7237515ba472c9e2911275a9e9fa7e48dc62213cb53776
3
+ size 4856591