dh-mc committed on
Commit
84958e4
·
1 Parent(s): 6807b71

final few-shots

Browse files
data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv CHANGED
@@ -4,3 +4,4 @@ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
4
  10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.612,0.7259976964524691,0.612,0.6501410678512595,0.10633333333333334
5
  20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6336666666666667,0.7315100617022602,0.6336666666666667,0.6683245802083553,0.08266666666666667
6
  30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.665,0.7374233826761456,0.665,0.6872462947319797,0.07
 
 
4
  10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.612,0.7259976964524691,0.612,0.6501410678512595,0.10633333333333334
5
  20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6336666666666667,0.7315100617022602,0.6336666666666667,0.6683245802083553,0.08266666666666667
6
  30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.665,0.7374233826761456,0.665,0.6872462947319797,0.07
7
+ 40,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-40,0.6306666666666667,0.7422868762493116,0.6306666666666667,0.6670711390706651,0.06333333333333334
data/Qwen2.5-0.5B-Instruct-float16_metrics.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.43833333333333335,0.5292917259914629,0.43833333333333335,0.42286875992486556,0.594
3
+ 0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.5223333333333333,0.5704911830866488,0.5223333333333333,0.454387436259078,1.0
4
+ 0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.542,0.6358012674347429,0.542,0.5272438410312219,1.0
5
+ 0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.44633333333333336,0.6477441598024034,0.44633333333333336,0.4917457459702999,1.0
6
+ 0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.5053333333333333,0.6438300456580985,0.5053333333333333,0.4995247505211914,1.0
7
+ 1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.558,0.6560369730369926,0.558,0.5632487818615118,1.0
8
+ 1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.5453333333333333,0.6357935773889876,0.5453333333333333,0.5594242895140294,1.0
9
+ 1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.5903333333333334,0.6503049529377274,0.5903333333333334,0.6094397514027766,1.0
10
+ 1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.5286666666666666,0.6532851084098983,0.5286666666666666,0.5617239467523474,1.0
11
+ 1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.5336666666666666,0.6607103736450911,0.5336666666666666,0.5622949959647037,1.0
12
+ 2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.5156666666666667,0.652809461208547,0.5156666666666667,0.549955024535151,1.0
data/Qwen2.5-0.5B-Instruct-float16_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/Qwen2.5-0.5B-Instruct-float16_shots_metrics.csv ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.43833333333333335,0.5292917259914629,0.43833333333333335,0.42286875992486556,0.594
3
+ 5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.17966666666666667,0.47516573853109806,0.17966666666666667,0.214144872117911,0.004
4
+ 10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.351,0.5084853117995367,0.351,0.39097839594031075,0.068
5
+ 20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.43366666666666664,0.513186330900278,0.43366666666666664,0.463747974034812,0.37266666666666665
6
+ 30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.39,0.5367753683204347,0.39,0.4299603249123421,0.07566666666666666
7
+ 40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.466,0.5400134144413437,0.466,0.49542975613961904,0.324
8
+ 50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.496,0.5465409839032335,0.496,0.5069942984615308,0.24333333333333335
data/Qwen2.5-1.5B-Instruct-float16_metrics.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.float16_lf,0.20166666666666666,0.5269756683734005,0.20166666666666666,0.24069835329504388,0.9223333333333333
3
+ 0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.float16_lf,0.48533333333333334,0.654166887199198,0.48533333333333334,0.5381849571995003,0.9996666666666667
4
+ 0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.float16_lf,0.573,0.7037737273232145,0.573,0.6131069400231612,0.9996666666666667
5
+ 0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.float16_lf,0.539,0.7162869126454278,0.539,0.5961610389687657,1.0
6
+ 0.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-140_torch.float16_lf,0.6443333333333333,0.7218750831357578,0.6443333333333333,0.6721473356905486,1.0
7
+ 1.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-175_torch.float16_lf,0.6296666666666667,0.7065049203038848,0.6296666666666667,0.6496809196018393,1.0
8
+ 1.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-210_torch.float16_lf,0.5836666666666667,0.7222805944180548,0.5836666666666667,0.6314346830311218,1.0
9
+ 1.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-245_torch.float16_lf,0.6493333333333333,0.7440287895607589,0.6493333333333333,0.6815314583590799,1.0
10
+ 1.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-280_torch.float16_lf,0.6293333333333333,0.7332138067544355,0.6293333333333333,0.6634330572585689,1.0
11
+ 1.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-315_torch.float16_lf,0.599,0.7297954686265763,0.599,0.6396292878324805,1.0
12
+ 2.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-350_torch.float16_lf,0.6056666666666667,0.7305580205770756,0.6056666666666667,0.6426785514786738,1.0
data/Qwen2.5-1.5B-Instruct-float16_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/Qwen2.5-1.5B-Instruct-float16_shots_metrics.csv ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.20166666666666666,0.5269756683734005,0.20166666666666666,0.24069835329504388,0.9223333333333333
3
+ 5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.3933333333333333,0.578886379886985,0.3933333333333333,0.43554636943558694,0.8283333333333334
4
+ 10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.407,0.5820145311822223,0.407,0.459589777544246,0.9156666666666666
5
+ 20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.232,0.5282610881631451,0.232,0.3093707499897376,0.676
6
+ 30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
7
+ 40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.29233333333333333,0.5608411738006117,0.29233333333333333,0.3751714671158081,0.5206666666666667
8
+ 50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.36883826526592467,0.4603333333333333
data/Qwen2.5-3B-Instruct-float16_metrics.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.float16_lf,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
3
+ 0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.float16_lf,0.689,0.7450174119748659,0.689,0.709114466474576,0.9986666666666667
4
+ 0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.float16_lf,0.6556666666666666,0.7590430811422313,0.6556666666666666,0.6934194398116857,1.0
5
+ 0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.float16_lf,0.6963333333333334,0.7550938479315918,0.6963333333333334,0.71844324172961,1.0
6
+ 0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.float16_lf,0.6853333333333333,0.7542524799326954,0.6853333333333333,0.7128732915785243,1.0
7
+ 1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.float16_lf,0.6846666666666666,0.7564071354272528,0.6846666666666666,0.7125676758538035,1.0
8
+ 1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.float16_lf,0.6896666666666667,0.7690917466956201,0.6896666666666667,0.720231747443145,1.0
9
+ 1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.float16_lf,0.7256666666666667,0.7753705482689578,0.7256666666666667,0.7440390153124937,1.0
10
+ 1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.float16_lf,0.708,0.7659638403826392,0.708,0.7293997518219294,1.0
11
+ 1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.float16_lf,0.7056666666666667,0.7717562122699148,0.7056666666666667,0.729817759784445,1.0
12
+ 2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.float16_lf,0.7003333333333334,0.7698824212888824,0.7003333333333334,0.726563613830647,1.0
data/Qwen2.5-3B-Instruct-float16_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/Qwen2.5-3B-Instruct-float16_shots_metrics.csv ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
2
+ 0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
3
+ 5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.639,0.7226431221398603,0.639,0.641568790114368,0.9973333333333333
4
+ 10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.625,0.7164154004131771,0.625,0.6402584852791593,0.995
5
+ 20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4666666666666667,0.6987641430848737,0.46666666666666673,0.5265074036660548,0.9316666666666666
6
+ 30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.475,0.6880994914236809,0.475,0.5310948082593374,0.904
7
+ 40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.584,0.7065303262365236,0.584,0.6214992664375876,0.7173333333333334
8
+ 50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6093333333333333,0.7120506480394511,0.6093333333333333,0.6451959368825358,0.574
notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cac861767f0655fabdf2436f6b5c807ab152c5d0e0ac65a7e4b89bca5d20b203
3
- size 11022066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9883868b31f10c598e1f5b0711d8dad24e3739b69760b7c8b55d4963c74a94df
3
+ size 14775082
scripts/eval-mgtv-shots.sh CHANGED
@@ -13,14 +13,18 @@ cat /etc/os-release
13
  lscpu
14
  grep MemTotal /proc/meminfo
15
 
16
- export START_NUM_SHOTS=5
 
17
 
 
 
 
 
18
  $BASEDIR/scripts/eval-shots.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat
19
 
20
- $BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat
21
 
22
- $BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat-1m
23
 
24
- $BASEDIR/scripts/eval-shots.sh Qwen Qwen2-7B-Instruct
25
 
26
- $BASEDIR/scripts/eval-shots.sh shenzhi-wang Llama3.1-8B-Chinese-Chat
 
13
  lscpu
14
  grep MemTotal /proc/meminfo
15
 
16
+ export USING_LLAMA_FACTORY=true
17
+ export LOAD_IN_4BIT=false
18
 
19
+ export START_NUM_SHOTS=20
20
+ $BASEDIR/scripts/eval-shots.sh shenzhi-wang Llama3.1-8B-Chinese-Chat
21
+
22
+ export START_NUM_SHOTS=50
23
  $BASEDIR/scripts/eval-shots.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat
24
 
25
+ # $BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat
26
 
27
+ # $BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat-1m
28
 
29
+ # $BASEDIR/scripts/eval-shots.sh Qwen Qwen2-7B-Instruct
30
 
 
scripts/eval-mgtv.sh CHANGED
@@ -1 +1 @@
1
- eval-mgtv-qwen2.5_3b.sh
 
1
+ eval-mgtv-shots.sh