Spaces:
Build error
Build error
final few-shots
Browse files- data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv +1 -0
- data/Qwen2.5-0.5B-Instruct-float16_metrics.csv +12 -0
- data/Qwen2.5-0.5B-Instruct-float16_results.csv +0 -0
- data/Qwen2.5-0.5B-Instruct-float16_shots_metrics.csv +8 -0
- data/Qwen2.5-1.5B-Instruct-float16_metrics.csv +12 -0
- data/Qwen2.5-1.5B-Instruct-float16_results.csv +0 -0
- data/Qwen2.5-1.5B-Instruct-float16_shots_metrics.csv +8 -0
- data/Qwen2.5-3B-Instruct-float16_metrics.csv +12 -0
- data/Qwen2.5-3B-Instruct-float16_results.csv +0 -0
- data/Qwen2.5-3B-Instruct-float16_shots_metrics.csv +8 -0
- notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb +2 -2
- scripts/eval-mgtv-shots.sh +9 -5
- scripts/eval-mgtv.sh +1 -1
data/Mistral-7B-v0.3-Chinese-Chat_shots_metrics.csv
CHANGED
@@ -4,3 +4,4 @@ shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
|
4 |
10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.612,0.7259976964524691,0.612,0.6501410678512595,0.10633333333333334
|
5 |
20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6336666666666667,0.7315100617022602,0.6336666666666667,0.6683245802083553,0.08266666666666667
|
6 |
30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.665,0.7374233826761456,0.665,0.6872462947319797,0.07
|
|
|
|
4 |
10,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-10,0.612,0.7259976964524691,0.612,0.6501410678512595,0.10633333333333334
|
5 |
20,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-20,0.6336666666666667,0.7315100617022602,0.6336666666666667,0.6683245802083553,0.08266666666666667
|
6 |
30,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-30,0.665,0.7374233826761456,0.665,0.6872462947319797,0.07
|
7 |
+
40,Mistral-7B-v0.3-Chinese-Chat,shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/shots-40,0.6306666666666667,0.7422868762493116,0.6306666666666667,0.6670711390706651,0.06333333333333334
|
data/Qwen2.5-0.5B-Instruct-float16_metrics.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct_torch.float16_lf,0.43833333333333335,0.5292917259914629,0.43833333333333335,0.42286875992486556,0.594
|
3 |
+
0.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-35_torch.float16_lf,0.5223333333333333,0.5704911830866488,0.5223333333333333,0.454387436259078,1.0
|
4 |
+
0.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-70_torch.float16_lf,0.542,0.6358012674347429,0.542,0.5272438410312219,1.0
|
5 |
+
0.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-105_torch.float16_lf,0.44633333333333336,0.6477441598024034,0.44633333333333336,0.4917457459702999,1.0
|
6 |
+
0.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-140_torch.float16_lf,0.5053333333333333,0.6438300456580985,0.5053333333333333,0.4995247505211914,1.0
|
7 |
+
1.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-175_torch.float16_lf,0.558,0.6560369730369926,0.558,0.5632487818615118,1.0
|
8 |
+
1.2,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-210_torch.float16_lf,0.5453333333333333,0.6357935773889876,0.5453333333333333,0.5594242895140294,1.0
|
9 |
+
1.4,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-245_torch.float16_lf,0.5903333333333334,0.6503049529377274,0.5903333333333334,0.6094397514027766,1.0
|
10 |
+
1.6,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-280_torch.float16_lf,0.5286666666666666,0.6532851084098983,0.5286666666666666,0.5617239467523474,1.0
|
11 |
+
1.8,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-315_torch.float16_lf,0.5336666666666666,0.6607103736450911,0.5336666666666666,0.5622949959647037,1.0
|
12 |
+
2.0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/checkpoint-350_torch.float16_lf,0.5156666666666667,0.652809461208547,0.5156666666666667,0.549955024535151,1.0
|
data/Qwen2.5-0.5B-Instruct-float16_results.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/Qwen2.5-0.5B-Instruct-float16_shots_metrics.csv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-00,0.43833333333333335,0.5292917259914629,0.43833333333333335,0.42286875992486556,0.594
|
3 |
+
5,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-05,0.17966666666666667,0.47516573853109806,0.17966666666666667,0.214144872117911,0.004
|
4 |
+
10,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-10,0.351,0.5084853117995367,0.351,0.39097839594031075,0.068
|
5 |
+
20,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-20,0.43366666666666664,0.513186330900278,0.43366666666666664,0.463747974034812,0.37266666666666665
|
6 |
+
30,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-30,0.39,0.5367753683204347,0.39,0.4299603249123421,0.07566666666666666
|
7 |
+
40,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-40,0.466,0.5400134144413437,0.466,0.49542975613961904,0.324
|
8 |
+
50,Qwen2.5-0.5B-Instruct,Qwen/Qwen2.5-0.5B-Instruct/shots-50,0.496,0.5465409839032335,0.496,0.5069942984615308,0.24333333333333335
|
data/Qwen2.5-1.5B-Instruct-float16_metrics.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct_torch.float16_lf,0.20166666666666666,0.5269756683734005,0.20166666666666666,0.24069835329504388,0.9223333333333333
|
3 |
+
0.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-35_torch.float16_lf,0.48533333333333334,0.654166887199198,0.48533333333333334,0.5381849571995003,0.9996666666666667
|
4 |
+
0.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-70_torch.float16_lf,0.573,0.7037737273232145,0.573,0.6131069400231612,0.9996666666666667
|
5 |
+
0.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-105_torch.float16_lf,0.539,0.7162869126454278,0.539,0.5961610389687657,1.0
|
6 |
+
0.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-140_torch.float16_lf,0.6443333333333333,0.7218750831357578,0.6443333333333333,0.6721473356905486,1.0
|
7 |
+
1.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-175_torch.float16_lf,0.6296666666666667,0.7065049203038848,0.6296666666666667,0.6496809196018393,1.0
|
8 |
+
1.2,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-210_torch.float16_lf,0.5836666666666667,0.7222805944180548,0.5836666666666667,0.6314346830311218,1.0
|
9 |
+
1.4,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-245_torch.float16_lf,0.6493333333333333,0.7440287895607589,0.6493333333333333,0.6815314583590799,1.0
|
10 |
+
1.6,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-280_torch.float16_lf,0.6293333333333333,0.7332138067544355,0.6293333333333333,0.6634330572585689,1.0
|
11 |
+
1.8,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-315_torch.float16_lf,0.599,0.7297954686265763,0.599,0.6396292878324805,1.0
|
12 |
+
2.0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/checkpoint-350_torch.float16_lf,0.6056666666666667,0.7305580205770756,0.6056666666666667,0.6426785514786738,1.0
|
data/Qwen2.5-1.5B-Instruct-float16_results.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/Qwen2.5-1.5B-Instruct-float16_shots_metrics.csv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-00,0.20166666666666666,0.5269756683734005,0.20166666666666666,0.24069835329504388,0.9223333333333333
|
3 |
+
5,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-05,0.3933333333333333,0.578886379886985,0.3933333333333333,0.43554636943558694,0.8283333333333334
|
4 |
+
10,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-10,0.407,0.5820145311822223,0.407,0.459589777544246,0.9156666666666666
|
5 |
+
20,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-20,0.232,0.5282610881631451,0.232,0.3093707499897376,0.676
|
6 |
+
30,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-30,0.23,0.5479545947886839,0.23,0.3064381040560128,0.661
|
7 |
+
40,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-40,0.29233333333333333,0.5608411738006117,0.29233333333333333,0.3751714671158081,0.5206666666666667
|
8 |
+
50,Qwen2.5-1.5B-Instruct,Qwen/Qwen2.5-1.5B-Instruct/shots-50,0.29,0.5646814860840066,0.29,0.36883826526592467,0.4603333333333333
|
data/Qwen2.5-3B-Instruct-float16_metrics.csv
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct_torch.float16_lf,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
|
3 |
+
0.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-35_torch.float16_lf,0.689,0.7450174119748659,0.689,0.709114466474576,0.9986666666666667
|
4 |
+
0.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-70_torch.float16_lf,0.6556666666666666,0.7590430811422313,0.6556666666666666,0.6934194398116857,1.0
|
5 |
+
0.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-105_torch.float16_lf,0.6963333333333334,0.7550938479315918,0.6963333333333334,0.71844324172961,1.0
|
6 |
+
0.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-140_torch.float16_lf,0.6853333333333333,0.7542524799326954,0.6853333333333333,0.7128732915785243,1.0
|
7 |
+
1.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-175_torch.float16_lf,0.6846666666666666,0.7564071354272528,0.6846666666666666,0.7125676758538035,1.0
|
8 |
+
1.2,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-210_torch.float16_lf,0.6896666666666667,0.7690917466956201,0.6896666666666667,0.720231747443145,1.0
|
9 |
+
1.4,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-245_torch.float16_lf,0.7256666666666667,0.7753705482689578,0.7256666666666667,0.7440390153124937,1.0
|
10 |
+
1.6,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-280_torch.float16_lf,0.708,0.7659638403826392,0.708,0.7293997518219294,1.0
|
11 |
+
1.8,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-315_torch.float16_lf,0.7056666666666667,0.7717562122699148,0.7056666666666667,0.729817759784445,1.0
|
12 |
+
2.0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/checkpoint-350_torch.float16_lf,0.7003333333333334,0.7698824212888824,0.7003333333333334,0.726563613830647,1.0
|
data/Qwen2.5-3B-Instruct-float16_results.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/Qwen2.5-3B-Instruct-float16_shots_metrics.csv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
shots,model,run,accuracy,precision,recall,f1,ratio_valid_classifications
|
2 |
+
0,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-00,0.5796666666666667,0.6966500240864278,0.5796666666666667,0.5506370828782681,1.0
|
3 |
+
5,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-05,0.639,0.7226431221398603,0.639,0.641568790114368,0.9973333333333333
|
4 |
+
10,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-10,0.625,0.7164154004131771,0.625,0.6402584852791593,0.995
|
5 |
+
20,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-20,0.4666666666666667,0.6987641430848737,0.46666666666666673,0.5265074036660548,0.9316666666666666
|
6 |
+
30,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-30,0.475,0.6880994914236809,0.475,0.5310948082593374,0.904
|
7 |
+
40,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-40,0.584,0.7065303262365236,0.584,0.6214992664375876,0.7173333333333334
|
8 |
+
50,Qwen2.5-3B-Instruct,Qwen/Qwen2.5-3B-Instruct/shots-50,0.6093333333333333,0.7120506480394511,0.6093333333333333,0.6451959368825358,0.574
|
notebooks/01b_Mistral-7B-v0.3-Chinese-Chat_analysis.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9883868b31f10c598e1f5b0711d8dad24e3739b69760b7c8b55d4963c74a94df
|
3 |
+
size 14775082
|
scripts/eval-mgtv-shots.sh
CHANGED
@@ -13,14 +13,18 @@ cat /etc/os-release
|
|
13 |
lscpu
|
14 |
grep MemTotal /proc/meminfo
|
15 |
|
16 |
-
export
|
|
|
17 |
|
|
|
|
|
|
|
|
|
18 |
$BASEDIR/scripts/eval-shots.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat
|
19 |
|
20 |
-
$BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat
|
21 |
|
22 |
-
$BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat-1m
|
23 |
|
24 |
-
$BASEDIR/scripts/eval-shots.sh Qwen Qwen2-7B-Instruct
|
25 |
|
26 |
-
$BASEDIR/scripts/eval-shots.sh shenzhi-wang Llama3.1-8B-Chinese-Chat
|
|
|
13 |
lscpu
|
14 |
grep MemTotal /proc/meminfo
|
15 |
|
16 |
+
export USING_LLAMA_FACTORY=true
|
17 |
+
export LOAD_IN_4BIT=false
|
18 |
|
19 |
+
export START_NUM_SHOTS=20
|
20 |
+
$BASEDIR/scripts/eval-shots.sh shenzhi-wang Llama3.1-8B-Chinese-Chat
|
21 |
+
|
22 |
+
export START_NUM_SHOTS=50
|
23 |
$BASEDIR/scripts/eval-shots.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat
|
24 |
|
25 |
+
# $BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat
|
26 |
|
27 |
+
# $BASEDIR/scripts/eval-shots.sh internlm internlm2_5-7b-chat-1m
|
28 |
|
29 |
+
# $BASEDIR/scripts/eval-shots.sh Qwen Qwen2-7B-Instruct
|
30 |
|
|
scripts/eval-mgtv.sh
CHANGED
@@ -1 +1 @@
|
|
1 |
-
eval-mgtv-
|
|
|
1 |
+
eval-mgtv-shots.sh
|