Update README.md
README.md
CHANGED
@@ -1,65 +1,55 @@
-qlora
-max_seq_length=1024
-num_train_epochs=3
-per_device_train_batch_size=8
-gradient_accumulation_steps=32,
-evaluation_strategy="steps"
-eval_steps=2000,
-logging_steps=25,
-optim="paged_adamw_8bit",
-learning_rate=2e-4,
-lr_scheduler_type="cosine",
-warmup_steps=10,
-warmup_ratio=0.05,
-report_to="tensorboard",
-weight_decay=0.01,
-
-| Model | rouge-1 | rouge-2 | rouge-l |
-|-------|---------|---------|---------|
-| **Book** | | | |
-| yanolja/EEVE-Korean-Instruct-2.8B-v1.0 | 0.2095 | 0.0866 | 0.1985 |
-| ryanu/EEVE-10.8-BOOK-v0.1 | **0.2454** | **0.1158** | **0.2404** |
-| meta-llama/llama-3-8b-instruct | 0.2137 | 0.0883 | 0.2020 |
-| meta-llama/llama-3-70b-instruct | 0.2269 | 0.0925 | 0.2186 |
-| **Paper** | | | |
-| yanolja/EEVE-Korean-Instruct-2.8B-v1.0 | 0.1934 | 0.0829 | 0.1832 |
-| meta-llama/llama-3-8b-instruct | **0.2044** | **0.0868** | 0.1895 |
-| meta-llama/llama-3-70b-instruct | 0.1935 | 0.0783 | 0.1836 |
-| ryanu/EEVE-10.8-BOOK-v0.1 | 0.2004 | 0.0860 | **0.1938** |
+| Parameter | Value |
+|-----------|-------|
+| Task | Book (social sciences, technology, philosophy, law, the arts, etc.) |
+| Data size | 5,000 examples |
+| Model | qlora |
+| max_seq_length | 1024 |
+| num_train_epochs | 3 |
+| per_device_train_batch_size | 8 |
+| gradient_accumulation_steps | 32 |
+| evaluation_strategy | "steps" |
+| eval_steps | 2000 |
+| logging_steps | 25 |
+| optim | "paged_adamw_8bit" |
+| learning_rate | 2e-4 |
+| lr_scheduler_type | "cosine" |
+| warmup_steps | 10 |
+| warmup_ratio | 0.05 |
+| report_to | "tensorboard" |
+| weight_decay | 0.01 |
+| max_steps | -1 |
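The added parameter table maps directly onto a Hugging Face `TrainingArguments` plus a QLoRA (4-bit quantization + LoRA adapter) setup. Below is a minimal sketch of that mapping; the base model name, data file paths, `text` column, and LoRA rank/alpha/dropout are illustrative assumptions not stated in this commit, while the `TrainingArguments` values are taken verbatim from the table. Note that when both are set, Transformers uses `warmup_steps` and ignores `warmup_ratio`, so here warmup is effectively 10 steps. The added results tables continue after the sketch.

```python
# Illustrative sketch only: reconstructs the parameter table as a QLoRA run.
# Base model, data files, and LoRA hyperparameters are assumptions.
import torch
from datasets import load_dataset
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          BitsAndBytesConfig, TrainingArguments)
from peft import LoraConfig
from trl import SFTTrainer

base_model = "yanolja/EEVE-Korean-Instruct-10.8B-v1.0"  # assumed base model

# 4-bit quantization — the "qlora" row in the table.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(base_model, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(base_model)

# LoRA adapter; r/alpha/dropout are common defaults, not values from the README.
peft_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM")

# Hypothetical data files; the commit only states "Book, 5,000 examples".
data = load_dataset("json", data_files={"train": "book_train.json", "eval": "book_eval.json"})

# Values below are copied from the parameter table.
args = TrainingArguments(
    output_dir="outputs",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=32,
    evaluation_strategy="steps",
    eval_steps=2000,
    logging_steps=25,
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_steps=10,
    warmup_ratio=0.05,   # overridden by warmup_steps > 0
    report_to="tensorboard",
    weight_decay=0.01,
    max_steps=-1,        # train for num_train_epochs instead of a step budget
)

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=data["train"],
    eval_dataset=data["eval"],
    peft_config=peft_config,
    dataset_text_field="text",  # assumes pre-formatted prompt+summary strings
    max_seq_length=1024,        # max_seq_length row from the table
    tokenizer=tokenizer,
)
trainer.train()
```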
+**Book**
+
+| Model | Rouge-1 | Rouge-2 | Rouge-L |
+|----------------------------------------------|---------|---------|---------|
+| **ryanu/EEVE-10.8-BOOK-v0.1** | 0.2454 | 0.1158 | 0.2404 |
+| meta-llama/llama-3-70b-instruct | 0.2269 | 0.0925 | 0.2186 |
+| meta-llama/llama-3-8b-instruct | 0.2137 | 0.0883 | 0.2020 |
+| yanolja/EEVE-Korean-Instruct-2.8B-v1.0 | 0.2095 | 0.0866 | 0.1985 |
+| mistralai/mixtral-8x7b-instruct-v0-1 | 0.1735 | 0.0516 | 0.1668 |
+| ibm-mistralai/mixtral-8x7b-instruct-v01-q | 0.1724 | 0.0534 | 0.1630 |
+
+**Paper**
+
+| Model | Rouge-1 | Rouge-2 | Rouge-L |
+|----------------------------------------------|---------|---------|---------|
+| **meta-llama/llama-3-8b-instruct** | 0.2044 | 0.0868 | 0.1895 |
+| ryanu/EEVE-10.8-BOOK-v0.1 | 0.2004 | 0.0860 | 0.1938 |
+| meta-llama/llama-3-70b-instruct | 0.1935 | 0.0783 | 0.1836 |
+| yanolja/EEVE-Korean-Instruct-2.8B-v1.0 | 0.1934 | 0.0829 | 0.1832 |
+| mistralai/mixtral-8x7b-instruct-v0-1 | 0.1774 | 0.0601 | 0.1684 |
+| ibm-mistralai/mixtral-8x7b-instruct-v01-q | 0.1702 | 0.0561 | 0.1605 |
+
+**RAG Q&A**
+
+| Model | Rouge-1 | Rouge-2 | Rouge-L |
+|----------------------------------------------|---------|---------|---------|
+| **meta-llama/llama-3-70b-instruct** | 0.4418 | 0.2986 | 0.4297 |
+| **meta-llama/llama-3-8b-instruct** | 0.4391 | 0.3100 | 0.4273 |
+| mistralai/mixtral-8x7b-instruct-v0-1 | 0.4022 | 0.2653 | 0.3916 |
+| ibm-mistralai/mixtral-8x7b-instruct-v01-q | 0.3105 | 0.1763 | 0.2960 |
+| yanolja/EEVE-Korean-Instruct-10.8B-v1.0 | 0.3191 | 0.2069 | 0.3136 |
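The Rouge-1/2/L columns above correspond to the standard ROUGE family of overlap metrics. As a minimal sketch of how one such score can be computed with the `rouge_score` package, under the assumptions that the reported numbers are F1 values and that the placeholder strings below stand in for real gold/generated summaries; Korean evaluation usually also plugs in a language-appropriate tokenizer, which this sketch omits:

```python
# Minimal ROUGE sketch (pip install rouge-score). Strings are placeholders;
# the README does not specify its exact scoring pipeline or tokenization.
from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=False)

reference = "reference summary text"   # gold summary (placeholder)
prediction = "generated summary text"  # model output (placeholder)

scores = scorer.score(reference, prediction)
for name, s in scores.items():
    # Assuming the tables report F1 (fmeasure), the usual single-number choice.
    print(f"{name}: {s.fmeasure:.4f}")
```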
@@ -71,4 +61,5 @@ prompt template
 문장: {context}

 요약: {summary}
+
 -------------------------------
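The second hunk only adds a blank line inside the README's prompt template, whose two visible fields are 문장 ("sentence/passage"), wrapping `{context}`, and 요약 ("summary"), wrapping `{summary}`. As a sketch of how such a template is typically filled — at training time with the gold summary, at inference time with the summary slot left empty — noting that any instruction text surrounding these two lines in the full README is not shown in this diff and so is not reproduced here:

```python
# Sketch of using the two-field template shown in the diff context lines.
# Surrounding instruction text (if any) is unknown and intentionally omitted.
PROMPT_TEMPLATE = "문장: {context}\n\n요약: {summary}"

def build_train_example(context: str, summary: str) -> str:
    """Format one supervised example: passage plus its gold summary."""
    return PROMPT_TEMPLATE.format(context=context, summary=summary)

def build_inference_prompt(context: str) -> str:
    """Leave the summary slot empty so the model completes it."""
    return PROMPT_TEMPLATE.format(context=context, summary="").rstrip()

print(build_inference_prompt("요약할 한국어 문단 …"))  # placeholder passage
```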