Update README.md
README.md (CHANGED)
```diff
@@ -37,43 +37,43 @@ tags:
 
 ########## First turn ##########
-| model | turn | score |
-|--------------------|------|----------|
-| gpt-4 | 1 | 8.95625 |
-| xDAN-L1-Chat-RL-v1 | 1 | 8.87500 |
-| xDAN-L2-Chat-RL-v2 | 1 | 8.78750 |
-| claude-v1 | 1 | 8.15000 |
-| gpt-3.5-turbo | 1 | 8.07500 |
-| vicuna-33b-v1.3 | 1 | 7.45625 |
-| wizardlm-30b | 1 | 7.13125 |
-| oasst-sft-7-llama-30b | 1 | 7.10625 |
-| Llama-2-70b-chat | 1 | 6.98750 |
+| model | turn | score | size
+|--------------------|------|----------|--------
+| gpt-4 | 1 | 8.95625 | -
+| xDAN-L1-Chat-RL-v1 | 1 | 8.87500 | 7b
+| xDAN-L2-Chat-RL-v2 | 1 | 8.78750 | 30b
+| claude-v1 | 1 | 8.15000 | -
+| gpt-3.5-turbo | 1 | 8.07500 | 20b
+| vicuna-33b-v1.3 | 1 | 7.45625 | 33b
+| wizardlm-30b | 1 | 7.13125 | 30b
+| oasst-sft-7-llama-30b | 1 | 7.10625 | 30b
+| Llama-2-70b-chat | 1 | 6.98750 | 70b
 
-########## Second turn ##########
-| model | turn | score |
-|--------------------|------|-----------|
-| gpt-4 | 2 | 9.025000 |
-| xDAN-L2-Chat-RL-v2 | 1 | 8.087500 |
-| xDAN-L1-Chat-RL-v1 | 2 | 7.825000 |
-| gpt-3.5-turbo | 2 | 7.812500 |
-| claude-v1 | 2 | 7.650000 |
-| wizardlm-30b | 2 | 6.887500 |
-| vicuna-33b-v1.3 | 2 | 6.787500 |
-| Llama-2-70b-chat | 2 | 6.725000 |
+########## Second turn ##########
+| model | turn | score | size
+|--------------------|------|-----------|--------
+| gpt-4 | 2 | 9.025000 | -
+| xDAN-L2-Chat-RL-v2 | 1 | 8.087500 | 30b
+| claude-instant-v1 | 2 | 8.012658 | -
+| xDAN-L1-Chat-RL-v1 | 2 | 7.825000 | 7b
+| gpt-3.5-turbo | 2 | 7.812500 | 20b
+| claude-v1 | 2 | 7.650000 | -
+| wizardlm-30b | 2 | 6.887500 | 30b
+| vicuna-33b-v1.3 | 2 | 6.787500 | 33b
+| Llama-2-70b-chat | 2 | 6.725000 | 70b
 
 ########## Average turn ##########
-| model | score |
-|--------------------|-----------|
-| gpt-4 | 8.990625 |
-| xDAN-L2-Chat-RL-v2 | 8.437500 |
-| xDAN-L1-Chat-RL-v1 | 8.350000 |
-| gpt-3.5-turbo | 7.943750 |
-| claude-v1 | 7.900000 |
-| vicuna-33b-v1.3 | 7.121875 |
-| wizardlm-30b | 7.009375 |
-| Llama-2-70b-chat | 6.856250 |
+| model | score | size
+|--------------------|-----------|--------
+| gpt-4 | 8.990625 | -
+| xDAN-L2-Chat-RL-v2 | 8.437500 | 30b
+| xDAN-L1-Chat-RL-v1 | 8.350000 | 7b
+| gpt-3.5-turbo | 7.943750 | 20b
+| claude-v1 | 7.900000 | -
+| vicuna-33b-v1.3 | 7.121875 | 33b
+| wizardlm-30b | 7.009375 | 30b
+| Llama-2-70b-chat | 6.856250 | 70b
```
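The "Average turn" column is the per-model mean of the two turn scores above, e.g. gpt-4: (8.95625 + 9.025) / 2 = 8.990625. Below is a minimal sketch of that computation with the scores hard-coded from the tables in this commit; the variable names are illustrative, not part of the repo.

```python
# Recompute the "Average turn" table as the mean of the turn-1 and
# turn-2 MT-Bench scores. Values are copied from the tables above.
turn1 = {
    "gpt-4": 8.95625,
    "xDAN-L1-Chat-RL-v1": 8.87500,
    "xDAN-L2-Chat-RL-v2": 8.78750,
    "claude-v1": 8.15000,
    "gpt-3.5-turbo": 8.07500,
    "vicuna-33b-v1.3": 7.45625,
    "wizardlm-30b": 7.13125,
    "Llama-2-70b-chat": 6.98750,
}
turn2 = {
    "gpt-4": 9.025000,
    "xDAN-L2-Chat-RL-v2": 8.087500,
    "xDAN-L1-Chat-RL-v1": 7.825000,
    "gpt-3.5-turbo": 7.812500,
    "claude-v1": 7.650000,
    "wizardlm-30b": 6.887500,
    "vicuna-33b-v1.3": 6.787500,
    "Llama-2-70b-chat": 6.725000,
}

# Only models scored on both turns appear in the average table
# (oasst-sft-7-llama-30b and claude-instant-v1 each have one turn).
average = {m: (turn1[m] + turn2[m]) / 2 for m in turn1 if m in turn2}
for model, score in sorted(average.items(), key=lambda kv: -kv[1]):
    print(f"| {model:<18} | {score:.6f} |")
```

Running this reproduces the eight "Average turn" rows in the order shown above.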