Spaces:
Sleeping
Sleeping
Upload 60 files
Browse files- results/Noise Robustness/scores_mixtral-8x7b-32768_noise_0.6_passage_5_num_queries_50.json +9 -0
- results/Noise Robustness/scores_mixtral-8x7b-32768_noise_0.8_passage_5_num_queries_100.json +9 -0
- results/Noise Robustness/scores_mixtral-8x7b-32768_noise_0.8_passage_5_num_queries_300.json +9 -0
- results/Noise Robustness/scores_mixtral-8x7b-32768_noise_0.8_passage_5_num_queries_50.json +9 -0
- results/Noise Robustness/scores_mixtral-8x7b-32768_noise_1.0_passage_5_num_queries_10.json +9 -0
- results/Noise Robustness/scores_mixtral-8x7b-32768_noise_1.0_passage_5_num_queries_100.json +9 -0
- results/Noise Robustness/scores_mixtral-8x7b-32768_noise_1.0_passage_5_num_queries_300.json +9 -0
- results/Noise Robustness/scores_mixtral-8x7b-32768_noise_1.0_passage_5_num_queries_50.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_0.2_passage_5_num_queries_100.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_0.2_passage_5_num_queries_50.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_0.4_passage_5_num_queries_10.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_0.4_passage_5_num_queries_100.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_0.4_passage_5_num_queries_50.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_0.6_passage_5_num_queries_100.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_0.6_passage_5_num_queries_50.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_0.8_passage_5_num_queries_100.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_0.8_passage_5_num_queries_50.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_1.0_passage_5_num_queries_10.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_1.0_passage_5_num_queries_100.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_1.0_passage_5_num_queries_300.json +9 -0
- results/Noise Robustness/scores_qwen-2.5-32b_noise_1.0_passage_5_num_queries_50.json +9 -0
results/Noise Robustness/scores_mixtral-8x7b-32768_noise_0.6_passage_5_num_queries_50.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "mixtral-8x7b-32768",
|
3 |
+
"accuracy": 0.78,
|
4 |
+
"noise_rate": 0.6,
|
5 |
+
"correct_count": 39,
|
6 |
+
"total": 50,
|
7 |
+
"all_rate": 0.78,
|
8 |
+
"tt": 39
|
9 |
+
}
|
results/Noise Robustness/scores_mixtral-8x7b-32768_noise_0.8_passage_5_num_queries_100.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "mixtral-8x7b-32768",
|
3 |
+
"accuracy": 0.82,
|
4 |
+
"noise_rate": 0.8,
|
5 |
+
"correct_count": 82,
|
6 |
+
"total": 100,
|
7 |
+
"all_rate": 0.82,
|
8 |
+
"tt": 82
|
9 |
+
}
|
results/Noise Robustness/scores_mixtral-8x7b-32768_noise_0.8_passage_5_num_queries_300.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "mixtral-8x7b-32768",
|
3 |
+
"accuracy": 0.6633333333333333,
|
4 |
+
"noise_rate": 0.8,
|
5 |
+
"correct_count": 199,
|
6 |
+
"total": 300,
|
7 |
+
"all_rate": 0.6633333333333333,
|
8 |
+
"tt": 199
|
9 |
+
}
|
results/Noise Robustness/scores_mixtral-8x7b-32768_noise_0.8_passage_5_num_queries_50.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "mixtral-8x7b-32768",
|
3 |
+
"accuracy": 0.82,
|
4 |
+
"noise_rate": 0.8,
|
5 |
+
"correct_count": 41,
|
6 |
+
"total": 50,
|
7 |
+
"all_rate": 0.82,
|
8 |
+
"tt": 41
|
9 |
+
}
|
results/Noise Robustness/scores_mixtral-8x7b-32768_noise_1.0_passage_5_num_queries_10.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "mixtral-8x7b-32768",
|
3 |
+
"accuracy": 0.0,
|
4 |
+
"noise_rate": 1.0,
|
5 |
+
"correct_count": 0,
|
6 |
+
"total": 10,
|
7 |
+
"all_rate": 0.6,
|
8 |
+
"tt": 6
|
9 |
+
}
|
results/Noise Robustness/scores_mixtral-8x7b-32768_noise_1.0_passage_5_num_queries_100.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "mixtral-8x7b-32768",
|
3 |
+
"accuracy": 0.267,
|
4 |
+
"noise_rate": 0.6,
|
5 |
+
"correct_count": 260,
|
6 |
+
"total": 300,
|
7 |
+
"all_rate": 0.267,
|
8 |
+
"tt": 260
|
9 |
+
}
|
results/Noise Robustness/scores_mixtral-8x7b-32768_noise_1.0_passage_5_num_queries_300.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "mixtral-8x7b-32768",
|
3 |
+
"accuracy": 0.62,
|
4 |
+
"noise_rate": 0.8,
|
5 |
+
"correct_count": 186,
|
6 |
+
"total": 300,
|
7 |
+
"all_rate": 0.62,
|
8 |
+
"tt": 186
|
9 |
+
}
|
results/Noise Robustness/scores_mixtral-8x7b-32768_noise_1.0_passage_5_num_queries_50.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "mixtral-8x7b-32768",
|
3 |
+
"accuracy": 0.18,
|
4 |
+
"noise_rate": 1.0,
|
5 |
+
"correct_count": 9,
|
6 |
+
"total": 50,
|
7 |
+
"all_rate": 0.56,
|
8 |
+
"tt": 28
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_0.2_passage_5_num_queries_100.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 0.98,
|
4 |
+
"noise_rate": 0.2,
|
5 |
+
"correct_count": 98,
|
6 |
+
"total": 100,
|
7 |
+
"all_rate": 0.98,
|
8 |
+
"tt": 98
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_0.2_passage_5_num_queries_50.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 0.98,
|
4 |
+
"noise_rate": 0.2,
|
5 |
+
"correct_count": 49,
|
6 |
+
"total": 50,
|
7 |
+
"all_rate": 0.98,
|
8 |
+
"tt": 49
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_0.4_passage_5_num_queries_10.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 1.0,
|
4 |
+
"noise_rate": 0.4,
|
5 |
+
"correct_count": 10,
|
6 |
+
"total": 10,
|
7 |
+
"all_rate": 1.0,
|
8 |
+
"tt": 10
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_0.4_passage_5_num_queries_100.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 0.82,
|
4 |
+
"noise_rate": 0.4,
|
5 |
+
"correct_count": 82,
|
6 |
+
"total": 100,
|
7 |
+
"all_rate": 0.82,
|
8 |
+
"tt": 82
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_0.4_passage_5_num_queries_50.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 1.0,
|
4 |
+
"noise_rate": 0.4,
|
5 |
+
"correct_count": 50,
|
6 |
+
"total": 50,
|
7 |
+
"all_rate": 1.0,
|
8 |
+
"tt": 50
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_0.6_passage_5_num_queries_100.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 0.94,
|
4 |
+
"noise_rate": 0.6,
|
5 |
+
"correct_count": 94,
|
6 |
+
"total": 100,
|
7 |
+
"all_rate": 0.94,
|
8 |
+
"tt": 94
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_0.6_passage_5_num_queries_50.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 0.96,
|
4 |
+
"noise_rate": 0.6,
|
5 |
+
"correct_count": 48,
|
6 |
+
"total": 50,
|
7 |
+
"all_rate": 0.96,
|
8 |
+
"tt": 48
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_0.8_passage_5_num_queries_100.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 0.95,
|
4 |
+
"noise_rate": 0.8,
|
5 |
+
"correct_count": 95,
|
6 |
+
"total": 100,
|
7 |
+
"all_rate": 0.95,
|
8 |
+
"tt": 95
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_0.8_passage_5_num_queries_50.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 0.9,
|
4 |
+
"noise_rate": 0.8,
|
5 |
+
"correct_count": 45,
|
6 |
+
"total": 50,
|
7 |
+
"all_rate": 0.9,
|
8 |
+
"tt": 45
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_1.0_passage_5_num_queries_10.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 0.0,
|
4 |
+
"noise_rate": 1.0,
|
5 |
+
"correct_count": 0,
|
6 |
+
"total": 10,
|
7 |
+
"all_rate": 0.8,
|
8 |
+
"tt": 8
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_1.0_passage_5_num_queries_100.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 0.07,
|
4 |
+
"noise_rate": 1.0,
|
5 |
+
"correct_count": 7,
|
6 |
+
"total": 100,
|
7 |
+
"all_rate": 0.84,
|
8 |
+
"tt": 84
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_1.0_passage_5_num_queries_300.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 0.08,
|
4 |
+
"noise_rate": 1.0,
|
5 |
+
"correct_count": 24,
|
6 |
+
"total": 300,
|
7 |
+
"all_rate": 0.9166666666666666,
|
8 |
+
"tt": 275
|
9 |
+
}
|
results/Noise Robustness/scores_qwen-2.5-32b_noise_1.0_passage_5_num_queries_50.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "qwen-2.5-32b",
|
3 |
+
"accuracy": 0.1,
|
4 |
+
"noise_rate": 1.0,
|
5 |
+
"correct_count": 5,
|
6 |
+
"total": 50,
|
7 |
+
"all_rate": 0.88,
|
8 |
+
"tt": 44
|
9 |
+
}
|