Spaces:
Running
Running
update
Browse files- data/dataset/agent-nxpay-id-40-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-lingoace-zh-80-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250814_175318/agent-bingoplus-ph-25-summary.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-lingoace-zh-80-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-lingoace-zh-80-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-lingoace-zh-80-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-25-summary.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-lingoace-zh-80-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-8B/shenzhen_sase/siliconflow_api_key/20250814_163605/agent-bingoplus-ph-25-summary.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-8B/shenzhen_sase/siliconflow_api_key/20250814_163605/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl +3 -0
- examples/make_dataset/make_bingoplus_ph_200_chat.py +4 -2
- examples/make_raw_dataset/step_3_filter_by_keywords.py +22 -4
- examples/make_raw_dataset/step_6_filter_by_choice.py +1 -1
- examples/test_metrics/bingoplus_chat_metric.py +2 -2
- examples/test_metrics/lingoace_chat_metric.py +2 -2
- llm_eval_script/siliconflow.py +10 -3
- llm_eval_script/siliconflow_chat.py +44 -8
- llm_eval_script/siliconflow_summary.py +9 -2
data/dataset/agent-nxpay-id-40-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69c2687adb5fe6ae6082cc5504270a3b592b01226a43defb38bb0d6eac829206
|
3 |
+
size 211683
|
data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5ee4af8e0b636f992510850946aea0c891d718736305adcd318024cd1a8796f
|
3 |
+
size 2436409
|
data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a657945f6f5b28396c9a538fdbc623a9c20926f518a3f93e29398f6073557923
|
3 |
+
size 258605
|
data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f2b83d2a03d1f0fd54c84bb1bbf1407609ff032279e61ebabc3762b3a7db6f5
|
3 |
+
size 1211300
|
data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb2580363c9d95aae96e632f939076a8d814877176601919170dc44d335f5bce
|
3 |
+
size 892580
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f0fa41a9e6a68d7da60199c8ed27d19ef78f7486262188558270b1380313fa3
|
3 |
+
size 2397396
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92cf0ca215afbc52cd93fc66733a0527ca5a491e20dc317173f901ce6e1c87f7
|
3 |
+
size 258545
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41d37a7948993c79855004ab8d6ba71feb2a4e334b20e0c695d62766addfb764
|
3 |
+
size 1210547
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b63327ba9e8252bb7c2ec9b7f31d7a9e2df4dc2c46e72d58487afab6635a54c
|
3 |
+
size 873820
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250814_175318/agent-bingoplus-ph-25-summary.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb5ff498f24964d44ce0417035016816e15cb023c26086ece9e4a05e4442bca1
|
3 |
+
size 180171
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7444f40db5ed93a76208b9021a684e04d75b5a69296b8169818e84f4cef7f635
|
3 |
+
size 2409152
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc3c5ebde1353edf931b29f97467860cad09ee499d51b4d15c2a2c6f7658db8e
|
3 |
+
size 258403
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a7f7f13c1c26217a73818ce6e105850c0c1550a123835bf5c84ad2b6c710e8a
|
3 |
+
size 1211617
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f54b8f450d9cc4b08183ddfc4b2c34a50279568aec5e0f11c212347e73b2c907
|
3 |
+
size 879114
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a359ea25e35f20f507fab92af54770cc543b36ebc218babf2fbc386a93aba2c6
|
3 |
+
size 2405978
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94a6383a6fb0ef83b3c82692b726646eb15d967bfac746ae90124df612b1dbb0
|
3 |
+
size 258539
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82826bd4ce3d64e02ce1ea79fc3d73a28654e3f1d51e45b4536754c24c9d9993
|
3 |
+
size 1211170
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea6308aced7c2ccfa2c7f5de09229887a8990815fb6491c79482d59144b9a4f2
|
3 |
+
size 882503
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:875912816e6e396e929359a6c5a44521a3b0d51e0d28754d28566edffb540a85
|
3 |
+
size 2404865
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff6a056cc471b5af50d00092b76eef9dae03673972ce70eb41b46609eb503ac3
|
3 |
+
size 258671
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b48b94d7c1389ba21506dea94d6df957e87d1bd70e42939636f2d9e9fba663ed
|
3 |
+
size 1211401
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4dbb2418dc5e76b73a5fb725a7e2e7e0aff2e9c95f648055a4a83a38cc39138c
|
3 |
+
size 873914
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e47033bde0f0c387f0d9bfae7c9d32cf06aa9dd4e0e62a0225dcad2410dd952c
|
3 |
+
size 2414736
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-25-summary.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:505a2de4045efa8e27c03c8b1dbe5094e3d522e9b3484073ea469d412d7c0898
|
3 |
+
size 182566
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b665bf4e3e9fa32cf273ddc923c8e04850625d05819462e5987b6fa1cb44b4c8
|
3 |
+
size 258583
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:283f3c29a5a2b9b7d0faae4300bdacd28a930a387db1a8754ca6488af5f39429
|
3 |
+
size 1208679
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90cf5b4f17d7b27c1e20f73aa24fddd2895fcc8a9f1598a015b3d0b384e6c090
|
3 |
+
size 881952
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-8B/shenzhen_sase/siliconflow_api_key/20250814_163605/agent-bingoplus-ph-25-summary.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b8c2b975eec172c895c866473d67a6d25a0e6c27eba6f260f5b14c718b5c293
|
3 |
+
size 181707
|
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-8B/shenzhen_sase/siliconflow_api_key/20250814_163605/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:571d1fc583aa78a42b2d4c5cc7e9bdff71b4e4a472adf03459ef9913aec7bca0
|
3 |
+
size 258615
|
data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb4c714e099894700646c482bd68c997d41b14acb16255ec1b2795f11ddace61
|
3 |
+
size 2409276
|
data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddac1a4f6815eee7123c9fc8fcd4b056ce5a180bb8c56b35c442bd0c19ac62f0
|
3 |
+
size 258516
|
data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcb61871b9ebed78a80efec356d8c047d38cfda9049767c082cdb429f67962c0
|
3 |
+
size 1211414
|
data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e09e738040b67bd357dcbc85cbf3b5566a8e5597a24921c4a514d3d7e2434f1c
|
3 |
+
size 882489
|
examples/make_dataset/make_bingoplus_ph_200_chat.py
CHANGED
@@ -17,12 +17,14 @@ def get_args():
|
|
17 |
parser = argparse.ArgumentParser()
|
18 |
parser.add_argument(
|
19 |
"--raw_dataset",
|
20 |
-
default=(project_path / "data/raw_dataset/finished/agent-bingoplus-ph-200-chat").as_posix(),
|
|
|
21 |
type=str
|
22 |
)
|
23 |
parser.add_argument(
|
24 |
"--dataset",
|
25 |
-
default=(project_path / "data/dataset/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
|
|
|
26 |
type=str
|
27 |
)
|
28 |
args = parser.parse_args()
|
|
|
17 |
parser = argparse.ArgumentParser()
|
18 |
parser.add_argument(
|
19 |
"--raw_dataset",
|
20 |
+
# default=(project_path / "data/raw_dataset/finished/agent-bingoplus-ph-200-chat").as_posix(),
|
21 |
+
default=(project_path / "data/raw_dataset/agent-nxpay-id-100-chat").as_posix(),
|
22 |
type=str
|
23 |
)
|
24 |
parser.add_argument(
|
25 |
"--dataset",
|
26 |
+
# default=(project_path / "data/dataset/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
|
27 |
+
default=(project_path / "data/dataset/agent-nxpay-id-100-chat.jsonl").as_posix(),
|
28 |
type=str
|
29 |
)
|
30 |
args = parser.parse_args()
|
examples/make_raw_dataset/step_3_filter_by_keywords.py
CHANGED
@@ -15,7 +15,8 @@ def get_args():
|
|
15 |
parser = argparse.ArgumentParser()
|
16 |
parser.add_argument(
|
17 |
"--dataset_dir",
|
18 |
-
default=(project_path / "data/llm-log-hk/extract-dataset").as_posix(),
|
|
|
19 |
# default=(project_path / "data/llm-log-usa/extract-dataset").as_posix(),
|
20 |
type=str
|
21 |
)
|
@@ -33,7 +34,11 @@ def main():
|
|
33 |
json_dir = dataset_dir / "json"
|
34 |
json_dir.mkdir(parents=True, exist_ok=True)
|
35 |
|
36 |
-
for data_dir in [
|
|
|
|
|
|
|
|
|
37 |
for sample_dir in data_dir.glob("*"):
|
38 |
# print(f"process: {sample_dir.as_posix()}")
|
39 |
|
@@ -49,10 +54,15 @@ def main():
|
|
49 |
response = f.read()
|
50 |
|
51 |
for key_str in [
|
|
|
52 |
# "BingoPlus",
|
53 |
-
" COD ",
|
|
|
|
|
|
|
54 |
# "NXPay",
|
55 |
# "NX Money",
|
|
|
56 |
# "Exodus Telecom",
|
57 |
# "Exodus Retail",
|
58 |
# "Exodus Automotive",
|
@@ -60,19 +70,27 @@ def main():
|
|
60 |
# "NXCloud",
|
61 |
# "作为VIP客户",
|
62 |
# "FedEx",
|
|
|
63 |
]:
|
64 |
if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
|
65 |
print(f"process: {sample_dir.as_posix()}")
|
|
|
66 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-bingoplus"
|
67 |
-
tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-cod"
|
|
|
|
|
68 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxpay"
|
69 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxmoney"
|
|
|
|
|
70 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-retail"
|
71 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-automotive"
|
72 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-kta"
|
73 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxcloud"
|
74 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-vip"
|
75 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-fedex"
|
|
|
|
|
76 |
tgt_dir.mkdir(parents=True, exist_ok=True)
|
77 |
shutil.move(
|
78 |
sample_dir.as_posix(),
|
|
|
15 |
parser = argparse.ArgumentParser()
|
16 |
parser.add_argument(
|
17 |
"--dataset_dir",
|
18 |
+
# default=(project_path / "data/llm-log-hk/extract-dataset").as_posix(),
|
19 |
+
default=(project_path / "data/llm-log-hk/extract-dataset/20250804").as_posix(),
|
20 |
# default=(project_path / "data/llm-log-usa/extract-dataset").as_posix(),
|
21 |
type=str
|
22 |
)
|
|
|
34 |
json_dir = dataset_dir / "json"
|
35 |
json_dir.mkdir(parents=True, exist_ok=True)
|
36 |
|
37 |
+
for data_dir in [
|
38 |
+
raw_dir,
|
39 |
+
choice_dir,
|
40 |
+
json_dir,
|
41 |
+
]:
|
42 |
for sample_dir in data_dir.glob("*"):
|
43 |
# print(f"process: {sample_dir.as_posix()}")
|
44 |
|
|
|
54 |
response = f.read()
|
55 |
|
56 |
for key_str in [
|
57 |
+
# "LingoAce",
|
58 |
# "BingoPlus",
|
59 |
+
# " COD ",
|
60 |
+
# "机器狗",
|
61 |
+
# "andy",
|
62 |
+
"HomePro",
|
63 |
# "NXPay",
|
64 |
# "NX Money",
|
65 |
+
# "Exodus Bank",
|
66 |
# "Exodus Telecom",
|
67 |
# "Exodus Retail",
|
68 |
# "Exodus Automotive",
|
|
|
70 |
# "NXCloud",
|
71 |
# "作为VIP客户",
|
72 |
# "FedEx",
|
73 |
+
# "Chinese laser cutting",
|
74 |
]:
|
75 |
if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
|
76 |
print(f"process: {sample_dir.as_posix()}")
|
77 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-lingoace"
|
78 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-bingoplus"
|
79 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-cod"
|
80 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-andy"
|
81 |
+
tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-homepro"
|
82 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxpay"
|
83 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxmoney"
|
84 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-bank"
|
85 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-telecom"
|
86 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-retail"
|
87 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-automotive"
|
88 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-kta"
|
89 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxcloud"
|
90 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-vip"
|
91 |
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-fedex"
|
92 |
+
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-laser"
|
93 |
+
|
94 |
tgt_dir.mkdir(parents=True, exist_ok=True)
|
95 |
shutil.move(
|
96 |
sample_dir.as_posix(),
|
examples/make_raw_dataset/step_6_filter_by_choice.py
CHANGED
@@ -12,7 +12,7 @@ def get_args():
|
|
12 |
parser = argparse.ArgumentParser()
|
13 |
parser.add_argument(
|
14 |
"--data_dir",
|
15 |
-
default=(project_path / "data/llm-log-hk/extract-dataset/choice-
|
16 |
type=str
|
17 |
)
|
18 |
args = parser.parse_args()
|
|
|
12 |
parser = argparse.ArgumentParser()
|
13 |
parser.add_argument(
|
14 |
"--data_dir",
|
15 |
+
default=(project_path / "data/llm-log-hk/extract-dataset/choice-nxpay").as_posix(),
|
16 |
type=str
|
17 |
)
|
18 |
args = parser.parse_args()
|
examples/test_metrics/bingoplus_chat_metric.py
CHANGED
@@ -38,12 +38,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
|
|
38 |
)
|
39 |
parser.add_argument(
|
40 |
"--eval_data_file",
|
41 |
-
default=(project_path / "data/eval_data/
|
42 |
type=str
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--output_file",
|
46 |
-
default=(project_path / "data/eval_data/
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
|
|
38 |
)
|
39 |
parser.add_argument(
|
40 |
"--eval_data_file",
|
41 |
+
default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl.raw").as_posix(),
|
42 |
type=str
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--output_file",
|
46 |
+
default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
examples/test_metrics/lingoace_chat_metric.py
CHANGED
@@ -43,12 +43,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
|
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--eval_data_file",
|
46 |
-
default=(project_path / "data/eval_data/
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
50 |
"--output_file",
|
51 |
-
default=(project_path / "data/eval_data/
|
52 |
type=str
|
53 |
)
|
54 |
parser.add_argument(
|
|
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--eval_data_file",
|
46 |
+
default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl.raw").as_posix(),
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
50 |
"--output_file",
|
51 |
+
default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl").as_posix(),
|
52 |
type=str
|
53 |
)
|
54 |
parser.add_argument(
|
llm_eval_script/siliconflow.py
CHANGED
@@ -70,12 +70,19 @@ def get_args():
|
|
70 |
"--model_name",
|
71 |
# default="Pro/deepseek-ai/DeepSeek-R1",
|
72 |
# default="tencent/Hunyuan-A13B-Instruct",
|
73 |
-
default="deepseek-ai/DeepSeek-V3",
|
74 |
-
# default="Qwen/Qwen3-8B",
|
75 |
# default="deepseek-ai/DeepSeek-R1",
|
76 |
# default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
77 |
# default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
78 |
# default="baidu/ERNIE-4.5-300B-A47B",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
type=str
|
80 |
)
|
81 |
parser.add_argument(
|
@@ -108,7 +115,7 @@ def get_args():
|
|
108 |
parser.add_argument(
|
109 |
"--create_time_str",
|
110 |
default="null",
|
111 |
-
# default="
|
112 |
type=str
|
113 |
)
|
114 |
parser.add_argument(
|
|
|
70 |
"--model_name",
|
71 |
# default="Pro/deepseek-ai/DeepSeek-R1",
|
72 |
# default="tencent/Hunyuan-A13B-Instruct",
|
73 |
+
# default="deepseek-ai/DeepSeek-V3",
|
|
|
74 |
# default="deepseek-ai/DeepSeek-R1",
|
75 |
# default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
76 |
# default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
77 |
# default="baidu/ERNIE-4.5-300B-A47B",
|
78 |
+
# default="Qwen/Qwen3-8B",
|
79 |
+
# default="Qwen/Qwen3-14B",
|
80 |
+
# default="Qwen/Qwen3-32B",
|
81 |
+
# default="Qwen/Qwen3-30B-A3B",
|
82 |
+
# default="Qwen/Qwen3-30B-A3B-Instruct-2507",
|
83 |
+
# default="Qwen/Qwen3-235B-A22B-Instruct-2507",
|
84 |
+
# default="Qwen/QwQ-32B",
|
85 |
+
default="Tongyi-Zhiwen/QwenLong-L1-32B",
|
86 |
type=str
|
87 |
)
|
88 |
parser.add_argument(
|
|
|
115 |
parser.add_argument(
|
116 |
"--create_time_str",
|
117 |
default="null",
|
118 |
+
# default="20250812_092418",
|
119 |
type=str
|
120 |
)
|
121 |
parser.add_argument(
|
llm_eval_script/siliconflow_chat.py
CHANGED
@@ -51,6 +51,7 @@ from datetime import datetime
|
|
51 |
import json
|
52 |
import os
|
53 |
from pathlib import Path
|
|
|
54 |
import sys
|
55 |
import time
|
56 |
from zoneinfo import ZoneInfo # Python 3.9+ 自带,无需安装
|
@@ -69,12 +70,19 @@ def get_args():
|
|
69 |
"--model_name",
|
70 |
# default="Pro/deepseek-ai/DeepSeek-R1",
|
71 |
# default="tencent/Hunyuan-A13B-Instruct",
|
72 |
-
default="deepseek-ai/DeepSeek-V3",
|
73 |
-
# default="Qwen/Qwen3-8B",
|
74 |
# default="deepseek-ai/DeepSeek-R1",
|
75 |
# default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
76 |
# default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
77 |
# default="baidu/ERNIE-4.5-300B-A47B",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
type=str
|
79 |
)
|
80 |
parser.add_argument(
|
@@ -105,13 +113,13 @@ def get_args():
|
|
105 |
)
|
106 |
parser.add_argument(
|
107 |
"--create_time_str",
|
108 |
-
default="null",
|
109 |
-
|
110 |
type=str
|
111 |
)
|
112 |
parser.add_argument(
|
113 |
"--interval",
|
114 |
-
default=
|
115 |
type=int
|
116 |
)
|
117 |
args = parser.parse_args()
|
@@ -171,15 +179,43 @@ def main():
|
|
171 |
continue
|
172 |
finished_idx_set.add(idx)
|
173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
try:
|
175 |
time.sleep(args.interval)
|
176 |
print(f"sleep: {args.interval}")
|
177 |
time_begin = time.time()
|
178 |
completion = client.chat.completions.create(
|
179 |
model=args.model_name,
|
180 |
-
messages=
|
181 |
-
{"role": "user", "content": prompt},
|
182 |
-
],
|
183 |
stream=False,
|
184 |
max_tokens=4096,
|
185 |
# max_tokens=1,
|
|
|
51 |
import json
|
52 |
import os
|
53 |
from pathlib import Path
|
54 |
+
import re
|
55 |
import sys
|
56 |
import time
|
57 |
from zoneinfo import ZoneInfo # Python 3.9+ 自带,无需安装
|
|
|
70 |
"--model_name",
|
71 |
# default="Pro/deepseek-ai/DeepSeek-R1",
|
72 |
# default="tencent/Hunyuan-A13B-Instruct",
|
73 |
+
# default="deepseek-ai/DeepSeek-V3",
|
|
|
74 |
# default="deepseek-ai/DeepSeek-R1",
|
75 |
# default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
76 |
# default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
77 |
# default="baidu/ERNIE-4.5-300B-A47B",
|
78 |
+
# default="Qwen/Qwen3-8B",
|
79 |
+
# default="Qwen/Qwen3-14B",
|
80 |
+
# default="Qwen/Qwen3-32B",
|
81 |
+
# default="Qwen/Qwen3-30B-A3B",
|
82 |
+
# default="Qwen/Qwen3-30B-A3B-Instruct-2507",
|
83 |
+
# default="Qwen/Qwen3-235B-A22B-Instruct-2507",
|
84 |
+
# default="Qwen/QwQ-32B",
|
85 |
+
default="Tongyi-Zhiwen/QwenLong-L1-32B",
|
86 |
type=str
|
87 |
)
|
88 |
parser.add_argument(
|
|
|
113 |
)
|
114 |
parser.add_argument(
|
115 |
"--create_time_str",
|
116 |
+
# default="null",
|
117 |
+
default="20250814_134104",
|
118 |
type=str
|
119 |
)
|
120 |
parser.add_argument(
|
121 |
"--interval",
|
122 |
+
default=10,
|
123 |
type=int
|
124 |
)
|
125 |
args = parser.parse_args()
|
|
|
179 |
continue
|
180 |
finished_idx_set.add(idx)
|
181 |
|
182 |
+
# prompt
|
183 |
+
splits = prompt[::-1].split("\n\n", maxsplit=1)
|
184 |
+
conversation = splits[0]
|
185 |
+
system_prompt = splits[1]
|
186 |
+
conversation = conversation[::-1].strip()
|
187 |
+
system_prompt = system_prompt[::-1].strip()
|
188 |
+
|
189 |
+
pattern = "^(Client|Assistant): (.*?)(?=\n(?:Client|Assistant):)"
|
190 |
+
match = re.findall(pattern=pattern, string=conversation, flags=re.I|re.DOTALL|re.MULTILINE)
|
191 |
+
|
192 |
+
messages_ = list()
|
193 |
+
for m in match:
|
194 |
+
role = m[0].lower()
|
195 |
+
content = m[1]
|
196 |
+
if role in ("client", "Client"):
|
197 |
+
role = "user"
|
198 |
+
elif role in ("assistant", "Assistant"):
|
199 |
+
role = "assistant"
|
200 |
+
else:
|
201 |
+
raise AssertionError
|
202 |
+
messages_.append({
|
203 |
+
"role": role,
|
204 |
+
"content": content
|
205 |
+
})
|
206 |
+
|
207 |
+
messages = [
|
208 |
+
{"role": "system", "content": system_prompt},
|
209 |
+
*messages_
|
210 |
+
]
|
211 |
+
|
212 |
try:
|
213 |
time.sleep(args.interval)
|
214 |
print(f"sleep: {args.interval}")
|
215 |
time_begin = time.time()
|
216 |
completion = client.chat.completions.create(
|
217 |
model=args.model_name,
|
218 |
+
messages=messages,
|
|
|
|
|
219 |
stream=False,
|
220 |
max_tokens=4096,
|
221 |
# max_tokens=1,
|
llm_eval_script/siliconflow_summary.py
CHANGED
@@ -69,12 +69,19 @@ def get_args():
|
|
69 |
"--model_name",
|
70 |
# default="Pro/deepseek-ai/DeepSeek-R1",
|
71 |
# default="tencent/Hunyuan-A13B-Instruct",
|
72 |
-
default="deepseek-ai/DeepSeek-V3",
|
73 |
-
# default="Qwen/Qwen3-8B",
|
74 |
# default="deepseek-ai/DeepSeek-R1",
|
75 |
# default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
76 |
# default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
77 |
# default="baidu/ERNIE-4.5-300B-A47B",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
type=str
|
79 |
)
|
80 |
parser.add_argument(
|
|
|
69 |
"--model_name",
|
70 |
# default="Pro/deepseek-ai/DeepSeek-R1",
|
71 |
# default="tencent/Hunyuan-A13B-Instruct",
|
72 |
+
# default="deepseek-ai/DeepSeek-V3",
|
|
|
73 |
# default="deepseek-ai/DeepSeek-R1",
|
74 |
# default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
75 |
# default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
76 |
# default="baidu/ERNIE-4.5-300B-A47B",
|
77 |
+
# default="Qwen/Qwen3-8B",
|
78 |
+
# default="Qwen/Qwen3-14B",
|
79 |
+
default="Qwen/Qwen3-32B",
|
80 |
+
# default="Qwen/Qwen3-30B-A3B",
|
81 |
+
# default="Qwen/Qwen3-30B-A3B-Instruct-2507",
|
82 |
+
# default="Qwen/Qwen3-235B-A22B-Instruct-2507",
|
83 |
+
# default="Qwen/QwQ-32B",
|
84 |
+
# default="Tongyi-Zhiwen/QwenLong-L1-32B",
|
85 |
type=str
|
86 |
)
|
87 |
parser.add_argument(
|