HoneyTian committed on
Commit
01f9a03
·
1 Parent(s): dbd1ddd
Files changed (41) hide show
  1. data/dataset/agent-nxpay-id-40-chat.jsonl +3 -0
  2. data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl +3 -0
  3. data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-90-choice.jsonl +3 -0
  4. data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-400-choice.jsonl +3 -0
  5. data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl +3 -0
  6. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-bingoplus-ph-200-chat.jsonl +3 -0
  7. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-bingoplus-ph-90-choice.jsonl +3 -0
  8. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-lingoace-zh-400-choice.jsonl +3 -0
  9. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-lingoace-zh-80-chat.jsonl +3 -0
  10. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250814_175318/agent-bingoplus-ph-25-summary.jsonl +3 -0
  11. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-bingoplus-ph-200-chat.jsonl +3 -0
  12. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-bingoplus-ph-90-choice.jsonl +3 -0
  13. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-lingoace-zh-400-choice.jsonl +3 -0
  14. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-lingoace-zh-80-chat.jsonl +3 -0
  15. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-bingoplus-ph-200-chat.jsonl +3 -0
  16. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-bingoplus-ph-90-choice.jsonl +3 -0
  17. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-lingoace-zh-400-choice.jsonl +3 -0
  18. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-lingoace-zh-80-chat.jsonl +3 -0
  19. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-bingoplus-ph-200-chat.jsonl +3 -0
  20. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-bingoplus-ph-90-choice.jsonl +3 -0
  21. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-lingoace-zh-400-choice.jsonl +3 -0
  22. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-lingoace-zh-80-chat.jsonl +3 -0
  23. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-200-chat.jsonl +3 -0
  24. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-25-summary.jsonl +3 -0
  25. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-90-choice.jsonl +3 -0
  26. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-lingoace-zh-400-choice.jsonl +3 -0
  27. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-lingoace-zh-80-chat.jsonl +3 -0
  28. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-8B/shenzhen_sase/siliconflow_api_key/20250814_163605/agent-bingoplus-ph-25-summary.jsonl +3 -0
  29. data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-8B/shenzhen_sase/siliconflow_api_key/20250814_163605/agent-bingoplus-ph-90-choice.jsonl +3 -0
  30. data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl +3 -0
  31. data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-90-choice.jsonl +3 -0
  32. data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-400-choice.jsonl +3 -0
  33. data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl +3 -0
  34. examples/make_dataset/make_bingoplus_ph_200_chat.py +4 -2
  35. examples/make_raw_dataset/step_3_filter_by_keywords.py +22 -4
  36. examples/make_raw_dataset/step_6_filter_by_choice.py +1 -1
  37. examples/test_metrics/bingoplus_chat_metric.py +2 -2
  38. examples/test_metrics/lingoace_chat_metric.py +2 -2
  39. llm_eval_script/siliconflow.py +10 -3
  40. llm_eval_script/siliconflow_chat.py +44 -8
  41. llm_eval_script/siliconflow_summary.py +9 -2
data/dataset/agent-nxpay-id-40-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69c2687adb5fe6ae6082cc5504270a3b592b01226a43defb38bb0d6eac829206
3
+ size 211683
data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5ee4af8e0b636f992510850946aea0c891d718736305adcd318024cd1a8796f
3
+ size 2436409
data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a657945f6f5b28396c9a538fdbc623a9c20926f518a3f93e29398f6073557923
3
+ size 258605
data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f2b83d2a03d1f0fd54c84bb1bbf1407609ff032279e61ebabc3762b3a7db6f5
3
+ size 1211300
data/eval_data/siliconflow/siliconflow/Qwen#QwQ-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb2580363c9d95aae96e632f939076a8d814877176601919170dc44d335f5bce
3
+ size 892580
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-bingoplus-ph-200-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f0fa41a9e6a68d7da60199c8ed27d19ef78f7486262188558270b1380313fa3
3
+ size 2397396
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92cf0ca215afbc52cd93fc66733a0527ca5a491e20dc317173f901ce6e1c87f7
3
+ size 258545
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41d37a7948993c79855004ab8d6ba71feb2a4e334b20e0c695d62766addfb764
3
+ size 1210547
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250813_174600/agent-lingoace-zh-80-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b63327ba9e8252bb7c2ec9b7f31d7a9e2df4dc2c46e72d58487afab6635a54c
3
+ size 873820
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-14B/shenzhen_sase/siliconflow_api_key/20250814_175318/agent-bingoplus-ph-25-summary.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb5ff498f24964d44ce0417035016816e15cb023c26086ece9e4a05e4442bca1
3
+ size 180171
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-bingoplus-ph-200-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7444f40db5ed93a76208b9021a684e04d75b5a69296b8169818e84f4cef7f635
3
+ size 2409152
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc3c5ebde1353edf931b29f97467860cad09ee499d51b4d15c2a2c6f7658db8e
3
+ size 258403
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a7f7f13c1c26217a73818ce6e105850c0c1550a123835bf5c84ad2b6c710e8a
3
+ size 1211617
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-235B-A22B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_115346/agent-lingoace-zh-80-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f54b8f450d9cc4b08183ddfc4b2c34a50279568aec5e0f11c212347e73b2c907
3
+ size 879114
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-bingoplus-ph-200-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a359ea25e35f20f507fab92af54770cc543b36ebc218babf2fbc386a93aba2c6
3
+ size 2405978
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94a6383a6fb0ef83b3c82692b726646eb15d967bfac746ae90124df612b1dbb0
3
+ size 258539
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82826bd4ce3d64e02ce1ea79fc3d73a28654e3f1d51e45b4536754c24c9d9993
3
+ size 1211170
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B-Instruct-2507/shenzhen_sase/siliconflow_api_key/20250814_113756/agent-lingoace-zh-80-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea6308aced7c2ccfa2c7f5de09229887a8990815fb6491c79482d59144b9a4f2
3
+ size 882503
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-bingoplus-ph-200-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:875912816e6e396e929359a6c5a44521a3b0d51e0d28754d28566edffb540a85
3
+ size 2404865
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff6a056cc471b5af50d00092b76eef9dae03673972ce70eb41b46609eb503ac3
3
+ size 258671
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b48b94d7c1389ba21506dea94d6df957e87d1bd70e42939636f2d9e9fba663ed
3
+ size 1211401
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-30B-A3B/shenzhen_sase/siliconflow_api_key/20250814_093406/agent-lingoace-zh-80-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dbb2418dc5e76b73a5fb725a7e2e7e0aff2e9c95f648055a4a83a38cc39138c
3
+ size 873914
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-200-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e47033bde0f0c387f0d9bfae7c9d32cf06aa9dd4e0e62a0225dcad2410dd952c
3
+ size 2414736
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-25-summary.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:505a2de4045efa8e27c03c8b1dbe5094e3d522e9b3484073ea469d412d7c0898
3
+ size 182566
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b665bf4e3e9fa32cf273ddc923c8e04850625d05819462e5987b6fa1cb44b4c8
3
+ size 258583
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:283f3c29a5a2b9b7d0faae4300bdacd28a930a387db1a8754ca6488af5f39429
3
+ size 1208679
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-32B/shenzhen_sase/siliconflow_api_key/20250814_175528/agent-lingoace-zh-80-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90cf5b4f17d7b27c1e20f73aa24fddd2895fcc8a9f1598a015b3d0b384e6c090
3
+ size 881952
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-8B/shenzhen_sase/siliconflow_api_key/20250814_163605/agent-bingoplus-ph-25-summary.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b8c2b975eec172c895c866473d67a6d25a0e6c27eba6f260f5b14c718b5c293
3
+ size 181707
data/eval_data/siliconflow/siliconflow/Qwen#Qwen3-8B/shenzhen_sase/siliconflow_api_key/20250814_163605/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:571d1fc583aa78a42b2d4c5cc7e9bdff71b4e4a472adf03459ef9913aec7bca0
3
+ size 258615
data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb4c714e099894700646c482bd68c997d41b14acb16255ec1b2795f11ddace61
3
+ size 2409276
data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddac1a4f6815eee7123c9fc8fcd4b056ce5a180bb8c56b35c442bd0c19ac62f0
3
+ size 258516
data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb61871b9ebed78a80efec356d8c047d38cfda9049767c082cdb429f67962c0
3
+ size 1211414
data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e09e738040b67bd357dcbc85cbf3b5566a8e5597a24921c4a514d3d7e2434f1c
3
+ size 882489
examples/make_dataset/make_bingoplus_ph_200_chat.py CHANGED
@@ -17,12 +17,14 @@ def get_args():
17
  parser = argparse.ArgumentParser()
18
  parser.add_argument(
19
  "--raw_dataset",
20
- default=(project_path / "data/raw_dataset/finished/agent-bingoplus-ph-200-chat").as_posix(),
 
21
  type=str
22
  )
23
  parser.add_argument(
24
  "--dataset",
25
- default=(project_path / "data/dataset/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
 
26
  type=str
27
  )
28
  args = parser.parse_args()
 
17
  parser = argparse.ArgumentParser()
18
  parser.add_argument(
19
  "--raw_dataset",
20
+ # default=(project_path / "data/raw_dataset/finished/agent-bingoplus-ph-200-chat").as_posix(),
21
+ default=(project_path / "data/raw_dataset/agent-nxpay-id-100-chat").as_posix(),
22
  type=str
23
  )
24
  parser.add_argument(
25
  "--dataset",
26
+ # default=(project_path / "data/dataset/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
27
+ default=(project_path / "data/dataset/agent-nxpay-id-100-chat.jsonl").as_posix(),
28
  type=str
29
  )
30
  args = parser.parse_args()
examples/make_raw_dataset/step_3_filter_by_keywords.py CHANGED
@@ -15,7 +15,8 @@ def get_args():
15
  parser = argparse.ArgumentParser()
16
  parser.add_argument(
17
  "--dataset_dir",
18
- default=(project_path / "data/llm-log-hk/extract-dataset").as_posix(),
 
19
  # default=(project_path / "data/llm-log-usa/extract-dataset").as_posix(),
20
  type=str
21
  )
@@ -33,7 +34,11 @@ def main():
33
  json_dir = dataset_dir / "json"
34
  json_dir.mkdir(parents=True, exist_ok=True)
35
 
36
- for data_dir in [raw_dir, choice_dir, json_dir]:
 
 
 
 
37
  for sample_dir in data_dir.glob("*"):
38
  # print(f"process: {sample_dir.as_posix()}")
39
 
@@ -49,10 +54,15 @@ def main():
49
  response = f.read()
50
 
51
  for key_str in [
 
52
  # "BingoPlus",
53
- " COD ",
 
 
 
54
  # "NXPay",
55
  # "NX Money",
 
56
  # "Exodus Telecom",
57
  # "Exodus Retail",
58
  # "Exodus Automotive",
@@ -60,19 +70,27 @@ def main():
60
  # "NXCloud",
61
  # "作为VIP客户",
62
  # "FedEx",
 
63
  ]:
64
  if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
65
  print(f"process: {sample_dir.as_posix()}")
 
66
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-bingoplus"
67
- tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-cod"
 
 
68
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxpay"
69
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxmoney"
 
 
70
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-retail"
71
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-automotive"
72
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-kta"
73
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxcloud"
74
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-vip"
75
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-fedex"
 
 
76
  tgt_dir.mkdir(parents=True, exist_ok=True)
77
  shutil.move(
78
  sample_dir.as_posix(),
 
15
  parser = argparse.ArgumentParser()
16
  parser.add_argument(
17
  "--dataset_dir",
18
+ # default=(project_path / "data/llm-log-hk/extract-dataset").as_posix(),
19
+ default=(project_path / "data/llm-log-hk/extract-dataset/20250804").as_posix(),
20
  # default=(project_path / "data/llm-log-usa/extract-dataset").as_posix(),
21
  type=str
22
  )
 
34
  json_dir = dataset_dir / "json"
35
  json_dir.mkdir(parents=True, exist_ok=True)
36
 
37
+ for data_dir in [
38
+ raw_dir,
39
+ choice_dir,
40
+ json_dir,
41
+ ]:
42
  for sample_dir in data_dir.glob("*"):
43
  # print(f"process: {sample_dir.as_posix()}")
44
 
 
54
  response = f.read()
55
 
56
  for key_str in [
57
+ # "LingoAce",
58
  # "BingoPlus",
59
+ # " COD ",
60
+ # "机器狗",
61
+ # "andy",
62
+ "HomePro",
63
  # "NXPay",
64
  # "NX Money",
65
+ # "Exodus Bank",
66
  # "Exodus Telecom",
67
  # "Exodus Retail",
68
  # "Exodus Automotive",
 
70
  # "NXCloud",
71
  # "作为VIP客户",
72
  # "FedEx",
73
+ # "Chinese laser cutting",
74
  ]:
75
  if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
76
  print(f"process: {sample_dir.as_posix()}")
77
+ # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-lingoace"
78
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-bingoplus"
79
+ # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-cod"
80
+ # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-andy"
81
+ tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-homepro"
82
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxpay"
83
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxmoney"
84
+ # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-bank"
85
+ # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-telecom"
86
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-retail"
87
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-automotive"
88
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-kta"
89
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxcloud"
90
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-vip"
91
  # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-fedex"
92
+ # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-laser"
93
+
94
  tgt_dir.mkdir(parents=True, exist_ok=True)
95
  shutil.move(
96
  sample_dir.as_posix(),
examples/make_raw_dataset/step_6_filter_by_choice.py CHANGED
@@ -12,7 +12,7 @@ def get_args():
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument(
14
  "--data_dir",
15
- default=(project_path / "data/llm-log-hk/extract-dataset/choice-bingoplus-filipino").as_posix(),
16
  type=str
17
  )
18
  args = parser.parse_args()
 
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument(
14
  "--data_dir",
15
+ default=(project_path / "data/llm-log-hk/extract-dataset/choice-nxpay").as_posix(),
16
  type=str
17
  )
18
  args = parser.parse_args()
examples/test_metrics/bingoplus_chat_metric.py CHANGED
@@ -38,12 +38,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
38
  )
39
  parser.add_argument(
40
  "--eval_data_file",
41
- default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl.raw").as_posix(),
42
  type=str
43
  )
44
  parser.add_argument(
45
  "--output_file",
46
- default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
47
  type=str
48
  )
49
  parser.add_argument(
 
38
  )
39
  parser.add_argument(
40
  "--eval_data_file",
41
+ default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl.raw").as_posix(),
42
  type=str
43
  )
44
  parser.add_argument(
45
  "--output_file",
46
+ default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
47
  type=str
48
  )
49
  parser.add_argument(
examples/test_metrics/lingoace_chat_metric.py CHANGED
@@ -43,12 +43,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
43
  )
44
  parser.add_argument(
45
  "--eval_data_file",
46
- default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl.raw").as_posix(),
47
  type=str
48
  )
49
  parser.add_argument(
50
  "--output_file",
51
- default=(project_path / "data/eval_data/gemini_google/google/llama-4-scout-17b-16e-instruct-maas/shenzhen_sase/google_potent_veld_462405_t3/20250731_162116/agent-lingoace-zh-80-chat.jsonl").as_posix(),
52
  type=str
53
  )
54
  parser.add_argument(
 
43
  )
44
  parser.add_argument(
45
  "--eval_data_file",
46
+ default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl.raw").as_posix(),
47
  type=str
48
  )
49
  parser.add_argument(
50
  "--output_file",
51
+ default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl").as_posix(),
52
  type=str
53
  )
54
  parser.add_argument(
llm_eval_script/siliconflow.py CHANGED
@@ -70,12 +70,19 @@ def get_args():
70
  "--model_name",
71
  # default="Pro/deepseek-ai/DeepSeek-R1",
72
  # default="tencent/Hunyuan-A13B-Instruct",
73
- default="deepseek-ai/DeepSeek-V3",
74
- # default="Qwen/Qwen3-8B",
75
  # default="deepseek-ai/DeepSeek-R1",
76
  # default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
77
  # default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
78
  # default="baidu/ERNIE-4.5-300B-A47B",
 
 
 
 
 
 
 
 
79
  type=str
80
  )
81
  parser.add_argument(
@@ -108,7 +115,7 @@ def get_args():
108
  parser.add_argument(
109
  "--create_time_str",
110
  default="null",
111
- # default="20250728_113641",
112
  type=str
113
  )
114
  parser.add_argument(
 
70
  "--model_name",
71
  # default="Pro/deepseek-ai/DeepSeek-R1",
72
  # default="tencent/Hunyuan-A13B-Instruct",
73
+ # default="deepseek-ai/DeepSeek-V3",
 
74
  # default="deepseek-ai/DeepSeek-R1",
75
  # default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
76
  # default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
77
  # default="baidu/ERNIE-4.5-300B-A47B",
78
+ # default="Qwen/Qwen3-8B",
79
+ # default="Qwen/Qwen3-14B",
80
+ # default="Qwen/Qwen3-32B",
81
+ # default="Qwen/Qwen3-30B-A3B",
82
+ # default="Qwen/Qwen3-30B-A3B-Instruct-2507",
83
+ # default="Qwen/Qwen3-235B-A22B-Instruct-2507",
84
+ # default="Qwen/QwQ-32B",
85
+ default="Tongyi-Zhiwen/QwenLong-L1-32B",
86
  type=str
87
  )
88
  parser.add_argument(
 
115
  parser.add_argument(
116
  "--create_time_str",
117
  default="null",
118
+ # default="20250812_092418",
119
  type=str
120
  )
121
  parser.add_argument(
llm_eval_script/siliconflow_chat.py CHANGED
@@ -51,6 +51,7 @@ from datetime import datetime
51
  import json
52
  import os
53
  from pathlib import Path
 
54
  import sys
55
  import time
56
  from zoneinfo import ZoneInfo # Python 3.9+ 自带,无需安装
@@ -69,12 +70,19 @@ def get_args():
69
  "--model_name",
70
  # default="Pro/deepseek-ai/DeepSeek-R1",
71
  # default="tencent/Hunyuan-A13B-Instruct",
72
- default="deepseek-ai/DeepSeek-V3",
73
- # default="Qwen/Qwen3-8B",
74
  # default="deepseek-ai/DeepSeek-R1",
75
  # default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
76
  # default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
77
  # default="baidu/ERNIE-4.5-300B-A47B",
 
 
 
 
 
 
 
 
78
  type=str
79
  )
80
  parser.add_argument(
@@ -105,13 +113,13 @@ def get_args():
105
  )
106
  parser.add_argument(
107
  "--create_time_str",
108
- default="null",
109
- # default="20250728_113641",
110
  type=str
111
  )
112
  parser.add_argument(
113
  "--interval",
114
- default=1,
115
  type=int
116
  )
117
  args = parser.parse_args()
@@ -171,15 +179,43 @@ def main():
171
  continue
172
  finished_idx_set.add(idx)
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  try:
175
  time.sleep(args.interval)
176
  print(f"sleep: {args.interval}")
177
  time_begin = time.time()
178
  completion = client.chat.completions.create(
179
  model=args.model_name,
180
- messages=[
181
- {"role": "user", "content": prompt},
182
- ],
183
  stream=False,
184
  max_tokens=4096,
185
  # max_tokens=1,
 
51
  import json
52
  import os
53
  from pathlib import Path
54
+ import re
55
  import sys
56
  import time
57
  from zoneinfo import ZoneInfo # Python 3.9+ 自带,无需安装
 
70
  "--model_name",
71
  # default="Pro/deepseek-ai/DeepSeek-R1",
72
  # default="tencent/Hunyuan-A13B-Instruct",
73
+ # default="deepseek-ai/DeepSeek-V3",
 
74
  # default="deepseek-ai/DeepSeek-R1",
75
  # default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
76
  # default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
77
  # default="baidu/ERNIE-4.5-300B-A47B",
78
+ # default="Qwen/Qwen3-8B",
79
+ # default="Qwen/Qwen3-14B",
80
+ # default="Qwen/Qwen3-32B",
81
+ # default="Qwen/Qwen3-30B-A3B",
82
+ # default="Qwen/Qwen3-30B-A3B-Instruct-2507",
83
+ # default="Qwen/Qwen3-235B-A22B-Instruct-2507",
84
+ # default="Qwen/QwQ-32B",
85
+ default="Tongyi-Zhiwen/QwenLong-L1-32B",
86
  type=str
87
  )
88
  parser.add_argument(
 
113
  )
114
  parser.add_argument(
115
  "--create_time_str",
116
+ # default="null",
117
+ default="20250814_134104",
118
  type=str
119
  )
120
  parser.add_argument(
121
  "--interval",
122
+ default=10,
123
  type=int
124
  )
125
  args = parser.parse_args()
 
179
  continue
180
  finished_idx_set.add(idx)
181
 
182
+ # prompt
183
+ splits = prompt[::-1].split("\n\n", maxsplit=1)
184
+ conversation = splits[0]
185
+ system_prompt = splits[1]
186
+ conversation = conversation[::-1].strip()
187
+ system_prompt = system_prompt[::-1].strip()
188
+
189
+ pattern = "^(Client|Assistant): (.*?)(?=\n(?:Client|Assistant):)"
190
+ match = re.findall(pattern=pattern, string=conversation, flags=re.I|re.DOTALL|re.MULTILINE)
191
+
192
+ messages_ = list()
193
+ for m in match:
194
+ role = m[0].lower()
195
+ content = m[1]
196
+ if role in ("client", "Client"):
197
+ role = "user"
198
+ elif role in ("assistant", "Assistant"):
199
+ role = "assistant"
200
+ else:
201
+ raise AssertionError
202
+ messages_.append({
203
+ "role": role,
204
+ "content": content
205
+ })
206
+
207
+ messages = [
208
+ {"role": "system", "content": system_prompt},
209
+ *messages_
210
+ ]
211
+
212
  try:
213
  time.sleep(args.interval)
214
  print(f"sleep: {args.interval}")
215
  time_begin = time.time()
216
  completion = client.chat.completions.create(
217
  model=args.model_name,
218
+ messages=messages,
 
 
219
  stream=False,
220
  max_tokens=4096,
221
  # max_tokens=1,
llm_eval_script/siliconflow_summary.py CHANGED
@@ -69,12 +69,19 @@ def get_args():
69
  "--model_name",
70
  # default="Pro/deepseek-ai/DeepSeek-R1",
71
  # default="tencent/Hunyuan-A13B-Instruct",
72
- default="deepseek-ai/DeepSeek-V3",
73
- # default="Qwen/Qwen3-8B",
74
  # default="deepseek-ai/DeepSeek-R1",
75
  # default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
76
  # default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
77
  # default="baidu/ERNIE-4.5-300B-A47B",
 
 
 
 
 
 
 
 
78
  type=str
79
  )
80
  parser.add_argument(
 
69
  "--model_name",
70
  # default="Pro/deepseek-ai/DeepSeek-R1",
71
  # default="tencent/Hunyuan-A13B-Instruct",
72
+ # default="deepseek-ai/DeepSeek-V3",
 
73
  # default="deepseek-ai/DeepSeek-R1",
74
  # default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
75
  # default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
76
  # default="baidu/ERNIE-4.5-300B-A47B",
77
+ # default="Qwen/Qwen3-8B",
78
+ # default="Qwen/Qwen3-14B",
79
+ default="Qwen/Qwen3-32B",
80
+ # default="Qwen/Qwen3-30B-A3B",
81
+ # default="Qwen/Qwen3-30B-A3B-Instruct-2507",
82
+ # default="Qwen/Qwen3-235B-A22B-Instruct-2507",
83
+ # default="Qwen/QwQ-32B",
84
+ # default="Tongyi-Zhiwen/QwenLong-L1-32B",
85
  type=str
86
  )
87
  parser.add_argument(