HoneyTian committed on
Commit
a0ec039
·
1 Parent(s): 01f9a03
Files changed (17) hide show
  1. data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-bingoplus-ph-200-chat.jsonl +3 -0
  2. data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-bingoplus-ph-90-choice.jsonl +3 -0
  3. data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-cod-zh-70-chat.jsonl +3 -0
  4. data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-lingoace-zh-400-choice.jsonl +3 -0
  5. data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-lingoace-zh-80-chat.jsonl +3 -0
  6. data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-200-chat.jsonl +3 -0
  7. data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-90-choice.jsonl +3 -0
  8. data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-cod-zh-70-chat.jsonl +3 -0
  9. data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-400-choice.jsonl +3 -0
  10. data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-80-chat.jsonl +3 -0
  11. examples/make_raw_dataset/step_3_filter_by_keywords.py +34 -45
  12. examples/test_metrics/bingoplus_chat_metric.py +2 -2
  13. examples/test_metrics/cod_chat_metric.py +2 -6
  14. examples/test_metrics/lingoace_chat_metric.py +2 -2
  15. examples/tokenization/byteplus/step_1_get_by_api.py +3 -2
  16. llm_eval_script/byteplus.py +7 -1
  17. llm_eval_script/byteplus_chat.py +3 -1
data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-bingoplus-ph-200-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e3a2567a913b1dc14fa6ea70adc8cab8f21de50fdb7004c392f021cb3aae218
3
+ size 2448697
data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2603d2a05a92ea2638f37542987c7f402bd8f049a92c7530b5db3a4eec13344
3
+ size 258528
data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-cod-zh-70-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1351fab4285ddb5f9157a53a7418a38dd69f45de308a9875efa03827ca9670c
3
+ size 308863
data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fe3ef2bb2dbde07ced2c9fd6b4621fd6e5126e859077f11aaea4560862dff01
3
+ size 1211263
data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-lingoace-zh-80-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6848f25727166e024e2b88e6f8a256da2363a219700c371279bb05abcfea7613
3
+ size 876960
data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-200-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bb57b8e92f77983202f69ecaa498e7482881f843b9be6469796998e86ec2ec3
3
+ size 2437685
data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-90-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d57411a307c61095f919310595ba039eaf080698657038ad469d715e9592007
3
+ size 258551
data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-cod-zh-70-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1a25a4c54e498a2ef46c863bf0d341fdcbfb210b5d80af4be36f86b20265b2
3
+ size 306012
data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdc386e8b56d6f9f098bd954067b91fb089d9aeaa53a356231155930eaaee1be
3
+ size 1211294
data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-80-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e44cf602fbf5ad5a7d1a4efb9394de7b3fb34aef61f55f309920e90ab4f547f
3
+ size 877036
examples/make_raw_dataset/step_3_filter_by_keywords.py CHANGED
@@ -15,8 +15,8 @@ def get_args():
15
  parser = argparse.ArgumentParser()
16
  parser.add_argument(
17
  "--dataset_dir",
18
- # default=(project_path / "data/llm-log-hk/extract-dataset").as_posix(),
19
- default=(project_path / "data/llm-log-hk/extract-dataset/20250804").as_posix(),
20
  # default=(project_path / "data/llm-log-usa/extract-dataset").as_posix(),
21
  type=str
22
  )
@@ -53,51 +53,40 @@ def main():
53
  with open(response_file.as_posix(), "r", encoding="utf-8") as f:
54
  response = f.read()
55
 
56
- for key_str in [
57
- # "LingoAce",
58
- # "BingoPlus",
59
- # " COD ",
60
- # "机器狗",
61
- # "andy",
62
- "HomePro",
63
- # "NXPay",
64
- # "NX Money",
65
- # "Exodus Bank",
66
- # "Exodus Telecom",
67
- # "Exodus Retail",
68
- # "Exodus Automotive",
69
- # "kta kilat", "KTA KILAT",
70
- # "NXCloud",
71
- # "作为VIP客户",
72
- # "FedEx",
73
- # "Chinese laser cutting",
74
- ]:
75
- if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
76
- print(f"process: {sample_dir.as_posix()}")
77
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-lingoace"
78
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-bingoplus"
79
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-cod"
80
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-andy"
81
- tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-homepro"
82
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxpay"
83
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxmoney"
84
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-bank"
85
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-telecom"
86
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-retail"
87
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-automotive"
88
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-kta"
89
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxcloud"
90
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-vip"
91
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-fedex"
92
- # tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-laser"
93
 
94
- tgt_dir.mkdir(parents=True, exist_ok=True)
95
- shutil.move(
96
- sample_dir.as_posix(),
97
- tgt_dir.as_posix(),
98
- )
99
  break
100
-
 
 
 
 
 
 
 
 
 
 
 
101
  return
102
 
103
 
 
15
  parser = argparse.ArgumentParser()
16
  parser.add_argument(
17
  "--dataset_dir",
18
+ default=(project_path / "data/llm-log-hk/extract-dataset").as_posix(),
19
+ # default=(project_path / "data/llm-log-hk/extract-dataset/20250804").as_posix(),
20
  # default=(project_path / "data/llm-log-usa/extract-dataset").as_posix(),
21
  type=str
22
  )
 
53
  with open(response_file.as_posix(), "r", encoding="utf-8") as f:
54
  response = f.read()
55
 
56
+ filter_map = [
57
+ (["LingoAce"], "lingoace"),
58
+ (["BingoPlus"], "bingoplus"),
59
+ ([" COD "], "cod"),
60
+ (["HomePro"], "homepro"),
61
+ (["andy"], "andy" ),
62
+ (["NXPay"], "nxpay"),
63
+ (["NX Money"], "nxmoney"),
64
+ (["Exodus Bank"], "exodus-bank"),
65
+ (["Exodus Telecom"], "exodus-telecom"),
66
+ (["Exodus Retail"], "exodus-retail"),
67
+ (["Exodus Automotive"], "exodus-automotive"),
68
+ (["kta kilat", "KTA KILAT"], "kta"),
69
+ (["作为VIP客户"], "vip"),
70
+ (["FedEx"], "fedex"),
71
+ (["Chinese laser cutting"], "laser"),
72
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
+ flag = False
75
+ for keys, suffix in filter_map:
76
+ if flag:
 
 
77
  break
78
+ for key_str in keys:
79
+ if flag:
80
+ break
81
+ if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
82
+ print(f"process: {sample_dir.as_posix()}")
83
+ tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-{suffix}"
84
+ tgt_dir.mkdir(parents=True, exist_ok=True)
85
+ shutil.move(
86
+ sample_dir.as_posix(),
87
+ tgt_dir.as_posix(),
88
+ )
89
+ flag = True
90
  return
91
 
92
 
examples/test_metrics/bingoplus_chat_metric.py CHANGED
@@ -38,12 +38,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
38
  )
39
  parser.add_argument(
40
  "--eval_data_file",
41
- default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl.raw").as_posix(),
42
  type=str
43
  )
44
  parser.add_argument(
45
  "--output_file",
46
- default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
47
  type=str
48
  )
49
  parser.add_argument(
 
38
  )
39
  parser.add_argument(
40
  "--eval_data_file",
41
+ default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-200-chat.jsonl.raw").as_posix(),
42
  type=str
43
  )
44
  parser.add_argument(
45
  "--output_file",
46
+ default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
47
  type=str
48
  )
49
  parser.add_argument(
examples/test_metrics/cod_chat_metric.py CHANGED
@@ -38,16 +38,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
38
  )
39
  parser.add_argument(
40
  "--eval_data_file",
41
- default=(project_path / "data/eval_data/azure_openai/azure/gpt-4o-mini/shenzhen_sase/west_us_chatgpt_openai_azure_com/20250806_114802/agent-cod-zh-70-chat.jsonl.raw").as_posix(),
42
- # default=(project_path / "data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-cod-zh-70-chat.jsonl.raw").as_posix(),
43
- # default=(project_path / "data/eval_data/gemini_google/google/gemini-2.5-flash/shenzhen_sase/google_potent_veld_462405_t3/20250808_160530/agent-cod-zh-70-chat.jsonl.raw").as_posix(),
44
  type=str
45
  )
46
  parser.add_argument(
47
  "--output_file",
48
- default=(project_path / "data/eval_data/azure_openai/azure/gpt-4o-mini/shenzhen_sase/west_us_chatgpt_openai_azure_com/20250806_114802/agent-cod-zh-70-chat.jsonl").as_posix(),
49
- # default=(project_path / "data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-cod-zh-70-chat.jsonl").as_posix(),
50
- # default=(project_path / "data/eval_data/gemini_google/google/gemini-2.5-flash/shenzhen_sase/google_potent_veld_462405_t3/20250808_160530/agent-cod-zh-70-chat.jsonl").as_posix(),
51
  type=str
52
  )
53
  parser.add_argument(
 
38
  )
39
  parser.add_argument(
40
  "--eval_data_file",
41
+ default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-cod-zh-70-chat.jsonl.raw").as_posix(),
 
 
42
  type=str
43
  )
44
  parser.add_argument(
45
  "--output_file",
46
+ default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-cod-zh-70-chat.jsonl").as_posix(),
 
 
47
  type=str
48
  )
49
  parser.add_argument(
examples/test_metrics/lingoace_chat_metric.py CHANGED
@@ -43,12 +43,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
43
  )
44
  parser.add_argument(
45
  "--eval_data_file",
46
- default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl.raw").as_posix(),
47
  type=str
48
  )
49
  parser.add_argument(
50
  "--output_file",
51
- default=(project_path / "data/eval_data/siliconflow/siliconflow/Tongyi-Zhiwen#QwenLong-L1-32B/shenzhen_sase/siliconflow_api_key/20250814_134104/agent-lingoace-zh-80-chat.jsonl").as_posix(),
52
  type=str
53
  )
54
  parser.add_argument(
 
43
  )
44
  parser.add_argument(
45
  "--eval_data_file",
46
+ default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-80-chat.jsonl.raw").as_posix(),
47
  type=str
48
  )
49
  parser.add_argument(
50
  "--output_file",
51
+ default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-80-chat.jsonl").as_posix(),
52
  type=str
53
  )
54
  parser.add_argument(
examples/tokenization/byteplus/step_1_get_by_api.py CHANGED
@@ -66,12 +66,13 @@ def main():
66
  "Content-Type": "application/json",
67
  "Authorization": f"Bearer {api_key}"
68
  }
 
69
  data = {
70
  # "model": args.model_name,
71
- # "model": "doubao-pro-32k-241215",
72
  # "model": "seed-1-6-250615",
73
  # "model": "seed-1-6-flash-250615",
74
- "model": "seed-1-6-flash-250715",
75
 
76
  "text": [
77
  "天空为什么这么蓝",
 
66
  "Content-Type": "application/json",
67
  "Authorization": f"Bearer {api_key}"
68
  }
69
+
70
  data = {
71
  # "model": args.model_name,
72
+ "model": "doubao-pro-32k-241215",
73
  # "model": "seed-1-6-250615",
74
  # "model": "seed-1-6-flash-250615",
75
+ # "model": "seed-1-6-flash-250715",
76
 
77
  "text": [
78
  "天空为什么这么蓝",
llm_eval_script/byteplus.py CHANGED
@@ -38,13 +38,19 @@ def get_args():
38
 
39
  deepseek-v3
40
  deepseek-v3-250324
 
 
 
 
41
  """
42
  parser = argparse.ArgumentParser()
43
  parser.add_argument(
44
  "--model_name",
45
  # default="seed-1-6-250615",
46
- default="seed-1-6-flash-250615",
47
  # default="deepseek-v3-250324",
 
 
48
  type=str
49
  )
50
  parser.add_argument(
 
38
 
39
  deepseek-v3
40
  deepseek-v3-250324
41
+
42
+ skylark-pro-250415
43
+ skylark-lite-250215
44
+
45
  """
46
  parser = argparse.ArgumentParser()
47
  parser.add_argument(
48
  "--model_name",
49
  # default="seed-1-6-250615",
50
+ # default="seed-1-6-flash-250615",
51
  # default="deepseek-v3-250324",
52
+ # default="skylark-pro-250415",
53
+ default="skylark-lite-250215",
54
  type=str
55
  )
56
  parser.add_argument(
llm_eval_script/byteplus_chat.py CHANGED
@@ -44,8 +44,10 @@ def get_args():
44
  parser.add_argument(
45
  "--model_name",
46
  # default="seed-1-6-250615",
47
- default="seed-1-6-flash-250615",
48
  # default="deepseek-v3-250324",
 
 
49
  type=str
50
  )
51
  parser.add_argument(
 
44
  parser.add_argument(
45
  "--model_name",
46
  # default="seed-1-6-250615",
47
+ # default="seed-1-6-flash-250615",
48
  # default="deepseek-v3-250324",
49
+ default="skylark-pro-250415",
50
+ # default="skylark-lite-250215",
51
  type=str
52
  )
53
  parser.add_argument(