Spaces:
Running
Running
update
Browse files- data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-cod-zh-70-chat.jsonl +3 -0
- data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-lingoace-zh-80-chat.jsonl +3 -0
- data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-200-chat.jsonl +3 -0
- data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-90-choice.jsonl +3 -0
- data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-cod-zh-70-chat.jsonl +3 -0
- data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-80-chat.jsonl +3 -0
- examples/make_raw_dataset/step_3_filter_by_keywords.py +34 -45
- examples/test_metrics/bingoplus_chat_metric.py +2 -2
- examples/test_metrics/cod_chat_metric.py +2 -6
- examples/test_metrics/lingoace_chat_metric.py +2 -2
- examples/tokenization/byteplus/step_1_get_by_api.py +3 -2
- llm_eval_script/byteplus.py +7 -1
- llm_eval_script/byteplus_chat.py +3 -1
data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e3a2567a913b1dc14fa6ea70adc8cab8f21de50fdb7004c392f021cb3aae218
|
3 |
+
size 2448697
|
data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2603d2a05a92ea2638f37542987c7f402bd8f049a92c7530b5db3a4eec13344
|
3 |
+
size 258528
|
data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-cod-zh-70-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1351fab4285ddb5f9157a53a7418a38dd69f45de308a9875efa03827ca9670c
|
3 |
+
size 308863
|
data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fe3ef2bb2dbde07ced2c9fd6b4621fd6e5126e859077f11aaea4560862dff01
|
3 |
+
size 1211263
|
data/eval_data/byteplus/byteplus/skylark-lite-250215/shenzhen_sase/byteplus_api_key/20250819_180217/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6848f25727166e024e2b88e6f8a256da2363a219700c371279bb05abcfea7613
|
3 |
+
size 876960
|
data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-200-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bb57b8e92f77983202f69ecaa498e7482881f843b9be6469796998e86ec2ec3
|
3 |
+
size 2437685
|
data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-90-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d57411a307c61095f919310595ba039eaf080698657038ad469d715e9592007
|
3 |
+
size 258551
|
data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-cod-zh-70-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a1a25a4c54e498a2ef46c863bf0d341fdcbfb210b5d80af4be36f86b20265b2
|
3 |
+
size 306012
|
data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdc386e8b56d6f9f098bd954067b91fb089d9aeaa53a356231155930eaaee1be
|
3 |
+
size 1211294
|
data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e44cf602fbf5ad5a7d1a4efb9394de7b3fb34aef61f55f309920e90ab4f547f
|
3 |
+
size 877036
|
examples/make_raw_dataset/step_3_filter_by_keywords.py
CHANGED
@@ -15,8 +15,8 @@ def get_args():
|
|
15 |
parser = argparse.ArgumentParser()
|
16 |
parser.add_argument(
|
17 |
"--dataset_dir",
|
18 |
-
|
19 |
-
default=(project_path / "data/llm-log-hk/extract-dataset/20250804").as_posix(),
|
20 |
# default=(project_path / "data/llm-log-usa/extract-dataset").as_posix(),
|
21 |
type=str
|
22 |
)
|
@@ -53,51 +53,40 @@ def main():
|
|
53 |
with open(response_file.as_posix(), "r", encoding="utf-8") as f:
|
54 |
response = f.read()
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
"
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
# "Chinese laser cutting",
|
74 |
-
]:
|
75 |
-
if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
|
76 |
-
print(f"process: {sample_dir.as_posix()}")
|
77 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-lingoace"
|
78 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-bingoplus"
|
79 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-cod"
|
80 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-andy"
|
81 |
-
tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-homepro"
|
82 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxpay"
|
83 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxmoney"
|
84 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-bank"
|
85 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-telecom"
|
86 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-retail"
|
87 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-exodus-automotive"
|
88 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-kta"
|
89 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-nxcloud"
|
90 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-vip"
|
91 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-fedex"
|
92 |
-
# tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-laser"
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
tgt_dir.as_posix(),
|
98 |
-
)
|
99 |
break
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
return
|
102 |
|
103 |
|
|
|
15 |
parser = argparse.ArgumentParser()
|
16 |
parser.add_argument(
|
17 |
"--dataset_dir",
|
18 |
+
default=(project_path / "data/llm-log-hk/extract-dataset").as_posix(),
|
19 |
+
# default=(project_path / "data/llm-log-hk/extract-dataset/20250804").as_posix(),
|
20 |
# default=(project_path / "data/llm-log-usa/extract-dataset").as_posix(),
|
21 |
type=str
|
22 |
)
|
|
|
53 |
with open(response_file.as_posix(), "r", encoding="utf-8") as f:
|
54 |
response = f.read()
|
55 |
|
56 |
+
filter_map = [
|
57 |
+
(["LingoAce"], "lingoace"),
|
58 |
+
(["BingoPlus"], "bingoplus"),
|
59 |
+
([" COD "], "cod"),
|
60 |
+
(["HomePro"], "homepro"),
|
61 |
+
(["andy"], "andy" ),
|
62 |
+
(["NXPay"], "nxpay"),
|
63 |
+
(["NX Money"], "nxmoney"),
|
64 |
+
(["Exodus Bank"], "exodus-bank"),
|
65 |
+
(["Exodus Telecom"], "exodus-telecom"),
|
66 |
+
(["Exodus Retail"], "exodus-retail"),
|
67 |
+
(["Exodus Automotive"], "exodus-automotive"),
|
68 |
+
(["kta kilat", "KTA KILAT"], "kta"),
|
69 |
+
(["作为VIP客户"], "vip"),
|
70 |
+
(["FedEx"], "fedex"),
|
71 |
+
(["Chinese laser cutting"], "laser"),
|
72 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
+
flag = False
|
75 |
+
for keys, suffix in filter_map:
|
76 |
+
if flag:
|
|
|
|
|
77 |
break
|
78 |
+
for key_str in keys:
|
79 |
+
if flag:
|
80 |
+
break
|
81 |
+
if system_prompt.__contains__(key_str) or user_prompt.__contains__(key_str):
|
82 |
+
print(f"process: {sample_dir.as_posix()}")
|
83 |
+
tgt_dir = dataset_dir / f"{data_dir.parts[-1]}-{suffix}"
|
84 |
+
tgt_dir.mkdir(parents=True, exist_ok=True)
|
85 |
+
shutil.move(
|
86 |
+
sample_dir.as_posix(),
|
87 |
+
tgt_dir.as_posix(),
|
88 |
+
)
|
89 |
+
flag = True
|
90 |
return
|
91 |
|
92 |
|
examples/test_metrics/bingoplus_chat_metric.py
CHANGED
@@ -38,12 +38,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
|
|
38 |
)
|
39 |
parser.add_argument(
|
40 |
"--eval_data_file",
|
41 |
-
default=(project_path / "data/eval_data/
|
42 |
type=str
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--output_file",
|
46 |
-
default=(project_path / "data/eval_data/
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
|
|
38 |
)
|
39 |
parser.add_argument(
|
40 |
"--eval_data_file",
|
41 |
+
default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-200-chat.jsonl.raw").as_posix(),
|
42 |
type=str
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--output_file",
|
46 |
+
default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-bingoplus-ph-200-chat.jsonl").as_posix(),
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
examples/test_metrics/cod_chat_metric.py
CHANGED
@@ -38,16 +38,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
|
|
38 |
)
|
39 |
parser.add_argument(
|
40 |
"--eval_data_file",
|
41 |
-
default=(project_path / "data/eval_data/
|
42 |
-
# default=(project_path / "data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-cod-zh-70-chat.jsonl.raw").as_posix(),
|
43 |
-
# default=(project_path / "data/eval_data/gemini_google/google/gemini-2.5-flash/shenzhen_sase/google_potent_veld_462405_t3/20250808_160530/agent-cod-zh-70-chat.jsonl.raw").as_posix(),
|
44 |
type=str
|
45 |
)
|
46 |
parser.add_argument(
|
47 |
"--output_file",
|
48 |
-
default=(project_path / "data/eval_data/
|
49 |
-
# default=(project_path / "data/eval_data/byteplus/byteplus/seed-1-6-flash-250615/shenzhen_sase/byteplus_api_key/20250728_113641/agent-cod-zh-70-chat.jsonl").as_posix(),
|
50 |
-
# default=(project_path / "data/eval_data/gemini_google/google/gemini-2.5-flash/shenzhen_sase/google_potent_veld_462405_t3/20250808_160530/agent-cod-zh-70-chat.jsonl").as_posix(),
|
51 |
type=str
|
52 |
)
|
53 |
parser.add_argument(
|
|
|
38 |
)
|
39 |
parser.add_argument(
|
40 |
"--eval_data_file",
|
41 |
+
default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-cod-zh-70-chat.jsonl.raw").as_posix(),
|
|
|
|
|
42 |
type=str
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--output_file",
|
46 |
+
default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-cod-zh-70-chat.jsonl").as_posix(),
|
|
|
|
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
examples/test_metrics/lingoace_chat_metric.py
CHANGED
@@ -43,12 +43,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
|
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--eval_data_file",
|
46 |
-
default=(project_path / "data/eval_data/
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
50 |
"--output_file",
|
51 |
-
default=(project_path / "data/eval_data/
|
52 |
type=str
|
53 |
)
|
54 |
parser.add_argument(
|
|
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--eval_data_file",
|
46 |
+
default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-80-chat.jsonl.raw").as_posix(),
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
50 |
"--output_file",
|
51 |
+
default=(project_path / "data/eval_data/byteplus/byteplus/skylark-pro-250415/shenzhen_sase/byteplus_api_key/20250819_171424/agent-lingoace-zh-80-chat.jsonl").as_posix(),
|
52 |
type=str
|
53 |
)
|
54 |
parser.add_argument(
|
examples/tokenization/byteplus/step_1_get_by_api.py
CHANGED
@@ -66,12 +66,13 @@ def main():
|
|
66 |
"Content-Type": "application/json",
|
67 |
"Authorization": f"Bearer {api_key}"
|
68 |
}
|
|
|
69 |
data = {
|
70 |
# "model": args.model_name,
|
71 |
-
|
72 |
# "model": "seed-1-6-250615",
|
73 |
# "model": "seed-1-6-flash-250615",
|
74 |
-
"model": "seed-1-6-flash-250715",
|
75 |
|
76 |
"text": [
|
77 |
"天空为什么这么蓝",
|
|
|
66 |
"Content-Type": "application/json",
|
67 |
"Authorization": f"Bearer {api_key}"
|
68 |
}
|
69 |
+
|
70 |
data = {
|
71 |
# "model": args.model_name,
|
72 |
+
"model": "doubao-pro-32k-241215",
|
73 |
# "model": "seed-1-6-250615",
|
74 |
# "model": "seed-1-6-flash-250615",
|
75 |
+
# "model": "seed-1-6-flash-250715",
|
76 |
|
77 |
"text": [
|
78 |
"天空为什么这么蓝",
|
llm_eval_script/byteplus.py
CHANGED
@@ -38,13 +38,19 @@ def get_args():
|
|
38 |
|
39 |
deepseek-v3
|
40 |
deepseek-v3-250324
|
|
|
|
|
|
|
|
|
41 |
"""
|
42 |
parser = argparse.ArgumentParser()
|
43 |
parser.add_argument(
|
44 |
"--model_name",
|
45 |
# default="seed-1-6-250615",
|
46 |
-
default="seed-1-6-flash-250615",
|
47 |
# default="deepseek-v3-250324",
|
|
|
|
|
48 |
type=str
|
49 |
)
|
50 |
parser.add_argument(
|
|
|
38 |
|
39 |
deepseek-v3
|
40 |
deepseek-v3-250324
|
41 |
+
|
42 |
+
skylark-pro-250415
|
43 |
+
skylark-lite-250215
|
44 |
+
|
45 |
"""
|
46 |
parser = argparse.ArgumentParser()
|
47 |
parser.add_argument(
|
48 |
"--model_name",
|
49 |
# default="seed-1-6-250615",
|
50 |
+
# default="seed-1-6-flash-250615",
|
51 |
# default="deepseek-v3-250324",
|
52 |
+
# default="skylark-pro-250415",
|
53 |
+
default="skylark-lite-250215",
|
54 |
type=str
|
55 |
)
|
56 |
parser.add_argument(
|
llm_eval_script/byteplus_chat.py
CHANGED
@@ -44,8 +44,10 @@ def get_args():
|
|
44 |
parser.add_argument(
|
45 |
"--model_name",
|
46 |
# default="seed-1-6-250615",
|
47 |
-
default="seed-1-6-flash-250615",
|
48 |
# default="deepseek-v3-250324",
|
|
|
|
|
49 |
type=str
|
50 |
)
|
51 |
parser.add_argument(
|
|
|
44 |
parser.add_argument(
|
45 |
"--model_name",
|
46 |
# default="seed-1-6-250615",
|
47 |
+
# default="seed-1-6-flash-250615",
|
48 |
# default="deepseek-v3-250324",
|
49 |
+
default="skylark-pro-250415",
|
50 |
+
# default="skylark-lite-250215",
|
51 |
type=str
|
52 |
)
|
53 |
parser.add_argument(
|