Spaces:
Sleeping
Sleeping
update
Browse files
- .gitignore +1 -0
- data/eval_data/byteplus/byteplus/seed-1-6-250615/shenzhen_sase/byteplus_api_key/20250728_113641/arc-easy-1000-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/agent-lingoace-zh-400-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/agent-lingoace-zh-80-chat.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/arc-easy-1000-choice.jsonl +3 -0
- data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_135005/agent-lingoace-zh-400-choice.jsonl +3 -0
- examples/test_metrics/lingoace_chat_metric.py +2 -2
- llm_eval_script/byteplus.py +39 -5
- llm_eval_script/siliconflow.py +5 -2
- llm_eval_script/siliconflow_chat.py +4 -2
.gitignore
CHANGED
@@ -5,6 +5,7 @@
|
|
5 |
#/data/
|
6 |
/data/comment
|
7 |
#/data/eval_data
|
|
|
8 |
/data/raw_dataset
|
9 |
/dotenv/
|
10 |
/logs/
|
|
|
5 |
#/data/
|
6 |
/data/comment
|
7 |
#/data/eval_data
|
8 |
+
data/llm-log
|
9 |
/data/raw_dataset
|
10 |
/dotenv/
|
11 |
/logs/
|
data/eval_data/byteplus/byteplus/seed-1-6-250615/shenzhen_sase/byteplus_api_key/20250728_113641/arc-easy-1000-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49e98cb6d61aa488ab7182e77412ce5714fdb36cff9d90c48c380fc9a076163d
|
3 |
+
size 397093
|
data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb441dc5ebeddd2c0d53d4e8f1919550f870e07cdcc9f4569eaec8a73464b287
|
3 |
+
size 1211343
|
data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/agent-lingoace-zh-80-chat.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5606f956ede82a224bff0430c7496192fc4c8fc3fce35ba703dfea1f7e9b4399
|
3 |
+
size 877608
|
data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/arc-easy-1000-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3c91dce22c3349d86618e64297edb8a5d2671fc7b11fed01f9da52a161e44f6
|
3 |
+
size 720488
|
data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_135005/agent-lingoace-zh-400-choice.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9df3d2954a107a163041528409232c7b578c085929758c4d8f80954548f7a54a
|
3 |
+
size 1211301
|
examples/test_metrics/lingoace_chat_metric.py
CHANGED
@@ -43,12 +43,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
|
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--eval_data_file",
|
46 |
-
default=(project_path / "data/eval_data/
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
50 |
"--output_file",
|
51 |
-
default=(project_path / "data/eval_data/
|
52 |
type=str
|
53 |
)
|
54 |
parser.add_argument(
|
|
|
43 |
)
|
44 |
parser.add_argument(
|
45 |
"--eval_data_file",
|
46 |
+
default=(project_path / "data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/agent-lingoace-zh-80-chat.jsonl.raw").as_posix(),
|
47 |
type=str
|
48 |
)
|
49 |
parser.add_argument(
|
50 |
"--output_file",
|
51 |
+
default=(project_path / "data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/agent-lingoace-zh-80-chat.jsonl").as_posix(),
|
52 |
type=str
|
53 |
)
|
54 |
parser.add_argument(
|
llm_eval_script/byteplus.py
CHANGED
@@ -5,6 +5,8 @@ https://docs.byteplus.com/en/docs/ModelArk/1099455
|
|
5 |
|
6 |
model list
|
7 |
https://docs.byteplus.com/en/docs/ModelArk/1330310
|
|
|
|
|
8 |
"""
|
9 |
import argparse
|
10 |
from datetime import datetime
|
@@ -24,10 +26,25 @@ from project_settings import environment, project_path
|
|
24 |
|
25 |
|
26 |
def get_args():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
parser = argparse.ArgumentParser()
|
28 |
parser.add_argument(
|
29 |
"--model_name",
|
30 |
-
default="
|
|
|
|
|
31 |
type=str
|
32 |
)
|
33 |
parser.add_argument(
|
@@ -55,6 +72,17 @@ def get_args():
|
|
55 |
default="byteplus_api_key",
|
56 |
type=str
|
57 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
args = parser.parse_args()
|
59 |
return args
|
60 |
|
@@ -67,9 +95,13 @@ def main():
|
|
67 |
eval_data_dir = Path(args.eval_data_dir)
|
68 |
eval_data_dir.mkdir(parents=True, exist_ok=True)
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
|
|
73 |
|
74 |
eval_dataset = eval_dataset_dir / args.eval_dataset_name
|
75 |
|
@@ -78,7 +110,7 @@ def main():
|
|
78 |
|
79 |
api_key = environment.get(args.service, dtype=str)
|
80 |
client = OpenAI(
|
81 |
-
base_url="https://ark.ap-southeast.bytepluses.com/api/v3",
|
82 |
# Read your Ark API Key from the environment variable.
|
83 |
api_key=api_key
|
84 |
)
|
@@ -110,6 +142,8 @@ def main():
|
|
110 |
finished_idx_set.add(idx)
|
111 |
|
112 |
try:
|
|
|
|
|
113 |
time_begin = time.time()
|
114 |
completion = client.chat.completions.create(
|
115 |
# Replace with your Inference Endpoint.
|
|
|
5 |
|
6 |
model list
|
7 |
https://docs.byteplus.com/en/docs/ModelArk/1330310
|
8 |
+
|
9 |
+
https://docs.byteplus.com/en/docs/ModelArk/Chat
|
10 |
"""
|
11 |
import argparse
|
12 |
from datetime import datetime
|
|
|
26 |
|
27 |
|
28 |
def get_args():
|
29 |
+
"""
|
30 |
+
model list:
|
31 |
+
https://docs.byteplus.com/en/docs/ModelArk/1330310
|
32 |
+
|
33 |
+
bytedance-seed-1.6
|
34 |
+
seed-1-6-250615
|
35 |
+
|
36 |
+
bytedance-seed-1.6-flash
|
37 |
+
seed-1-6-flash-250615
|
38 |
+
|
39 |
+
deepseek-v3
|
40 |
+
deepseek-v3-250324
|
41 |
+
"""
|
42 |
parser = argparse.ArgumentParser()
|
43 |
parser.add_argument(
|
44 |
"--model_name",
|
45 |
+
default="seed-1-6-250615",
|
46 |
+
# default="seed-1-6-flash-250615",
|
47 |
+
# default="deepseek-v3-250324",
|
48 |
type=str
|
49 |
)
|
50 |
parser.add_argument(
|
|
|
72 |
default="byteplus_api_key",
|
73 |
type=str
|
74 |
)
|
75 |
+
parser.add_argument(
|
76 |
+
"--create_time_str",
|
77 |
+
# default="null",
|
78 |
+
default="20250728_113641",
|
79 |
+
type=str
|
80 |
+
)
|
81 |
+
parser.add_argument(
|
82 |
+
"--interval",
|
83 |
+
default=1,
|
84 |
+
type=int
|
85 |
+
)
|
86 |
args = parser.parse_args()
|
87 |
return args
|
88 |
|
|
|
95 |
eval_data_dir = Path(args.eval_data_dir)
|
96 |
eval_data_dir.mkdir(parents=True, exist_ok=True)
|
97 |
|
98 |
+
if args.create_time_str == "null":
|
99 |
+
tz = ZoneInfo("Asia/Shanghai")
|
100 |
+
now = datetime.now(tz)
|
101 |
+
create_time_str = now.strftime("%Y%m%d_%H%M%S")
|
102 |
+
# create_time_str = "20250724_090615"
|
103 |
+
else:
|
104 |
+
create_time_str = args.create_time_str
|
105 |
|
106 |
eval_dataset = eval_dataset_dir / args.eval_dataset_name
|
107 |
|
|
|
110 |
|
111 |
api_key = environment.get(args.service, dtype=str)
|
112 |
client = OpenAI(
|
113 |
+
base_url="https://ark.ap-southeast.bytepluses.com/api/v3/",
|
114 |
# Read your Ark API Key from the environment variable.
|
115 |
api_key=api_key
|
116 |
)
|
|
|
142 |
finished_idx_set.add(idx)
|
143 |
|
144 |
try:
|
145 |
+
time.sleep(args.interval)
|
146 |
+
print(f"sleep: {args.interval}")
|
147 |
time_begin = time.time()
|
148 |
completion = client.chat.completions.create(
|
149 |
# Replace with your Inference Endpoint.
|
llm_eval_script/siliconflow.py
CHANGED
@@ -35,6 +35,7 @@ Model Name:
|
|
35 |
Qwen/Qwen3-8B
|
36 |
deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
|
37 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
|
|
|
38 |
Tips:
|
39 |
(1)为了让它只输出一个字符,设置 max_tokens=1
|
40 |
|
@@ -69,7 +70,8 @@ def get_args():
|
|
69 |
"--model_name",
|
70 |
# default="Pro/deepseek-ai/DeepSeek-R1",
|
71 |
# default="tencent/Hunyuan-A13B-Instruct",
|
72 |
-
default="
|
|
|
73 |
# default="deepseek-ai/DeepSeek-R1",
|
74 |
# default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
75 |
# default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
@@ -105,11 +107,12 @@ def get_args():
|
|
105 |
parser.add_argument(
|
106 |
"--create_time_str",
|
107 |
default="null",
|
|
|
108 |
type=str
|
109 |
)
|
110 |
parser.add_argument(
|
111 |
"--interval",
|
112 |
-
default=
|
113 |
type=int
|
114 |
)
|
115 |
args = parser.parse_args()
|
|
|
35 |
Qwen/Qwen3-8B
|
36 |
deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
|
37 |
deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
|
38 |
+
deepseek-ai/DeepSeek-V3
|
39 |
Tips:
|
40 |
(1)为了让它只输出一个字符,设置 max_tokens=1
|
41 |
|
|
|
70 |
"--model_name",
|
71 |
# default="Pro/deepseek-ai/DeepSeek-R1",
|
72 |
# default="tencent/Hunyuan-A13B-Instruct",
|
73 |
+
default="deepseek-ai/DeepSeek-V3",
|
74 |
+
# default="Qwen/Qwen3-8B",
|
75 |
# default="deepseek-ai/DeepSeek-R1",
|
76 |
# default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
77 |
# default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
|
|
107 |
parser.add_argument(
|
108 |
"--create_time_str",
|
109 |
default="null",
|
110 |
+
# default="20250728_113641",
|
111 |
type=str
|
112 |
)
|
113 |
parser.add_argument(
|
114 |
"--interval",
|
115 |
+
default=1,
|
116 |
type=int
|
117 |
)
|
118 |
args = parser.parse_args()
|
llm_eval_script/siliconflow_chat.py
CHANGED
@@ -69,10 +69,11 @@ def get_args():
|
|
69 |
"--model_name",
|
70 |
# default="Pro/deepseek-ai/DeepSeek-R1",
|
71 |
# default="tencent/Hunyuan-A13B-Instruct",
|
|
|
72 |
# default="Qwen/Qwen3-8B",
|
73 |
# default="deepseek-ai/DeepSeek-R1",
|
74 |
# default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
75 |
-
default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
76 |
# default="baidu/ERNIE-4.5-300B-A47B",
|
77 |
type=str
|
78 |
)
|
@@ -103,7 +104,8 @@ def get_args():
|
|
103 |
)
|
104 |
parser.add_argument(
|
105 |
"--create_time_str",
|
106 |
-
default="null",
|
|
|
107 |
type=str
|
108 |
)
|
109 |
parser.add_argument(
|
|
|
69 |
"--model_name",
|
70 |
# default="Pro/deepseek-ai/DeepSeek-R1",
|
71 |
# default="tencent/Hunyuan-A13B-Instruct",
|
72 |
+
default="deepseek-ai/DeepSeek-V3",
|
73 |
# default="Qwen/Qwen3-8B",
|
74 |
# default="deepseek-ai/DeepSeek-R1",
|
75 |
# default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
76 |
+
# default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
|
77 |
# default="baidu/ERNIE-4.5-300B-A47B",
|
78 |
type=str
|
79 |
)
|
|
|
104 |
)
|
105 |
parser.add_argument(
|
106 |
"--create_time_str",
|
107 |
+
# default="null",
|
108 |
+
default="20250728_113641",
|
109 |
type=str
|
110 |
)
|
111 |
parser.add_argument(
|