HoneyTian committed on
Commit
4fb65fb
·
1 Parent(s): 102bd7c
.gitignore CHANGED
@@ -5,6 +5,7 @@
5
  #/data/
6
  /data/comment
7
  #/data/eval_data
 
8
  /data/raw_dataset
9
  /dotenv/
10
  /logs/
 
5
  #/data/
6
  /data/comment
7
  #/data/eval_data
8
+ data/llm-log
9
  /data/raw_dataset
10
  /dotenv/
11
  /logs/
data/eval_data/byteplus/byteplus/seed-1-6-250615/shenzhen_sase/byteplus_api_key/20250728_113641/arc-easy-1000-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49e98cb6d61aa488ab7182e77412ce5714fdb36cff9d90c48c380fc9a076163d
3
+ size 397093
data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb441dc5ebeddd2c0d53d4e8f1919550f870e07cdcc9f4569eaec8a73464b287
3
+ size 1211343
data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/agent-lingoace-zh-80-chat.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5606f956ede82a224bff0430c7496192fc4c8fc3fce35ba703dfea1f7e9b4399
3
+ size 877608
data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/arc-easy-1000-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3c91dce22c3349d86618e64297edb8a5d2671fc7b11fed01f9da52a161e44f6
3
+ size 720488
data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_135005/agent-lingoace-zh-400-choice.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9df3d2954a107a163041528409232c7b578c085929758c4d8f80954548f7a54a
3
+ size 1211301
examples/test_metrics/lingoace_chat_metric.py CHANGED
@@ -43,12 +43,12 @@ python3 azure_openai.py --model_name gpt-4o-mini \
43
  )
44
  parser.add_argument(
45
  "--eval_data_file",
46
- default=(project_path / "data/eval_data/azure_openai/azure/gpt-4o/shenzhen_sase/west_us_chatgpt_openai_azure_com/20250723_170505/agent-lingoace-zh-80-chat.jsonl.raw").as_posix(),
47
  type=str
48
  )
49
  parser.add_argument(
50
  "--output_file",
51
- default=(project_path / "data/eval_data/azure_openai/azure/gpt-4o/shenzhen_sase/west_us_chatgpt_openai_azure_com/20250723_170505/agent-lingoace-zh-80-chat.jsonl").as_posix(),
52
  type=str
53
  )
54
  parser.add_argument(
 
43
  )
44
  parser.add_argument(
45
  "--eval_data_file",
46
+ default=(project_path / "data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/agent-lingoace-zh-80-chat.jsonl.raw").as_posix(),
47
  type=str
48
  )
49
  parser.add_argument(
50
  "--output_file",
51
+ default=(project_path / "data/eval_data/siliconflow/siliconflow/deepseek-ai#DeepSeek-V3/shenzhen_sase/siliconflow_api_key/20250728_113641/agent-lingoace-zh-80-chat.jsonl").as_posix(),
52
  type=str
53
  )
54
  parser.add_argument(
llm_eval_script/byteplus.py CHANGED
@@ -5,6 +5,8 @@ https://docs.byteplus.com/en/docs/ModelArk/1099455
5
 
6
  model list
7
  https://docs.byteplus.com/en/docs/ModelArk/1330310
 
 
8
  """
9
  import argparse
10
  from datetime import datetime
@@ -24,10 +26,25 @@ from project_settings import environment, project_path
24
 
25
 
26
  def get_args():
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  parser = argparse.ArgumentParser()
28
  parser.add_argument(
29
  "--model_name",
30
- default="seedance-1-0-lite-t2v-250428",
 
 
31
  type=str
32
  )
33
  parser.add_argument(
@@ -55,6 +72,17 @@ def get_args():
55
  default="byteplus_api_key",
56
  type=str
57
  )
 
 
 
 
 
 
 
 
 
 
 
58
  args = parser.parse_args()
59
  return args
60
 
@@ -67,9 +95,13 @@ def main():
67
  eval_data_dir = Path(args.eval_data_dir)
68
  eval_data_dir.mkdir(parents=True, exist_ok=True)
69
 
70
- tz = ZoneInfo("Asia/Shanghai")
71
- now = datetime.now(tz)
72
- create_time_str = now.strftime("%Y%m%d_%H%M%S")
 
 
 
 
73
 
74
  eval_dataset = eval_dataset_dir / args.eval_dataset_name
75
 
@@ -78,7 +110,7 @@ def main():
78
 
79
  api_key = environment.get(args.service, dtype=str)
80
  client = OpenAI(
81
- base_url="https://ark.ap-southeast.bytepluses.com/api/v3",
82
  # Read your Ark API Key from the environment variable.
83
  api_key=api_key
84
  )
@@ -110,6 +142,8 @@ def main():
110
  finished_idx_set.add(idx)
111
 
112
  try:
 
 
113
  time_begin = time.time()
114
  completion = client.chat.completions.create(
115
  # Replace with your Inference Endpoint.
 
5
 
6
  model list
7
  https://docs.byteplus.com/en/docs/ModelArk/1330310
8
+
9
+ https://docs.byteplus.com/en/docs/ModelArk/Chat
10
  """
11
  import argparse
12
  from datetime import datetime
 
26
 
27
 
28
  def get_args():
29
+ """
30
+ model list:
31
+ https://docs.byteplus.com/en/docs/ModelArk/1330310
32
+
33
+ bytedance-seed-1.6
34
+ seed-1-6-250615
35
+
36
+ bytedance-seed-1.6-flash
37
+ seed-1-6-flash-250615
38
+
39
+ deepseek-v3
40
+ deepseek-v3-250324
41
+ """
42
  parser = argparse.ArgumentParser()
43
  parser.add_argument(
44
  "--model_name",
45
+ default="seed-1-6-250615",
46
+ # default="seed-1-6-flash-250615",
47
+ # default="deepseek-v3-250324",
48
  type=str
49
  )
50
  parser.add_argument(
 
72
  default="byteplus_api_key",
73
  type=str
74
  )
75
+ parser.add_argument(
76
+ "--create_time_str",
77
+ # default="null",
78
+ default="20250728_113641",
79
+ type=str
80
+ )
81
+ parser.add_argument(
82
+ "--interval",
83
+ default=1,
84
+ type=int
85
+ )
86
  args = parser.parse_args()
87
  return args
88
 
 
95
  eval_data_dir = Path(args.eval_data_dir)
96
  eval_data_dir.mkdir(parents=True, exist_ok=True)
97
 
98
+ if args.create_time_str == "null":
99
+ tz = ZoneInfo("Asia/Shanghai")
100
+ now = datetime.now(tz)
101
+ create_time_str = now.strftime("%Y%m%d_%H%M%S")
102
+ # create_time_str = "20250724_090615"
103
+ else:
104
+ create_time_str = args.create_time_str
105
 
106
  eval_dataset = eval_dataset_dir / args.eval_dataset_name
107
 
 
110
 
111
  api_key = environment.get(args.service, dtype=str)
112
  client = OpenAI(
113
+ base_url="https://ark.ap-southeast.bytepluses.com/api/v3/",
114
  # Read your Ark API Key from the environment variable.
115
  api_key=api_key
116
  )
 
142
  finished_idx_set.add(idx)
143
 
144
  try:
145
+ time.sleep(args.interval)
146
+ print(f"sleep: {args.interval}")
147
  time_begin = time.time()
148
  completion = client.chat.completions.create(
149
  # Replace with your Inference Endpoint.
llm_eval_script/siliconflow.py CHANGED
@@ -35,6 +35,7 @@ Model Name:
35
  Qwen/Qwen3-8B
36
  deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
37
  deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
 
38
  Tips:
39
  (1)为了让它只输出一个字符,设置 max_tokens=1
40
 
@@ -69,7 +70,8 @@ def get_args():
69
  "--model_name",
70
  # default="Pro/deepseek-ai/DeepSeek-R1",
71
  # default="tencent/Hunyuan-A13B-Instruct",
72
- default="Qwen/Qwen3-8B",
 
73
  # default="deepseek-ai/DeepSeek-R1",
74
  # default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
75
  # default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
@@ -105,11 +107,12 @@ def get_args():
105
  parser.add_argument(
106
  "--create_time_str",
107
  default="null",
 
108
  type=str
109
  )
110
  parser.add_argument(
111
  "--interval",
112
- default=10,
113
  type=int
114
  )
115
  args = parser.parse_args()
 
35
  Qwen/Qwen3-8B
36
  deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
37
  deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
38
+ deepseek-ai/DeepSeek-V3
39
  Tips:
40
  (1)为了让它只输出一个字符,设置 max_tokens=1
41
 
 
70
  "--model_name",
71
  # default="Pro/deepseek-ai/DeepSeek-R1",
72
  # default="tencent/Hunyuan-A13B-Instruct",
73
+ default="deepseek-ai/DeepSeek-V3",
74
+ # default="Qwen/Qwen3-8B",
75
  # default="deepseek-ai/DeepSeek-R1",
76
  # default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
77
  # default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
 
107
  parser.add_argument(
108
  "--create_time_str",
109
  default="null",
110
+ # default="20250728_113641",
111
  type=str
112
  )
113
  parser.add_argument(
114
  "--interval",
115
+ default=1,
116
  type=int
117
  )
118
  args = parser.parse_args()
llm_eval_script/siliconflow_chat.py CHANGED
@@ -69,10 +69,11 @@ def get_args():
69
  "--model_name",
70
  # default="Pro/deepseek-ai/DeepSeek-R1",
71
  # default="tencent/Hunyuan-A13B-Instruct",
 
72
  # default="Qwen/Qwen3-8B",
73
  # default="deepseek-ai/DeepSeek-R1",
74
  # default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
75
- default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
76
  # default="baidu/ERNIE-4.5-300B-A47B",
77
  type=str
78
  )
@@ -103,7 +104,8 @@ def get_args():
103
  )
104
  parser.add_argument(
105
  "--create_time_str",
106
- default="null",
 
107
  type=str
108
  )
109
  parser.add_argument(
 
69
  "--model_name",
70
  # default="Pro/deepseek-ai/DeepSeek-R1",
71
  # default="tencent/Hunyuan-A13B-Instruct",
72
+ default="deepseek-ai/DeepSeek-V3",
73
  # default="Qwen/Qwen3-8B",
74
  # default="deepseek-ai/DeepSeek-R1",
75
  # default="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
76
+ # default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
77
  # default="baidu/ERNIE-4.5-300B-A47B",
78
  type=str
79
  )
 
104
  )
105
  parser.add_argument(
106
  "--create_time_str",
107
+ # default="null",
108
+ default="20250728_113641",
109
  type=str
110
  )
111
  parser.add_argument(