HoneyTian committed on
Commit
ea948a7
·
1 Parent(s): 48b7537
Files changed (3) hide show
  1. .gitignore +1 -0
  2. main.py +262 -12
  3. main3.py +75 -0
.gitignore CHANGED
@@ -14,3 +14,4 @@
14
 
15
  #**/*.wav
16
  **/*.xlsx
 
 
14
 
15
  #**/*.wav
16
  **/*.xlsx
17
+ **/*.jsonl.raw
main.py CHANGED
@@ -14,14 +14,22 @@ llm_eval_system:v20250724_1442 \
14
  /bin/bash
15
  """
16
  import argparse
 
17
  import logging
 
18
  import platform
 
 
19
 
20
  import gradio as gr
 
 
21
 
22
  from project_settings import environment, project_path, log_directory
23
  from toolbox.os.command import Command
24
  import log
 
 
25
 
26
  log.setup_size_rotating(log_directory=log_directory)
27
 
@@ -39,26 +47,268 @@ def get_args():
39
  return args
40
 
41
 
42
- def shell(cmd: str):
43
- return Command.popen(cmd)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
 
46
  def main():
47
  args = get_args()
48
 
 
 
 
 
 
 
 
49
  # ui
50
- with gr.Blocks() as blocks:
51
  with gr.Tabs():
52
- with gr.TabItem("shell"):
53
- shell_text = gr.Textbox(label="cmd")
54
- shell_button = gr.Button("run")
55
- shell_output = gr.Textbox(label="output", max_lines=100)
56
-
57
- shell_button.click(
58
- shell,
59
- inputs=[shell_text, ],
60
- outputs=[shell_output],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  # http://127.0.0.1:7861/
64
  # http://10.75.27.247:7861/
 
14
  /bin/bash
15
  """
16
  import argparse
17
+ import json
18
  import logging
19
+ from pathlib import Path
20
  import platform
21
+ from typing import Tuple, List
22
+ import time
23
 
24
  import gradio as gr
25
+ import numpy as np
26
+ import pandas as pd
27
 
28
  from project_settings import environment, project_path, log_directory
29
  from toolbox.os.command import Command
30
  import log
31
+ from tabs.fs_tab import get_fs_tab
32
+ from tabs.shell_tab import get_shell_tab
33
 
34
  log.setup_size_rotating(log_directory=log_directory)
35
 
 
47
  return args
48
 
49
 
50
+ css = """
51
+ #dataset_df th:nth-child(1), #dataset_df td:nth-child(1) {
52
+ max-width: 50px !important; /* 第一列 */
53
+ }
54
+ #dataset_df th:nth-child(2), #dataset_df td:nth-child(2) {
55
+ max-width: 500px !important; /* 第二列 */
56
+ }
57
+ #dataset_df th:nth-child(3), #dataset_df td:nth-child(3) {
58
+ max-width: 50px !important; /* 第三列 */
59
+ }
60
+ """
61
+
62
+ temp = """
63
+
64
+ #view_chat_df th:nth-child(1), #view_chat_df td:nth-child(1) {
65
+ max-width: 50px !important; /* 第一列 */
66
+ }
67
+ #view_chat_df th:nth-child(2), #view_chat_df td:nth-child(2) {
68
+ max-width: 500px !important; /* 第二列 */
69
+ }
70
+ #view_chat_df th:nth-child(3), #view_chat_df td:nth-child(3) {
71
+ max-width: 500px !important; /* 第三列 */
72
+ }
73
+ #view_chat_df th:nth-child(4), #view_chat_df td:nth-child(4) {
74
+ max-width: 500px !important; /* 第四列 */
75
+ }
76
+ #view_chat_df th:nth-child(5), #view_chat_df td:nth-child(5) {
77
+ max-width: 500px !important; /* 第五列 */
78
+ }
79
+ #view_chat_df th:nth-child(6), #view_chat_df td:nth-child(6) {
80
+ max-width: 100px !important; /* 第六列 */
81
+ }
82
+ """
83
+
84
# Root directory scanned for evaluation result files; assigned in main() from --eval_data_dir.
eval_data_dir: Path = None
# Cached leaderboard table returned by load_board(); stays None until the first load.
llm_ranking: pd.DataFrame = None
# time.time() value stored at the last cache refresh; 0 makes the first lookup trigger a load.
last_update_ts: float = 0
# Age in seconds (one hour) after which load_board_lazy() rebuilds the cache.
update_interval = 1 * 60 * 60
88
+
89
+
90
def load_board():
    """
    Build the leaderboard table from every ``*.jsonl`` file under ``eval_data_dir``.

    The last seven path components of each file are read, from right to left, as
    dataset file name, version (date), service, client, model name, company and
    script. Files whose version directory ends with ``-delete`` are ignored, and
    so are files that contain no records.

    Each non-blank line is parsed as a JSON object. The per-record score is taken
    from ``"correct"`` for ``*-choice.jsonl`` files and from ``"score"`` for
    ``*-chat.jsonl`` files; every record must also carry ``"time_cost"``.

    :return: pd.DataFrame with one row per evaluated file (empty when nothing matched).
    :raises AssertionError: when a non-empty file is neither ``-choice`` nor ``-chat``.
    """
    result = list()
    for filename in eval_data_dir.glob("**/*.jsonl"):
        name = filename.stem
        dataset = filename.parts[-1]
        date = filename.parts[-2]
        service = filename.parts[-3]
        client = filename.parts[-4]
        model_name = filename.parts[-5]
        company = filename.parts[-6]
        script = filename.parts[-7]

        if date.endswith("-delete"):
            continue

        # The score field depends only on the file name, so resolve it once per file.
        if name.endswith("-choice"):
            score_key = "correct"
        elif name.endswith("-chat"):
            score_key = "score"
        else:
            score_key = None

        score_list = list()
        time_cost_list = list()

        with open(filename.as_posix(), "r", encoding="utf-8") as f:
            for line in f:
                # Tolerate blank lines (e.g. a trailing newline at the end of the file).
                if not line.strip():
                    continue
                row = json.loads(line)
                if score_key is None:
                    raise AssertionError(
                        f"unsupported eval file (expected '-choice' or '-chat' suffix): {filename.as_posix()}"
                    )
                score_list.append(row[score_key])
                time_cost_list.append(row["time_cost"])

        total = len(score_list)
        if total == 0:
            continue

        score = np.mean(score_list)
        time_cost_mean = np.mean(time_cost_list)
        time_cost_var = np.var(time_cost_list)

        # One call yields all three percentiles; the 75% column now really is the
        # 75th percentile (it used to be computed with q=95).
        time_cost_p75, time_cost_p95, time_cost_p99 = np.percentile(time_cost_list, [75, 95, 99])

        row_ = {
            "company": company,
            "model_name": model_name,
            "dataset": dataset,
            "score": round(score, 4),
            "time_cost(mean)": round(time_cost_mean, 4),
            "time_cost(var)": round(time_cost_var, 4),
            "time_cost(75%)": round(time_cost_p75, 4),
            "time_cost(95%)": round(time_cost_p95, 4),
            "time_cost(99%)": round(time_cost_p99, 4),
            "service": service,
            "client": client,
            "script": f"{script}.py",
            "version": date,
            "count": total,
        }
        result.append(row_)
    result = pd.DataFrame(result)
    return result
156
+
157
+
158
def load_board_lazy():
    """
    Return the cached leaderboard, rebuilding it when the cache is too old.

    The cache lives in the module globals ``llm_ranking`` / ``last_update_ts``.
    It is rebuilt with ``load_board()`` once more than ``update_interval``
    seconds have passed since the last refresh.

    :return: the value currently stored in ``llm_ranking``.
    """
    global llm_ranking
    global last_update_ts

    current_ts = time.time()
    cache_age = current_ts - last_update_ts

    # Guard clause: a fresh cache is returned untouched.
    if cache_age <= update_interval:
        return llm_ranking

    llm_ranking = load_board()
    last_update_ts = current_ts
    return llm_ranking
168
+
169
+
170
def when_click_board_button(columns: List[str]):
    """
    Click handler of the board tab: return the leaderboard restricted to ``columns``.

    :param columns: column names to keep, in display order.
    :return: the (possibly cached) leaderboard indexed by ``columns``.
    :raises gr.Error: when a requested column is missing; the message lists the
        columns that are available.
    """
    board = load_board_lazy()

    try:
        selected = board[columns]
    except KeyError as e:
        available = list(board.columns)
        raise gr.Error(f"{str(e)}, columns: {available}")
    return selected
178
+
179
+
180
def when_click_view_dataset_button(filename: str):
    """
    Click handler of the dataset tab: load a JSONL file as a table.

    :param filename: path joined onto ``project_path``.
    :return: pd.DataFrame with one row per line of the file, each line parsed as JSON.
    """
    dataset_path = (project_path / filename).as_posix()
    with open(dataset_path, "r", encoding="utf-8") as f:
        records = [json.loads(line) for line in f]
    return pd.DataFrame(records)
189
+
190
+
191
def when_click_view_chat_button(filename: str):
    """
    Load a chat evaluation JSONL file and flatten it into a table for display.

    Every line must be a JSON object with the keys ``idx``, ``prompt``,
    ``response``, ``prediction``, ``evaluate`` and ``score``.

    :param filename: path joined onto ``project_path``.
    :return: pd.DataFrame with the columns idx, conversation, response,
        prediction, evaluate and score.
    """
    chat_path = (project_path / filename).as_posix()

    records = list()
    with open(chat_path, "r", encoding="utf-8") as f:
        for line in f:
            sample = json.loads(line)
            records.append({
                "idx": sample["idx"],
                # Only the text after the last blank line of the prompt is shown.
                "conversation": sample["prompt"].split("\n\n")[-1].strip(),
                "response": sample["response"],
                "prediction": sample["prediction"],
                # Pretty-printed so the nested structure stays readable in a cell.
                "evaluate": json.dumps(sample["evaluate"], ensure_ascii=False, indent=4),
                "score": sample["score"],
            })
    return pd.DataFrame(records)
217
+
218
+
219
+
220
+ board_columns_choices = [
221
+ "company", "model_name", "dataset", "score",
222
+ "time_cost(mean)",
223
+ "time_cost(var)",
224
+ "time_cost(75%)", "time_cost(95%)", "time_cost(99%)",
225
+ "service", "client",
226
+ "script", "version", "count"
227
+ ]
228
+ board_columns_choices_default_value = [
229
+ "company", "model_name", "dataset", "score",
230
+ "time_cost(mean)",
231
+ "time_cost(var)",
232
+ # "time_cost(75%)", "time_cost(95%)", "time_cost(99%)",
233
+ ]
234
+ dataset_examples_list = [
235
+ [
236
+ "arc-easy-1000-choice.jsonl",
237
+ "ARC(AI2 推理挑战赛)\nAI2 的推理挑战赛 (ARC) 数据集是一个多项选择题问答数据集,包含 3 年级至 9 年级的科学考试题目。\n该数据集分为两个部分:简单部分和挑战部分。\n\n从简单部分取前1000条作为 arc-easy-1000-choice.jsonl",
238
+ "data/dataset/arc-easy-1000-choice.jsonl"
239
+ ],
240
+ [
241
+ "agent-lingoace-zh-400-choice.jsonl",
242
+ "lingoace数据集。",
243
+ "data/dataset/agent-lingoace-zh-400-choice.jsonl"
244
+ ],
245
+ ]
246
 
247
 
248
  def main():
249
  args = get_args()
250
 
251
+ global eval_data_dir
252
+ global llm_ranking
253
+
254
+ eval_data_dir = Path(args.eval_data_dir)
255
+
256
+ llm_ranking_board = when_click_board_button(board_columns_choices_default_value)
257
+
258
  # ui
259
+ with gr.Blocks(css=css) as blocks:
260
  with gr.Tabs():
261
+ with gr.TabItem("board"):
262
+ board_columns = gr.CheckboxGroup(
263
+ choices=board_columns_choices,
264
+ value=board_columns_choices_default_value,
265
+ label="columns"
266
+ )
267
+ board_button = gr.Button(value="View", variant="primary", visible=True)
268
+ board_board = gr.DataFrame(value=llm_ranking_board, max_height=500, min_width=160, label="board", show_search="search")
269
+
270
+ board_button.click(
271
+ fn=when_click_board_button,
272
+ inputs=[board_columns],
273
+ outputs=[board_board],
274
+ )
275
+ with gr.TabItem("dataset"):
276
+ dataset_name = gr.Textbox(label="name")
277
+ dataset_desc = gr.Textbox(label="desc")
278
+ dataset_filename = gr.Textbox(label="filename")
279
+
280
+ gr.Examples(
281
+ examples=dataset_examples_list,
282
+ inputs=[dataset_name, dataset_desc, dataset_filename],
283
+ outputs=None,
284
+ )
285
+ dataset_button = gr.Button(value="View", variant="primary", visible=True)
286
+ dataset_df = gr.DataFrame(
287
+ value=None, label="dataset", interactive=True,
288
+ show_search="search",
289
+ elem_id="dataset_df"
290
  )
291
+ dataset_button.click(
292
+ fn=when_click_view_dataset_button,
293
+ inputs=[dataset_filename],
294
+ outputs=[dataset_df],
295
+ )
296
+ _ = get_fs_tab()
297
+ _ = get_shell_tab()
298
+ # with gr.TabItem("view_chat"):
299
+ # view_chat_filename = gr.Textbox(label="filename")
300
+ # with gr.Row():
301
+ # view_chat_button = gr.Button(value="View", variant="primary", visible=True)
302
+ # view_chat_df = gr.DataFrame(
303
+ # value=None, label="dataset", interactive=True,
304
+ # show_search="search",
305
+ # elem_id="view_chat_df"
306
+ # )
307
+ # view_chat_button.click(
308
+ # fn=when_click_view_chat_button,
309
+ # inputs=[view_chat_filename],
310
+ # outputs=[view_chat_df],
311
+ # )
312
 
313
  # http://127.0.0.1:7861/
314
  # http://10.75.27.247:7861/
main3.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ docker build -t llm_eval_system:v20250724_1442 .
5
+
6
+ docker stop llm_eval_system_7862 && docker rm llm_eval_system_7862
7
+
8
+ docker run -itd \
9
+ --name llm_eval_system_7862 \
10
+ --restart=always \
11
+ --network host \
12
+ -e port=7862 \
13
+ llm_eval_system:v20250724_1442 \
14
+ /bin/bash
15
+ """
16
+ import argparse
17
+ import logging
18
+ import platform
19
+
20
+ import gradio as gr
21
+
22
+ from project_settings import environment, project_path, log_directory
23
+ from toolbox.os.command import Command
24
+ import log
25
+
26
+ log.setup_size_rotating(log_directory=log_directory)
27
+
28
+ logger = logging.getLogger("main")
29
+
30
+
31
def get_args():
    """
    Parse the command line.

    ``--eval_data_dir`` (str) defaults to ``<project_path>/data/eval_data``.

    :return: the argparse namespace.
    """
    default_eval_data_dir = (project_path / "data/eval_data").as_posix()

    parser = argparse.ArgumentParser()
    parser.add_argument("--eval_data_dir", type=str, default=default_eval_data_dir)
    return parser.parse_args()
40
+
41
+
42
def shell(cmd: str):
    """
    Pass ``cmd`` to ``Command.popen`` and return whatever it returns.

    NOTE(review): the text comes straight from the UI textbox and is forwarded
    unchanged - presumably it is executed as a shell command; confirm that the
    app is only reachable by trusted users.
    """
    output = Command.popen(cmd)
    return output
44
+
45
+
46
def main():
    """
    Build the single-tab "shell" Gradio UI and serve it.

    The app binds to 127.0.0.1 on Windows and to 0.0.0.0 elsewhere; the port is
    read from the ``port`` entry of ``environment`` (default 7860). Sharing is
    always disabled.
    """
    # Parsed so that bad command lines are still rejected; the values are not used here.
    args = get_args()

    on_windows = platform.system() == "Windows"
    server_name = "127.0.0.1" if on_windows else "0.0.0.0"

    # ui
    with gr.Blocks() as blocks:
        with gr.Tabs():
            with gr.TabItem("shell"):
                shell_text = gr.Textbox(label="cmd")
                shell_button = gr.Button("run")
                shell_output = gr.Textbox(label="output", max_lines=100)

                shell_button.click(
                    fn=shell,
                    inputs=[shell_text],
                    outputs=[shell_output],
                )

    # http://127.0.0.1:7861/
    # http://10.75.27.247:7861/
    app = blocks.queue()
    app.launch(
        share=False,
        server_name=server_name,
        server_port=environment.get("port", 7860, dtype=int),
    )
    return
72
+
73
+
74
+ if __name__ == "__main__":
75
+ main()