huckiyang committed
Commit 1fedf85 · 1 Parent(s): d94c0cf

[task 2] fix
Files changed (1)
  1. app.py +234 -129
app.py CHANGED
@@ -1,6 +1,7 @@
import os
import json
import datetime
+ import time
from email.utils import parseaddr

import gradio as gr
@@ -13,52 +14,122 @@ from huggingface_hub import HfApi, login
from scorer import instruction_scorer
from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink

+ # Set up logging
+ import logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+ # Initialize token with better error handling
TOKEN = os.environ.get("TOKEN", None)
if TOKEN:
-     login(token=TOKEN)
+     try:
+         login(token=TOKEN)
+         logger.info("Successfully logged in with token")
+     except Exception as e:
+         logger.error(f"Error logging in with token: {e}")
else:
-     print("Token not found. Please set the HUGGINGFACE_TOKEN environment variable.")
+     logger.warning("Token not found. Please set the HUGGINGFACE_TOKEN environment variable.")

+ # Constants
OWNER="Taejin"
REF_JSONS_SET = f"{OWNER}/speaker_tagging_lb_refs"
RESULTS_DATASET = f"{OWNER}/spk_tag_results"
LEADERBOARD_PATH = f"{OWNER}/leaderboard"
SUBMISSION_DATASET = f"{OWNER}/submission_leaderboard"
- api = HfApi()
-
YEAR_VERSION = "2024"

+ # Initialize HfApi with timeout
+ api = HfApi(timeout=60)
+
+ # File handling functions
def read_json_file(filepath):
-     with open(filepath) as infile:
-         data_dict = json.load(infile)
-     return data_dict
+     try:
+         with open(filepath) as infile:
+             data_dict = json.load(infile)
+         return data_dict
+     except Exception as e:
+         logger.error(f"Error reading JSON file {filepath}: {e}")
+         raise

def save_json_file(filepath, data_dict):
-     with open(filepath, "w") as outfile:
-         json.dump(data_dict, outfile)
-
+     try:
+         with open(filepath, "w") as outfile:
+             json.dump(data_dict, outfile)
+         logger.info(f"Successfully saved JSON to {filepath}")
+     except Exception as e:
+         logger.error(f"Error saving JSON to {filepath}: {e}")
+         raise
+
+ # Create necessary directories
os.makedirs("scored", exist_ok=True)
- print(f"Token loaded-1 : {TOKEN}")
- results_data_files = {"dev": "dev_set_data.csv", "eval": "eval_set_data.csv"}
- results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-
- print(f"Token loaded-2 : {TOKEN}")
- ref_json_files = {"dev_src": "err_dev.src.seglst.json", "dev_ref": "err_dev.ref.seglst.json", "eval_src": "err_eval.src.seglst.json", "eval_ref": "err_eval.ref.seglst.json"}
- ref_jsons = load_dataset(REF_JSONS_SET, data_files=ref_json_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
+ os.makedirs("seglst_files", exist_ok=True)
+
+ # Load datasets with retry mechanism
+ def load_dataset_with_retry(dataset_name, data_files, token, max_retries=3):
+     for attempt in range(max_retries):
+         try:
+             logger.info(f"Loading dataset {dataset_name}, attempt {attempt+1}/{max_retries}")
+             return load_dataset(
+                 dataset_name,
+                 data_files=data_files,
+                 token=token,
+                 download_mode="force_redownload",
+                 ignore_verifications=True
+             )
+         except Exception as e:
+             logger.error(f"Error loading dataset {dataset_name}, attempt {attempt+1}: {e}")
+             if attempt < max_retries - 1:
+                 time.sleep(2 * (attempt + 1))  # Exponential backoff
+             else:
+                 raise
+
+ # Load datasets
+ try:
+     logger.info(f"Loading results dataset")
+     results_data_files = {"dev": "dev_set_data.csv", "eval": "eval_set_data.csv"}
+     results = load_dataset_with_retry(RESULTS_DATASET, data_files=results_data_files, token=TOKEN)
+
+     logger.info(f"Loading reference JSON files")
+     ref_json_files = {
+         "dev_src": "err_dev.src.seglst.json",
+         "dev_ref": "err_dev.ref.seglst.json",
+         "eval_src": "err_eval.src.seglst.json",
+         "eval_ref": "err_eval.ref.seglst.json"
+     }
+     ref_jsons = load_dataset_with_retry(REF_JSONS_SET, data_files=ref_json_files, token=TOKEN)
+ except Exception as e:
+     logger.error(f"Failed to load datasets: {e}")
+     # We'll continue and let the app handle this gracefully

def get_dataframe_from_results(results, split):
-     df = results[split].to_pandas()
-     df = df.sort_values(by=["cpWER"], ascending=True)
-     return df
-
+     try:
+         df = results[split].to_pandas()
+         df = df.sort_values(by=["cpWER"], ascending=True)
+         return df
+     except Exception as e:
+         logger.error(f"Error creating dataframe for {split}: {e}")
+         # Return empty dataframe as fallback
+         return pd.DataFrame(columns=["System_name", "Method", "Organisation", "cpWER", "errors", "length"])

def restart_space():
-     api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
+     try:
+         logger.info("Attempting to restart space")
+         api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
+         logger.info("Space restart request sent successfully")
+     except Exception as e:
+         logger.error(f"Error restarting space: {e}")

TYPES = ["markdown", "markdown", "markdown", "number", "number", "number"]

- dev_dataset_dataframe= get_dataframe_from_results(results=results, split="dev")
- eval_dataset_dataframe= get_dataframe_from_results(results=results, split="eval")
+ # Initialize dataframes
+ try:
+     dev_dataset_dataframe = get_dataframe_from_results(results=results, split="dev")
+     eval_dataset_dataframe = get_dataframe_from_results(results=results, split="eval")
+ except Exception as e:
+     logger.error(f"Error initializing dataframes: {e}")
+     # Create empty dataframes as fallback
+     dev_dataset_dataframe = pd.DataFrame(columns=["System_name", "Method", "Organisation", "cpWER", "errors", "length"])
+     eval_dataset_dataframe = pd.DataFrame(columns=["System_name", "Method", "Organisation", "cpWER", "errors", "length"])

def add_new_eval(
    system_name: str,
@@ -67,139 +138,165 @@ def add_new_eval(
    organisation: str,
    mail: str,
):
-     print(f"printing all inputs system_name: {system_name}, method: {method}, path_to_file: {path_to_file}, organisation: {organisation}, mail: {mail}")
+     logger.info(f"New evaluation submission: system_name: {system_name}, method: {method}, file: {path_to_file}, org: {organisation}")

-     if len(system_name)==0:
-         print("system_name none")
+     # Input validation
+     if len(system_name) == 0:
+         logger.warning("Submission rejected: Empty system name")
        raise gr.Error("Please provide a system_name name. Field empty!")

-     if len(method)==0:
-         print("method none")
+     if len(method) == 0:
+         logger.warning("Submission rejected: Empty method")
        raise gr.Error("Please provide a method. Field empty!")

-     if len(organisation)==0:
-         print("org none")
+     if len(organisation) == 0:
+         logger.warning("Submission rejected: Empty organisation")
        raise gr.Error("Please provide organisation information. Field empty!")

-     # Very basic email parsing
+     # Email validation
    _, parsed_mail = parseaddr(mail)
    if not "@" in parsed_mail:
-         print("email here")
+         logger.warning(f"Submission rejected: Invalid email {mail}")
        raise gr.Error("Please provide a valid email address.")

-     # Check if the combination system_name/org already exists and prints a warning message if yes
-     # if system_name.lower() in set([m.lower() for m in results["dev"]["System_name"]]) and organisation.lower() in set([o.lower() for o in results["dev"]["Organisation"]]):
-     #     print("system_name org combo here")
-     #     raise gr.Error("This system_name has been already submitted.")
-
    if path_to_file is None:
-         print("file missing here")
+         logger.warning("Submission rejected: Missing file")
        raise gr.Error("Please attach a file.")

-
-     # Save submitted file
-     time_atm = datetime.datetime.today()
-     api.upload_file(
-         repo_id=SUBMISSION_DATASET,
-         path_or_fileobj=path_to_file.name,
-         path_in_repo=f"{organisation}/{system_name}/{YEAR_VERSION}_raw_{time_atm}.json",
-         repo_type="dataset",
-         token=TOKEN
-     )
+     try:
+         # Save submitted file
+         time_atm = datetime.datetime.today()
+         file_path_in_repo = f"{organisation}/{system_name}/{YEAR_VERSION}_raw_{time_atm}.json"
+         logger.info(f"Uploading submission file to {file_path_in_repo}")
+
+         api.upload_file(
+             repo_id=SUBMISSION_DATASET,
+             path_or_fileobj=path_to_file.name,
+             path_in_repo=file_path_in_repo,
+             repo_type="dataset",
+             token=TOKEN
+         )
+
+         # Determine mode and reference file
+         if "err_dev.hyp.seglst.json" in path_to_file.name:
+             ref_file_path = "seglst_files/err_dev.ref.seglst.json"
+             mode = "dev"
+         elif "err_eval.hyp.seglst.json" in path_to_file.name:
+             ref_file_path = "seglst_files/err_eval.ref.seglst.json"
+             mode = "eval"
+         else:
+             basename = os.path.basename(path_to_file.name)
+             logger.warning(f"Submission rejected: Invalid filename {basename}")
+             raise gr.Error(f"{basename} is NOT a valid name. It should be either err_dev.hyp.seglst.json or err_eval.hyp.seglst.json")
+
+         # Compute score
+         logger.info(f"Computing scores for {system_name} in {mode} mode")
+         scores = instruction_scorer(file_path_input=path_to_file.name, ref_file_path=ref_file_path, system_name=system_name)

-     # Compute score
-     if "err_dev.hyp.seglst.json" in path_to_file.name:
-         ref_file_path="seglst_files/err_dev.ref.seglst.json"
-         mode = "dev"
-     elif "err_eval.hyp.seglst.json" in path_to_file.name:
-         ref_file_path="seglst_files/err_eval.ref.seglst.json"
-         mode = "eval"
-     else:
-         basename = os.path.basename(path_to_file.name)
-         raise gr.Error(f"{basename} is NOT a valid name. It should be either err_dev.hyp.seglst.json or err_eval.hyp.seglst.json")
-     scores = instruction_scorer(file_path_input= path_to_file.name, ref_file_path=ref_file_path, system_name=system_name)
-
-     path_or_fileobj=f"scored/{organisation}_{system_name}.json"
-     scores_and_info = {
+         # Save scores
+         path_or_fileobj = f"scored/{organisation}_{system_name}.json"
+         scores_and_info = {
            "system_name": system_name,
-         "method":method,
+             "method": method,
            "organisation": organisation,
            "email": mail,
            "cpWER": scores["cpWER"],
            "errors": scores["errors"],
            "length": scores["length"],
        }
-     save_json_file(path_or_fileobj, data_dict=scores_and_info)
-
-     # Save scored file
-     api.upload_file(
-         repo_id=SUBMISSION_DATASET,
-         path_or_fileobj=path_or_fileobj,
-         path_in_repo=f"{organisation}/{system_name}/{YEAR_VERSION}_scored_{time_atm}.json",
-         repo_type="dataset",
-         token=TOKEN
-     )
-
-     # Actual submission
-     eval_entry = {
-         "system_name": system_name,
-         "method":method,
-         "organisation": organisation,
-         "cpWER":scores["cpWER"],
-         "errors":scores["errors"],
-         "length":scores["length"],
-     }
-
-     if mode == "dev":
-         dev_set_data_csv = "dev_set_data.csv"
-         dev_dataset_dataframe = get_dataframe_from_results(results=results, split="dev")
-         dev_dataset_dataframe = pd.concat([dev_dataset_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
-         dev_dataset_dataframe.to_csv(dev_set_data_csv, index=False)
+         save_json_file(path_or_fileobj, data_dict=scores_and_info)

+         # Upload scored file
+         logger.info(f"Uploading scored file for {system_name}")
        api.upload_file(
-             repo_id=RESULTS_DATASET,
-             path_or_fileobj=dev_set_data_csv,
-             path_in_repo=dev_set_data_csv,
+             repo_id=SUBMISSION_DATASET,
+             path_or_fileobj=path_or_fileobj,
+             path_in_repo=f"{organisation}/{system_name}/{YEAR_VERSION}_scored_{time_atm}.json",
            repo_type="dataset",
            token=TOKEN
        )
-     elif mode == "eval":
-         eval_set_data_csv = "eval_set_data.csv"
-         eval_dataset_dataframe = get_dataframe_from_results(results=results, split="eval")
-         eval_dataset_dataframe = pd.concat([eval_dataset_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
-         eval_dataset_dataframe.to_csv(eval_set_data_csv, index=False)
-
-         api.upload_file(
-             repo_id=RESULTS_DATASET,
-             path_or_fileobj=eval_set_data_csv,
-             path_in_repo=eval_set_data_csv,
-             repo_type="dataset",
-             token=TOKEN
-         )
-
-     return format_log(f"system_name {system_name} submitted by {organisation} successfully! \nPlease refresh the val leaderboard, and wait a bit to see the score displayed")

+         # Prepare entry for results
+         eval_entry = {
+             "System_name": system_name,
+             "Method": method,
+             "Organisation": organisation,
+             "cpWER": scores["cpWER"],
+             "errors": scores["errors"],
+             "length": scores["length"],
+         }
+
+         # Update appropriate dataset
+         if mode == "dev":
+             dev_set_data_csv = "dev_set_data.csv"
+             dev_dataset_dataframe = get_dataframe_from_results(results=results, split="dev")
+             dev_dataset_dataframe = pd.concat([dev_dataset_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
+             dev_dataset_dataframe.to_csv(dev_set_data_csv, index=False)
+
+             logger.info(f"Uploading updated dev results for {system_name}")
+             api.upload_file(
+                 repo_id=RESULTS_DATASET,
+                 path_or_fileobj=dev_set_data_csv,
+                 path_in_repo=dev_set_data_csv,
+                 repo_type="dataset",
+                 token=TOKEN
+             )
+         elif mode == "eval":
+             eval_set_data_csv = "eval_set_data.csv"
+             eval_dataset_dataframe = get_dataframe_from_results(results=results, split="eval")
+             eval_dataset_dataframe = pd.concat([eval_dataset_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
+             eval_dataset_dataframe.to_csv(eval_set_data_csv, index=False)
+
+             logger.info(f"Uploading updated eval results for {system_name}")
+             api.upload_file(
+                 repo_id=RESULTS_DATASET,
+                 path_or_fileobj=eval_set_data_csv,
+                 path_in_repo=eval_set_data_csv,
+                 repo_type="dataset",
+                 token=TOKEN
+             )
+
+         logger.info(f"Submission successful for {system_name} by {organisation}")
+         return format_log(f"System {system_name} submitted by {organisation} successfully! \nPlease refresh the leaderboard, and wait a bit to see the score displayed")
+
+     except Exception as e:
+         logger.error(f"Error processing submission: {e}")
+         return format_error(f"Error processing submission: {str(e)}")

def refresh():
-     results_data_files = {"dev": "dev_set_data.csv", "eval": "eval_set_data.csv"}
-     results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
-     dev_results_dataframe = get_dataframe_from_results(results=results, split="dev")
-     eval_results_dataframe = get_dataframe_from_results(results=results, split="eval")
-     return dev_results_dataframe, eval_results_dataframe
+     try:
+         logger.info("Refreshing leaderboard data")
+         results_data_files = {"dev": "dev_set_data.csv", "eval": "eval_set_data.csv"}
+         results = load_dataset_with_retry(RESULTS_DATASET, data_files=results_data_files, token=TOKEN)
+         dev_results_dataframe = get_dataframe_from_results(results=results, split="dev")
+         eval_results_dataframe = get_dataframe_from_results(results=results, split="eval")
+         return dev_results_dataframe, eval_results_dataframe
+     except Exception as e:
+         logger.error(f"Error refreshing data: {e}")
+         # Return empty dataframes as fallback
+         empty_df = pd.DataFrame(columns=["System_name", "Method", "Organisation", "cpWER", "errors", "length"])
+         return empty_df, empty_df

def upload_file(files):
    file_paths = [file.name for file in files]
    return file_paths

- for file_key in ['dev_src', 'dev_ref', 'eval_src', 'eval_ref']:
-     ref_jsons[file_key].to_json(path_or_buf=f"seglst_files/{file_key}.json")
-     buff_list = [x.strip() for x in open(f"seglst_files/{file_key}.json").readlines()]
-     buff_str = ",\n".join(buff_list)
-     seglst_json = f"[\n{buff_str}\n]"
-     split, datatype = file_key.split("_")
-     with open(f"seglst_files/err_{split}.{datatype}.seglst.json", "w") as f:
-         f.write(seglst_json)
-
+ # Process reference files
+ try:
+     logger.info("Processing reference files")
+     for file_key in ['dev_src', 'dev_ref', 'eval_src', 'eval_ref']:
+         ref_jsons[file_key].to_json(path_or_buf=f"seglst_files/{file_key}.json")
+         buff_list = [x.strip() for x in open(f"seglst_files/{file_key}.json").readlines()]
+         buff_str = ",\n".join(buff_list)
+         seglst_json = f"[\n{buff_str}\n]"
+         split, datatype = file_key.split("_")
+         with open(f"seglst_files/err_{split}.{datatype}.seglst.json", "w") as f:
+             f.write(seglst_json)
+     logger.info("Reference files processed successfully")
+ except Exception as e:
+     logger.error(f"Error processing reference files: {e}")
+
+ # Create Gradio interface
demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)
@@ -221,7 +318,7 @@ with demo:
            elem_id="citation-button",
        )
    with gr.Tab("Results: Dev"):
-         leaderboard_table_dev = gr.components.Dataframe(
+         leaderboard_table_dev = gr.components.Dataframe(
            value=dev_dataset_dataframe, datatype=TYPES, interactive=False,
            column_widths=["20%"]
        )
@@ -241,7 +338,7 @@ with demo:
            leaderboard_table_eval,
        ],
    )
-     with gr.Accordion("Submit a new system_name for evaluation"):
+     with gr.Accordion("Submit a new system for evaluation"):
        with gr.Row():
            with gr.Column():
                system_name_textbox = gr.Textbox(label="System name", type='text')
@@ -251,7 +348,6 @@
                mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)", type='email')
                file_output = gr.File()

-
        submit_button = gr.Button("Submit Eval")
        submission_result = gr.Markdown()
        submit_button.click(
@@ -266,7 +362,16 @@
            submission_result,
    )

- scheduler = BackgroundScheduler()
- scheduler.add_job(restart_space, "interval", seconds=3600)
- scheduler.start()
- demo.launch(debug=True)
+ # Set up scheduler with reduced frequency
+ try:
+     logger.info("Setting up background scheduler")
+     scheduler = BackgroundScheduler()
+     # Increase interval to reduce resource usage
+     scheduler.add_job(restart_space, "interval", seconds=7200)  # Every 2 hours instead of 1
+     scheduler.start()
+     logger.info("Background scheduler started successfully")
+ except Exception as e:
+     logger.error(f"Error setting up scheduler: {e}")
+
+ # Launch the app with memory limits
+ demo.launch(debug=False)  # Set debug to False in production