胥基 committed on
Commit
2c1761f
·
1 Parent(s): 9eea524

change app

Browse files
__pycache__/content.cpython-310.pyc ADDED
Binary file (3.37 kB). View file
 
__pycache__/scorer.cpython-310.pyc ADDED
Binary file (2.65 kB). View file
 
app.py CHANGED
@@ -26,7 +26,7 @@ RESULTS_DATASET = f"{OWNER}/CTFAIA_results_public"
26
  LEADERBOARD_PATH = f"{OWNER}/agent_ctf_leaderboard"
27
  api = HfApi()
28
 
29
- YEAR_VERSION = "2024"
30
 
31
  os.makedirs("scored", exist_ok=True)
32
 
@@ -56,8 +56,8 @@ eval_dataframe_test = get_dataframe_from_results(eval_results=eval_results, spli
56
 
57
  # Gold answers
58
  gold_results = {}
59
- gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}_all", token=TOKEN)
60
- gold_results = {split: {row["task_id"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
61
 
62
 
63
  def restart_space():
@@ -113,17 +113,17 @@ def add_new_eval(
113
  if "model_answer" not in task:
114
  raise format_error(f"Line {ix} contains no model_answer key. Please fix it and resubmit your file.")
115
  answer = task["model_answer"]
116
- task_id = task["task_id"]
117
  try:
118
- level = int(gold_results[val_or_test][task_id]["Level"])
119
  except KeyError:
120
- return format_error(f"{task_id} not found in split {val_or_test}. Are you sure you submitted the correct file?")
121
 
122
- score = question_scorer(task['model_answer'], gold_results[val_or_test][task_id]["Final answer"])
123
 
124
  scored_file.write(
125
  json.dumps({
126
- "id": task_id,
127
  "model_answer": answer,
128
  "score": score,
129
  "level": level
 
26
  LEADERBOARD_PATH = f"{OWNER}/agent_ctf_leaderboard"
27
  api = HfApi()
28
 
29
+ YEAR_VERSION = "default"
30
 
31
  os.makedirs("scored", exist_ok=True)
32
 
 
56
 
57
  # Gold answers
58
  gold_results = {}
59
+ gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}", token=TOKEN)
60
+ gold_results = {split: {row["task_name"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
61
 
62
 
63
  def restart_space():
 
113
  if "model_answer" not in task:
114
  raise format_error(f"Line {ix} contains no model_answer key. Please fix it and resubmit your file.")
115
  answer = task["model_answer"]
116
+ task_name = task["task_name"]
117
  try:
118
+ level = int(gold_results[val_or_test][task_name]["Level"])
119
  except KeyError:
120
+ return format_error(f"{task_name} not found in split {val_or_test}. Are you sure you submitted the correct file?")
121
 
122
+ score = question_scorer(task['model_answer'], gold_results[val_or_test][task_name]["Final answer"])
123
 
124
  scored_file.write(
125
  json.dumps({
126
+ "id": task_name,
127
  "model_answer": answer,
128
  "score": score,
129
  "level": level