胥基
committed on
Commit
·
2c1761f
1
Parent(s):
9eea524
change app
Browse files
- __pycache__/content.cpython-310.pyc +0 -0
- __pycache__/scorer.cpython-310.pyc +0 -0
- app.py +8 -8
__pycache__/content.cpython-310.pyc
ADDED
Binary file (3.37 kB). View file
|
|
__pycache__/scorer.cpython-310.pyc
ADDED
Binary file (2.65 kB). View file
|
|
app.py
CHANGED
@@ -26,7 +26,7 @@ RESULTS_DATASET = f"{OWNER}/CTFAIA_results_public"
|
|
26 |
LEADERBOARD_PATH = f"{OWNER}/agent_ctf_leaderboard"
|
27 |
api = HfApi()
|
28 |
|
29 |
-
YEAR_VERSION = "
|
30 |
|
31 |
os.makedirs("scored", exist_ok=True)
|
32 |
|
@@ -56,8 +56,8 @@ eval_dataframe_test = get_dataframe_from_results(eval_results=eval_results, spli
|
|
56 |
|
57 |
# Gold answers
|
58 |
gold_results = {}
|
59 |
-
gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}
|
60 |
-
gold_results = {split: {row["
|
61 |
|
62 |
|
63 |
def restart_space():
|
@@ -113,17 +113,17 @@ def add_new_eval(
|
|
113 |
if "model_answer" not in task:
|
114 |
raise format_error(f"Line {ix} contains no model_answer key. Please fix it and resubmit your file.")
|
115 |
answer = task["model_answer"]
|
116 |
-
|
117 |
try:
|
118 |
-
level = int(gold_results[val_or_test][
|
119 |
except KeyError:
|
120 |
-
return format_error(f"{
|
121 |
|
122 |
-
score = question_scorer(task['model_answer'], gold_results[val_or_test][
|
123 |
|
124 |
scored_file.write(
|
125 |
json.dumps({
|
126 |
-
"id":
|
127 |
"model_answer": answer,
|
128 |
"score": score,
|
129 |
"level": level
|
|
|
26 |
LEADERBOARD_PATH = f"{OWNER}/agent_ctf_leaderboard"
|
27 |
api = HfApi()
|
28 |
|
29 |
+
YEAR_VERSION = "default"
|
30 |
|
31 |
os.makedirs("scored", exist_ok=True)
|
32 |
|
|
|
56 |
|
57 |
# Gold answers
|
58 |
gold_results = {}
|
59 |
+
gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}", token=TOKEN)
|
60 |
+
gold_results = {split: {row["task_name"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
|
61 |
|
62 |
|
63 |
def restart_space():
|
|
|
113 |
if "model_answer" not in task:
|
114 |
raise format_error(f"Line {ix} contains no model_answer key. Please fix it and resubmit your file.")
|
115 |
answer = task["model_answer"]
|
116 |
+
task_name = task["task_name"]
|
117 |
try:
|
118 |
+
level = int(gold_results[val_or_test][task_name]["Level"])
|
119 |
except KeyError:
|
120 |
+
return format_error(f"{task_name} not found in split {val_or_test}. Are you sure you submitted the correct file?")
|
121 |
|
122 |
+
score = question_scorer(task['model_answer'], gold_results[val_or_test][task_name]["Final answer"])
|
123 |
|
124 |
scored_file.write(
|
125 |
json.dumps({
|
126 |
+
"id": task_name,
|
127 |
"model_answer": answer,
|
128 |
"score": score,
|
129 |
"level": level
|