Revise CyberGym
Browse files
- app.py +3 -3
- gen_table.py +2 -2
app.py
CHANGED
@@ -110,7 +110,7 @@ head_style) as demo:
|
|
110 |
)
|
111 |
s.headers = s.check_box['essential'] + s.checkbox_group.value
|
112 |
|
113 |
-
if benchmark != "SWE-bench-verified":
|
114 |
with gr.Row():
|
115 |
s.model_name = gr.Textbox(
|
116 |
value='Input the Model Name (fuzzy, case insensitive)',
|
@@ -137,7 +137,7 @@ head_style) as demo:
|
|
137 |
s = structs[benchmark_list.index(dataset_name)]
|
138 |
headers = s.check_box['essential'] + fields
|
139 |
df = cp.deepcopy(s.table)
|
140 |
-
if dataset_name != "SWE-bench-verified":
|
141 |
default_val = 'Input the Model Name (fuzzy, case insensitive)'
|
142 |
else:
|
143 |
default_val = 'Input the Agent Name (fuzzy, case insensitive)'
|
@@ -145,7 +145,7 @@ head_style) as demo:
|
|
145 |
if model_name != default_val:
|
146 |
print(model_name)
|
147 |
model_name = model_name.lower()
|
148 |
-
if dataset_name != "SWE-bench-verified":
|
149 |
method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
|
150 |
else:
|
151 |
method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Agent']]
|
|
|
110 |
)
|
111 |
s.headers = s.check_box['essential'] + s.checkbox_group.value
|
112 |
|
113 |
+
if benchmark not in ["SWE-bench-verified", "CyberGym"]:
|
114 |
with gr.Row():
|
115 |
s.model_name = gr.Textbox(
|
116 |
value='Input the Model Name (fuzzy, case insensitive)',
|
|
|
137 |
s = structs[benchmark_list.index(dataset_name)]
|
138 |
headers = s.check_box['essential'] + fields
|
139 |
df = cp.deepcopy(s.table)
|
140 |
+
if dataset_name not in ["SWE-bench-verified", "CyberGym"]:
|
141 |
default_val = 'Input the Model Name (fuzzy, case insensitive)'
|
142 |
else:
|
143 |
default_val = 'Input the Agent Name (fuzzy, case insensitive)'
|
|
|
145 |
if model_name != default_val:
|
146 |
print(model_name)
|
147 |
model_name = model_name.lower()
|
148 |
+
if dataset_name not in ["SWE-bench-verified", "CyberGym"]:
|
149 |
method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
|
150 |
else:
|
151 |
method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Agent']]
|
gen_table.py
CHANGED
@@ -54,7 +54,7 @@ def BUILD_L2_DF(results, benchmark):
|
|
54 |
model_list=list(set(model_list))
|
55 |
|
56 |
res = defaultdict(list)
|
57 |
-
if benchmark not in ["RedCode","NYU CTF Bench","PrimeVul","SWE-bench-verified"]:
|
58 |
res['Model']=model_list
|
59 |
elif benchmark=="SWE-bench-verified" or benchmark=="CyberGym":
|
60 |
res['Agent']=model_list
|
@@ -104,7 +104,7 @@ def BUILD_L2_DF(results, benchmark):
|
|
104 |
required_fields = all_fields
|
105 |
|
106 |
check_box = {}
|
107 |
-
if benchmark=="SWE-bench-verified":
|
108 |
check_box['essential'] = ['Agent']
|
109 |
elif benchmark=='PrimeVul':
|
110 |
check_box['essential'] = ['Model','Method']
|
|
|
54 |
model_list=list(set(model_list))
|
55 |
|
56 |
res = defaultdict(list)
|
57 |
+
if benchmark not in ["RedCode","NYU CTF Bench","PrimeVul","SWE-bench-verified","CyberGym"]:
|
58 |
res['Model']=model_list
|
59 |
elif benchmark=="SWE-bench-verified" or benchmark=="CyberGym":
|
60 |
res['Agent']=model_list
|
|
|
104 |
required_fields = all_fields
|
105 |
|
106 |
check_box = {}
|
107 |
+
if benchmark in ["SWE-bench-verified", "CyberGym"]:
|
108 |
check_box['essential'] = ['Agent']
|
109 |
elif benchmark=='PrimeVul':
|
110 |
check_box['essential'] = ['Model','Method']
|