yujinyujin9393 committed on
Commit
9be313e
·
verified ·
1 Parent(s): 9c90d4d

Revise CyberGym

Browse files
Files changed (2) hide show
  1. app.py +3 -3
  2. gen_table.py +2 -2
app.py CHANGED
@@ -110,7 +110,7 @@ head_style) as demo:
110
  )
111
  s.headers = s.check_box['essential'] + s.checkbox_group.value
112
 
113
- if benchmark!='SWE-bench-verified':
114
  with gr.Row():
115
  s.model_name = gr.Textbox(
116
  value='Input the Model Name (fuzzy, case insensitive)',
@@ -137,7 +137,7 @@ head_style) as demo:
137
  s = structs[benchmark_list.index(dataset_name)]
138
  headers = s.check_box['essential'] + fields
139
  df = cp.deepcopy(s.table)
140
- if dataset_name!="SWE-bench-verified":
141
  default_val = 'Input the Model Name (fuzzy, case insensitive)'
142
  else:
143
  default_val = 'Input the Agent Name (fuzzy, case insensitive)'
@@ -145,7 +145,7 @@ head_style) as demo:
145
  if model_name != default_val:
146
  print(model_name)
147
  model_name = model_name.lower()
148
- if dataset_name!="SWE-bench-verified":
149
  method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
150
  else:
151
  method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Agent']]
 
110
  )
111
  s.headers = s.check_box['essential'] + s.checkbox_group.value
112
 
113
+ if benchmark not in ["SWE-bench-verified", "CyberGym"]:
114
  with gr.Row():
115
  s.model_name = gr.Textbox(
116
  value='Input the Model Name (fuzzy, case insensitive)',
 
137
  s = structs[benchmark_list.index(dataset_name)]
138
  headers = s.check_box['essential'] + fields
139
  df = cp.deepcopy(s.table)
140
+ if dataset_name not in ["SWE-bench-verified", "CyberGym"]:
141
  default_val = 'Input the Model Name (fuzzy, case insensitive)'
142
  else:
143
  default_val = 'Input the Agent Name (fuzzy, case insensitive)'
 
145
  if model_name != default_val:
146
  print(model_name)
147
  model_name = model_name.lower()
148
+ if dataset_name not in ["SWE-bench-verified", "CyberGym"]:
149
  method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Model']]
150
  else:
151
  method_names = [x.split('</a>')[0].split('>')[-1].lower() for x in df['Agent']]
gen_table.py CHANGED
@@ -54,7 +54,7 @@ def BUILD_L2_DF(results, benchmark):
54
  model_list=list(set(model_list))
55
 
56
  res = defaultdict(list)
57
- if benchmark not in ["RedCode","NYU CTF Bench","PrimeVul","SWE-bench-verified"]:
58
  res['Model']=model_list
59
  elif benchmark=="SWE-bench-verified" or benchmark=="CyberGym":
60
  res['Agent']=model_list
@@ -104,7 +104,7 @@ def BUILD_L2_DF(results, benchmark):
104
  required_fields = all_fields
105
 
106
  check_box = {}
107
- if benchmark=="SWE-bench-verified":
108
  check_box['essential'] = ['Agent']
109
  elif benchmark=='PrimeVul':
110
  check_box['essential'] = ['Model','Method']
 
54
  model_list=list(set(model_list))
55
 
56
  res = defaultdict(list)
57
+ if benchmark not in ["RedCode","NYU CTF Bench","PrimeVul","SWE-bench-verified","CyberGym"]:
58
  res['Model']=model_list
59
  elif benchmark=="SWE-bench-verified" or benchmark=="CyberGym":
60
  res['Agent']=model_list
 
104
  required_fields = all_fields
105
 
106
  check_box = {}
107
+ if benchmark in ["SWE-bench-verified", "CyberGym"]:
108
  check_box['essential'] = ['Agent']
109
  elif benchmark=='PrimeVul':
110
  check_box['essential'] = ['Model','Method']