davidpomerenke committed on
Commit
b39df3c
·
verified ·
1 Parent(s): d380f79

Upload from GitHub Actions: minor caching change

Browse files
Files changed (2) hide show
  1. evals/main.py +12 -7
  2. evals/models.py +1 -1
evals/main.py CHANGED
@@ -14,16 +14,21 @@ async def evaluate():
14
  single_model = os.environ.get("SINGLE_MODEL") # Optional: run only one specific model
15
  test_mode = os.environ.get("TEST", "").lower() in ("1", "true", "yes") # Optional: skip results loading/saving
16
 
17
- models_df = pd.DataFrame(models)
18
- languages_df = pd.DataFrame(languages)
 
 
 
 
 
19
  top_languages = languages.head(max_languages)
20
 
21
- # Filter to single model if specified
22
  if single_model:
23
  models_df = models_df[models_df["id"] == single_model]
24
  if len(models_df) == 0:
25
  print(f"Error: Model '{single_model}' not found. Available models:")
26
- for model_id in pd.DataFrame(models)["id"]:
27
  print(f" {model_id}")
28
  return pd.DataFrame()
29
 
@@ -110,9 +115,9 @@ async def evaluate():
110
  results_df = results_df.sort_values(by=["model", "bcp_47", "task", "metric"])
111
  results_df.to_json("results.json", **args)
112
 
113
- # Save model and language info
114
- models_df.to_json("models.json", **args)
115
- languages_df.to_json("languages.json", **args)
116
  else:
117
  print("TEST MODE: Skipping results saving")
118
 
 
14
  single_model = os.environ.get("SINGLE_MODEL") # Optional: run only one specific model
15
  test_mode = os.environ.get("TEST", "").lower() in ("1", "true", "yes") # Optional: skip results loading/saving
16
 
17
+ # Keep original DataFrames for saving metadata
18
+ original_models_df = pd.DataFrame(models)
19
+ original_languages_df = pd.DataFrame(languages)
20
+
21
+ # Create working copies for single evaluation runs
22
+ models_df = original_models_df.copy()
23
+ languages_df = original_languages_df.copy()
24
  top_languages = languages.head(max_languages)
25
 
26
+ # Filter to single model if specified (only affects evaluation, not saving)
27
  if single_model:
28
  models_df = models_df[models_df["id"] == single_model]
29
  if len(models_df) == 0:
30
  print(f"Error: Model '{single_model}' not found. Available models:")
31
+ for model_id in original_models_df["id"]:
32
  print(f" {model_id}")
33
  return pd.DataFrame()
34
 
 
115
  results_df = results_df.sort_values(by=["model", "bcp_47", "task", "metric"])
116
  results_df.to_json("results.json", **args)
117
 
118
+ # Save model and language info (always save complete metadata, not filtered)
119
+ original_models_df.to_json("models.json", **args)
120
+ original_languages_df.to_json("languages.json", **args)
121
  else:
122
  print("TEST MODE: Skipping results saving")
123
 
evals/models.py CHANGED
@@ -345,7 +345,7 @@ def get_cost(row):
345
  return None
346
 
347
 
348
- #@cache
349
  def load_models(date: date):
350
  popular_models = (
351
  get_historical_popular_models(date.today())[:20]
 
345
  return None
346
 
347
 
348
+ @cache
349
  def load_models(date: date):
350
  popular_models = (
351
  get_historical_popular_models(date.today())[:20]