Upload from GitHub Actions: minor caching change
- evals/main.py  +12 -7
- evals/models.py  +1 -1
evals/main.py CHANGED

@@ -14,16 +14,21 @@ async def evaluate():
     single_model = os.environ.get("SINGLE_MODEL")  # Optional: run only one specific model
     test_mode = os.environ.get("TEST", "").lower() in ("1", "true", "yes")  # Optional: skip results loading/saving
 
-    models_df = pd.DataFrame(models)
-    languages_df = pd.DataFrame(languages)
+    # Keep original DataFrames for saving metadata
+    original_models_df = pd.DataFrame(models)
+    original_languages_df = pd.DataFrame(languages)
+
+    # Create working copies for single evaluation runs
+    models_df = original_models_df.copy()
+    languages_df = original_languages_df.copy()
     top_languages = languages.head(max_languages)
 
-    # Filter to single model if specified
+    # Filter to single model if specified (only affects evaluation, not saving)
     if single_model:
         models_df = models_df[models_df["id"] == single_model]
         if len(models_df) == 0:
             print(f"Error: Model '{single_model}' not found. Available models:")
-            for model_id in models_df["id"]:
+            for model_id in original_models_df["id"]:
                 print(f" {model_id}")
             return pd.DataFrame()
 

@@ -110,9 +115,9 @@ async def evaluate():
         results_df = results_df.sort_values(by=["model", "bcp_47", "task", "metric"])
         results_df.to_json("results.json", **args)
 
-        # Save model and language info
-        models_df.to_json("models.json", **args)
-        languages_df.to_json("languages.json", **args)
+        # Save model and language info (always save complete metadata, not filtered)
+        original_models_df.to_json("models.json", **args)
+        original_languages_df.to_json("languages.json", **args)
     else:
         print("TEST MODE: Skipping results saving")
 
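The first hunk separates two views of the same data: the full tables (original_models_df, original_languages_df) feed the metadata files, while the working copies (models_df, languages_df) are what a SINGLE_MODEL run narrows down. Below is a minimal sketch of that pattern; the rows and the explicit to_json arguments are placeholders standing in for the repo's real data and the **args it uses.

```python
# Sketch of the keep-original / filter-copy pattern from the diff above.
# The rows and the to_json arguments are placeholders, not the repo's real data.
import os
import pandas as pd

models = [{"id": "org/model-a"}, {"id": "org/model-b"}]
languages = [{"bcp_47": "en"}, {"bcp_47": "de"}]

# Full tables, kept untouched for the metadata export at the end of the run.
original_models_df = pd.DataFrame(models)
original_languages_df = pd.DataFrame(languages)

# Working copy that the optional SINGLE_MODEL filter is allowed to shrink.
models_df = original_models_df.copy()
single_model = os.environ.get("SINGLE_MODEL")
if single_model:
    models_df = models_df[models_df["id"] == single_model]

# models_df may now contain one row (or none), but the metadata files still
# describe every model and language, matching the "not filtered" comment.
original_models_df.to_json("models.json", orient="records", indent=2)
original_languages_df.to_json("languages.json", orient="records", indent=2)
```

That is what the new comment in the second hunk spells out: the models.json and languages.json written at the end no longer depend on whether the evaluation itself was scoped to a single model.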
evals/models.py CHANGED

@@ -345,7 +345,7 @@ def get_cost(row):
     return None
 
 
-
+@cache
 def load_models(date: date):
     popular_models = (
         get_historical_popular_models(date.today())[:20]