Commit
·
376f461
1
Parent(s):
60d6a88
fix: Separate zero-shot performance from few-shot
Browse files
app.py
CHANGED
@@ -786,7 +786,8 @@ def fetch_results() -> dict[Language, pd.DataFrame]:
|
|
786 |
logger.info("Fetching results from EuroEval benchmark...")
|
787 |
|
788 |
response = requests.get(
|
789 |
-
"https://raw.githubusercontent.com/EuroEval/leaderboards/refs/heads/main/results/results.jsonl"
|
|
|
790 |
)
|
791 |
response.raise_for_status()
|
792 |
records = [
|
@@ -805,6 +806,8 @@ def fetch_results() -> dict[Language, pd.DataFrame]:
|
|
805 |
data_dict = defaultdict(dict)
|
806 |
for record in records:
|
807 |
model_name = record["model"]
|
|
|
|
|
808 |
raw_results = record["results"]["raw"]
|
809 |
if isinstance(raw_results, dict) and "test" in raw_results:
|
810 |
raw_results = raw_results.get("test", raw_results)
|
|
|
786 |
logger.info("Fetching results from EuroEval benchmark...")
|
787 |
|
788 |
response = requests.get(
|
789 |
+
"https://raw.githubusercontent.com/EuroEval/leaderboards/refs/heads/main"
|
790 |
+
"/results/results.jsonl"
|
791 |
)
|
792 |
response.raise_for_status()
|
793 |
records = [
|
|
|
806 |
data_dict = defaultdict(dict)
|
807 |
for record in records:
|
808 |
model_name = record["model"]
|
809 |
+
if not record["few_shot"]:
|
810 |
+
model_name += " (zero-shot)"
|
811 |
raw_results = record["results"]["raw"]
|
812 |
if isinstance(raw_results, dict) and "test" in raw_results:
|
813 |
raw_results = raw_results.get("test", raw_results)
|