# InferBench: benchmark/geneval.py
import json
from pathlib import Path
from typing import Any, Dict, Iterator, List, Tuple

import requests


class GenEvalPrompts:
    def __init__(self):
        # Fetch the GenEval metadata file if it is not cached locally,
        # then load one prompt entry per non-empty JSONL line.
        self._download_geneval_file()
        metadata_path = Path("downloads/geneval/evaluation_metadata.jsonl")
        self.entries: List[Dict[str, Any]] = []
        with open(metadata_path, "r") as f:
            for line in f:
                if line.strip():
                    self.entries.append(json.loads(line))

    def __iter__(self) -> Iterator[Tuple[Dict[str, Any], str]]:
        # Yield each prompt entry together with its zero-padded folder name
        # ("00000", "00001", ...); the folder name is a string, not a Path.
        for i, entry in enumerate(self.entries):
            folder_name = f"{i:05d}"
            yield entry, folder_name

    def _download_geneval_file(self) -> None:
        # Download the GenEval evaluation metadata once and cache it on disk.
        folder_name = Path("downloads/geneval")
        folder_name.mkdir(parents=True, exist_ok=True)
        metadata_url = "https://raw.githubusercontent.com/djghosh13/geneval/main/prompts/evaluation_metadata.jsonl"
        metadata_path = folder_name / "evaluation_metadata.jsonl"
        if not metadata_path.exists():
            response = requests.get(metadata_url, timeout=30)
            # Fail loudly instead of caching an HTTP error page as metadata.
            response.raise_for_status()
            with open(metadata_path, "w") as f:
                f.write(response.text)

    @property
    def name(self) -> str:
        return "geneval"

    @property
    def size(self) -> int:
        return len(self.entries)

    @property
    def metrics(self) -> List[str]:
        raise NotImplementedError("GenEval requires custom evaluation, see README.md")
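

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how this prompt source might be consumed, assuming a
# hypothetical generate_image(prompt) callable and a per-prompt folder layout
# (one zero-padded directory per prompt, images under "samples/") along the
# lines of what GenEval's evaluation script reads.
if __name__ == "__main__":
    prompts = GenEvalPrompts()
    print(f"Loaded {prompts.size} prompts from {prompts.name}")
    for entry, folder_name in prompts:
        out_dir = Path("outputs/geneval") / folder_name / "samples"
        out_dir.mkdir(parents=True, exist_ok=True)
        # entry is one metadata record; its "prompt" field holds the text to
        # render. Image generation itself is left to the caller, e.g.
        # generate_image(entry["prompt"]).save(out_dir / "0000.png").
        print(folder_name, entry.get("prompt", ""))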