davidberenstein1957 committed
Commit 8c2887a (unverified)
2 Parent(s): f5f9b38 34046e2

Merge pull request #2 from PrunaAI/feat/david

.gitignore CHANGED
@@ -175,4 +175,5 @@ cython_debug/
175
 
176
  evaluation_results/
177
  images/
178
- hf_cache/
 
 
175
 
176
  evaluation_results/
177
  images/
178
+ hf_cache/
179
+ *.lock
README.md CHANGED
@@ -1,10 +1,28 @@
1
  # InferBench
2
  Evaluate the quality and efficiency of image generation APIs.
3
 
4
- Install dependencies with conda like that:
 
 
5
 
 
 
6
  conda env create -f environment.yml
7
 
 
8
 
9
  Create .env file with all the credentials you will need.
10
 
 
1
  # InferBench
2
  Evaluate the quality and efficiency of image generation APIs.
3
 
4
+ ## Installation
5
+
6
+ ### Install dependencies
7
 
8
+ Install dependencies with conda as follows:
9
+ ```
10
  conda env create -f environment.yml
11
+ ```
12
+
13
+ ### Set up with uv
14
+
15
+ Create a virtual environment and sync the dependencies with uv:
16
+
17
+ ```
18
+ uv venv --python 3.12
19
+ ```
20
+
21
+ ```
22
+ uv sync --all-groups
23
+ ```
24
 
25
+ ## Usage
26
 
27
  Create .env file with all the credentials you will need.
28
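For reference, a minimal `.env` sketch for the usage step above. Only `REPLICATE_API_TOKEN` is confirmed by the code in this commit; treat the other variable names as assumptions to verify against each provider's SDK:

```
# Confirmed by this repository's code (Replicate, Pruna and baseline APIs):
REPLICATE_API_TOKEN=your-replicate-token
# Assumed names for the remaining providers; verify against their SDKs:
FAL_KEY=your-fal-key
TOGETHER_API_KEY=your-together-key
FIREWORKS_API_KEY=your-fireworks-key
AWS_ACCESS_KEY_ID=your-aws-access-key-id
AWS_SECRET_ACCESS_KEY=your-aws-secret-access-key
```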
 
api/__init__.py CHANGED
@@ -1,31 +1,32 @@
1
  from typing import Type
2
 
 
3
  from api.baseline import BaselineAPI
 
4
  from api.fireworks import FireworksAPI
5
  from api.flux import FluxAPI
6
  from api.pruna import PrunaAPI
7
  from api.pruna_dev import PrunaDevAPI
8
  from api.replicate import ReplicateAPI
9
  from api.together import TogetherAPI
10
- from api.fal import FalAPI
11
- from api.aws import AWSBedrockAPI
12
 
13
  __all__ = [
14
- 'create_api',
15
- 'FluxAPI',
16
- 'BaselineAPI',
17
- 'FireworksAPI',
18
- 'PrunaAPI',
19
- 'ReplicateAPI',
20
- 'TogetherAPI',
21
- 'FalAPI',
22
- 'PrunaDevAPI',
23
  ]
24
 
 
25
  def create_api(api_type: str) -> FluxAPI:
26
  """
27
  Factory function to create API instances.
28
-
29
  Args:
30
  api_type (str): The type of API to create. Must be one of:
31
  - "baseline"
@@ -35,10 +36,10 @@ def create_api(api_type: str) -> FluxAPI:
35
  - "together"
36
  - "fal"
37
  - "aws"
38
-
39
  Returns:
40
  FluxAPI: An instance of the requested API implementation
41
-
42
  Raises:
43
  ValueError: If an invalid API type is provided
44
  """
@@ -47,7 +48,7 @@ def create_api(api_type: str) -> FluxAPI:
47
  if api_type.startswith("pruna_"):
48
  speed_mode = api_type[6:] # Remove "pruna_" prefix
49
  return PrunaAPI(speed_mode)
50
-
51
  api_map: dict[str, Type[FluxAPI]] = {
52
  "baseline": BaselineAPI,
53
  "fireworks": FireworksAPI,
@@ -56,8 +57,10 @@ def create_api(api_type: str) -> FluxAPI:
56
  "fal": FalAPI,
57
  "aws": AWSBedrockAPI,
58
  }
59
-
60
  if api_type not in api_map:
61
- raise ValueError(f"Invalid API type: {api_type}. Must be one of {list(api_map.keys())} or start with 'pruna_'")
62
-
 
 
63
  return api_map[api_type]()
 
1
  from typing import Type
2
 
3
+ from api.aws import AWSBedrockAPI
4
  from api.baseline import BaselineAPI
5
+ from api.fal import FalAPI
6
  from api.fireworks import FireworksAPI
7
  from api.flux import FluxAPI
8
  from api.pruna import PrunaAPI
9
  from api.pruna_dev import PrunaDevAPI
10
  from api.replicate import ReplicateAPI
11
  from api.together import TogetherAPI
 
 
12
 
13
  __all__ = [
14
+ "create_api",
15
+ "FluxAPI",
16
+ "BaselineAPI",
17
+ "FireworksAPI",
18
+ "PrunaAPI",
19
+ "ReplicateAPI",
20
+ "TogetherAPI",
21
+ "FalAPI",
22
+ "PrunaDevAPI",
23
  ]
24
 
25
+
26
  def create_api(api_type: str) -> FluxAPI:
27
  """
28
  Factory function to create API instances.
29
+
30
  Args:
31
  api_type (str): The type of API to create. Must be one of:
32
  - "baseline"
 
36
  - "together"
37
  - "fal"
38
  - "aws"
39
+
40
  Returns:
41
  FluxAPI: An instance of the requested API implementation
42
+
43
  Raises:
44
  ValueError: If an invalid API type is provided
45
  """
 
48
  if api_type.startswith("pruna_"):
49
  speed_mode = api_type[6:] # Remove "pruna_" prefix
50
  return PrunaAPI(speed_mode)
51
+
52
  api_map: dict[str, Type[FluxAPI]] = {
53
  "baseline": BaselineAPI,
54
  "fireworks": FireworksAPI,
 
57
  "fal": FalAPI,
58
  "aws": AWSBedrockAPI,
59
  }
60
+
61
  if api_type not in api_map:
62
+ raise ValueError(
63
+ f"Invalid API type: {api_type}. Must be one of {list(api_map.keys())} or start with 'pruna_'"
64
+ )
65
+
66
  return api_map[api_type]()
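A minimal usage sketch of the `create_api` factory above; the prompt, output path, and the `pruna_` speed-mode string are illustrative assumptions, and the credentials each implementation needs are expected in `.env`:

```
from pathlib import Path

from api import create_api

# Create one of the hosted API wrappers and time a single generation.
api = create_api("baseline")            # any key from api_map, e.g. "fal" or "aws"
seconds = api.generate_image(
    "a watercolor painting of a lighthouse",   # illustrative prompt
    Path("images/baseline/demo/0.png"),        # illustrative output path
)
print(f"{api.name}: generated in {seconds:.2f}s")

# Anything prefixed with "pruna_" is routed to PrunaAPI with the rest as speed mode.
fast_api = create_api("pruna_fast")     # the mode string here is an assumption
```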
api/aws.py CHANGED
@@ -1,9 +1,8 @@
1
- import os
2
- import time
3
  import base64
4
  import json
 
 
5
  from pathlib import Path
6
- from typing import Any
7
 
8
  import boto3
9
  from dotenv import load_dotenv
@@ -45,23 +44,20 @@ class AWSBedrockAPI(FluxAPI):
45
  try:
46
  # Convert request to JSON and invoke the model
47
  request = json.dumps(native_request)
48
- response = self._client.invoke_model(
49
- modelId=self._model_id,
50
- body=request
51
- )
52
-
53
  # Process the response
54
  model_response = json.loads(response["body"].read())
55
  if not model_response.get("images"):
56
  raise Exception("No images returned from AWS Bedrock API")
57
-
58
  # Save the image
59
  base64_image_data = model_response["images"][0]
60
  self._save_image_from_base64(base64_image_data, save_path)
61
-
62
  except Exception as e:
63
  raise Exception(f"Error generating image with AWS Bedrock: {str(e)}")
64
-
65
  end_time = time.time()
66
  return end_time - start_time
67
 
@@ -70,4 +66,4 @@ class AWSBedrockAPI(FluxAPI):
70
  save_path.parent.mkdir(parents=True, exist_ok=True)
71
  image_data = base64.b64decode(base64_data)
72
  with open(save_path, "wb") as f:
73
- f.write(image_data)
 
 
 
1
  import base64
2
  import json
3
+ import os
4
+ import time
5
  from pathlib import Path
 
6
 
7
  import boto3
8
  from dotenv import load_dotenv
 
44
  try:
45
  # Convert request to JSON and invoke the model
46
  request = json.dumps(native_request)
47
+ response = self._client.invoke_model(modelId=self._model_id, body=request)
48
+
 
 
 
49
  # Process the response
50
  model_response = json.loads(response["body"].read())
51
  if not model_response.get("images"):
52
  raise Exception("No images returned from AWS Bedrock API")
53
+
54
  # Save the image
55
  base64_image_data = model_response["images"][0]
56
  self._save_image_from_base64(base64_image_data, save_path)
57
+
58
  except Exception as e:
59
  raise Exception(f"Error generating image with AWS Bedrock: {str(e)}")
60
+
61
  end_time = time.time()
62
  return end_time - start_time
63
 
 
66
  save_path.parent.mkdir(parents=True, exist_ok=True)
67
  image_data = base64.b64decode(base64_data)
68
  with open(save_path, "wb") as f:
69
+ f.write(image_data)
api/baseline.py CHANGED
@@ -12,6 +12,7 @@ class BaselineAPI(FluxAPI):
12
  """
13
  As our baseline, we use the Replicate API with go_fast=False.
14
  """
 
15
  def __init__(self):
16
  load_dotenv()
17
  self._api_key = os.getenv("REPLICATE_API_TOKEN")
@@ -24,6 +25,7 @@ class BaselineAPI(FluxAPI):
24
 
25
  def generate_image(self, prompt: str, save_path: Path) -> float:
26
  import replicate
 
27
  start_time = time.time()
28
  result = replicate.run(
29
  "black-forest-labs/flux-dev",
@@ -39,15 +41,15 @@ class BaselineAPI(FluxAPI):
39
  },
40
  )
41
  end_time = time.time()
42
-
43
  if result and len(result) > 0:
44
  self._save_image_from_result(result[0], save_path)
45
  else:
46
  raise Exception("No result returned from Replicate API")
47
-
48
  return end_time - start_time
49
 
50
  def _save_image_from_result(self, result: Any, save_path: Path):
51
  save_path.parent.mkdir(parents=True, exist_ok=True)
52
  with open(save_path, "wb") as f:
53
- f.write(result.read())
 
12
  """
13
  As our baseline, we use the Replicate API with go_fast=False.
14
  """
15
+
16
  def __init__(self):
17
  load_dotenv()
18
  self._api_key = os.getenv("REPLICATE_API_TOKEN")
 
25
 
26
  def generate_image(self, prompt: str, save_path: Path) -> float:
27
  import replicate
28
+
29
  start_time = time.time()
30
  result = replicate.run(
31
  "black-forest-labs/flux-dev",
 
41
  },
42
  )
43
  end_time = time.time()
44
+
45
  if result and len(result) > 0:
46
  self._save_image_from_result(result[0], save_path)
47
  else:
48
  raise Exception("No result returned from Replicate API")
49
+
50
  return end_time - start_time
51
 
52
  def _save_image_from_result(self, result: Any, save_path: Path):
53
  save_path.parent.mkdir(parents=True, exist_ok=True)
54
  with open(save_path, "wb") as f:
55
+ f.write(result.read())
api/fal.py CHANGED
@@ -30,10 +30,10 @@ class FalAPI(FluxAPI):
30
  },
31
  )
32
  end_time = time.time()
33
-
34
  url = result["images"][0]["url"]
35
  self._save_image_from_url(url, save_path)
36
-
37
  return end_time - start_time
38
 
39
  def _save_image_from_url(self, url: str, save_path: Path):
 
30
  },
31
  )
32
  end_time = time.time()
33
+
34
  url = result["images"][0]["url"]
35
  self._save_image_from_url(url, save_path)
36
+
37
  return end_time - start_time
38
 
39
  def _save_image_from_url(self, url: str, save_path: Path):
api/fireworks.py CHANGED
@@ -23,7 +23,7 @@ class FireworksAPI(FluxAPI):
23
 
24
  def generate_image(self, prompt: str, save_path: Path) -> float:
25
  start_time = time.time()
26
-
27
  headers = {
28
  "Content-Type": "application/json",
29
  "Accept": "image/jpeg",
@@ -39,12 +39,12 @@ class FireworksAPI(FluxAPI):
39
  result = requests.post(self._url, headers=headers, json=data)
40
 
41
  end_time = time.time()
42
-
43
  if result.status_code == 200:
44
  self._save_image_from_result(result, save_path)
45
  else:
46
  raise Exception(f"Error: {result.status_code} {result.text}")
47
-
48
  return end_time - start_time
49
 
50
  def _save_image_from_result(self, result: Any, save_path: Path):
 
23
 
24
  def generate_image(self, prompt: str, save_path: Path) -> float:
25
  start_time = time.time()
26
+
27
  headers = {
28
  "Content-Type": "application/json",
29
  "Accept": "image/jpeg",
 
39
  result = requests.post(self._url, headers=headers, json=data)
40
 
41
  end_time = time.time()
42
+
43
  if result.status_code == 200:
44
  self._save_image_from_result(result, save_path)
45
  else:
46
  raise Exception(f"Error: {result.status_code} {result.text}")
47
+
48
  return end_time - start_time
49
 
50
  def _save_image_from_result(self, result: Any, save_path: Path):
api/flux.py CHANGED
@@ -14,7 +14,7 @@ class FluxAPI(ABC):
14
  def name(self) -> str:
15
  """
16
  The name of the API implementation.
17
-
18
  Returns:
19
  str: The name of the specific API implementation
20
  """
@@ -24,11 +24,11 @@ class FluxAPI(ABC):
24
  def generate_image(self, prompt: str, save_path: Path) -> float:
25
  """
26
  Generate an image based on the prompt and save it to the specified path.
27
-
28
  Args:
29
  prompt (str): The text prompt to generate the image from
30
  save_path (Path): The path where the generated image should be saved
31
-
32
  Returns:
33
  float: The time taken for the API call in seconds
34
  """
 
14
  def name(self) -> str:
15
  """
16
  The name of the API implementation.
17
+
18
  Returns:
19
  str: The name of the specific API implementation
20
  """
 
24
  def generate_image(self, prompt: str, save_path: Path) -> float:
25
  """
26
  Generate an image based on the prompt and save it to the specified path.
27
+
28
  Args:
29
  prompt (str): The text prompt to generate the image from
30
  save_path (Path): The path where the generated image should be saved
31
+
32
  Returns:
33
  float: The time taken for the API call in seconds
34
  """
api/pruna.py CHANGED
@@ -3,8 +3,8 @@ import time
3
  from pathlib import Path
4
  from typing import Any
5
 
6
- from dotenv import load_dotenv
7
  import replicate
 
8
 
9
  from api.flux import FluxAPI
10
 
@@ -12,7 +12,9 @@ from api.flux import FluxAPI
12
  class PrunaAPI(FluxAPI):
13
  def __init__(self, speed_mode: str):
14
  self._speed_mode = speed_mode
15
- self._speed_mode_name = speed_mode.split(" ")[0].strip().lower().replace(" ", "_")
 
 
16
  load_dotenv()
17
  self._api_key = os.getenv("REPLICATE_API_TOKEN")
18
  if not self._api_key:
@@ -38,7 +40,7 @@ class PrunaAPI(FluxAPI):
38
  },
39
  )
40
  end_time = time.time()
41
-
42
  if result:
43
  self._save_image_from_result(result, save_path)
44
  else:
 
3
  from pathlib import Path
4
  from typing import Any
5
 
 
6
  import replicate
7
+ from dotenv import load_dotenv
8
 
9
  from api.flux import FluxAPI
10
 
 
12
  class PrunaAPI(FluxAPI):
13
  def __init__(self, speed_mode: str):
14
  self._speed_mode = speed_mode
15
+ self._speed_mode_name = (
16
+ speed_mode.split(" ")[0].strip().lower().replace(" ", "_")
17
+ )
18
  load_dotenv()
19
  self._api_key = os.getenv("REPLICATE_API_TOKEN")
20
  if not self._api_key:
 
40
  },
41
  )
42
  end_time = time.time()
43
+
44
  if result:
45
  self._save_image_from_result(result, save_path)
46
  else:
api/pruna_dev.py CHANGED
@@ -3,8 +3,8 @@ import time
3
  from pathlib import Path
4
  from typing import Any
5
 
6
- from dotenv import load_dotenv
7
  import replicate
 
8
 
9
  from api.flux import FluxAPI
10
 
@@ -36,7 +36,7 @@ class PrunaDevAPI(FluxAPI):
36
  },
37
  )
38
  end_time = time.time()
39
-
40
  if result:
41
  self._save_image_from_result(result, save_path)
42
  else:
@@ -46,4 +46,4 @@ class PrunaDevAPI(FluxAPI):
46
  def _save_image_from_result(self, result: Any, save_path: Path):
47
  save_path.parent.mkdir(parents=True, exist_ok=True)
48
  with open(save_path, "wb") as f:
49
- f.write(result.read())
 
3
  from pathlib import Path
4
  from typing import Any
5
 
 
6
  import replicate
7
+ from dotenv import load_dotenv
8
 
9
  from api.flux import FluxAPI
10
 
 
36
  },
37
  )
38
  end_time = time.time()
39
+
40
  if result:
41
  self._save_image_from_result(result, save_path)
42
  else:
 
46
  def _save_image_from_result(self, result: Any, save_path: Path):
47
  save_path.parent.mkdir(parents=True, exist_ok=True)
48
  with open(save_path, "wb") as f:
49
+ f.write(result.read())
api/replicate.py CHANGED
@@ -3,8 +3,8 @@ import time
3
  from pathlib import Path
4
  from typing import Any
5
 
6
- from dotenv import load_dotenv
7
  import replicate
 
8
 
9
  from api.flux import FluxAPI
10
 
 
3
  from pathlib import Path
4
  from typing import Any
5
 
 
6
  import replicate
7
+ from dotenv import load_dotenv
8
 
9
  from api.flux import FluxAPI
10
 
api/replicate_wan.py ADDED
@@ -0,0 +1,48 @@

1
+ import os
2
+ import time
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ import replicate
7
+ from dotenv import load_dotenv
8
+
9
+ from api.flux import FluxAPI
10
+
11
+
12
+ class ReplicateAPI(FluxAPI):
13
+ def __init__(self):
14
+ load_dotenv()
15
+ self._api_key = os.getenv("REPLICATE_API_TOKEN")
16
+ if not self._api_key:
17
+ raise ValueError("REPLICATE_API_TOKEN not found in environment variables")
18
+
19
+ @property
20
+ def name(self) -> str:
21
+ return "replicate_go_fast"
22
+
23
+ def generate_image(self, prompt: str, save_path: Path) -> float:
24
+ start_time = time.time()
25
+ result = replicate.run(
26
+ "black-forest-labs/flux-dev",
27
+ input={
28
+ "seed": 0,
29
+ "prompt": prompt,
30
+ "go_fast": True,
31
+ "guidance": 3.5,
32
+ "num_outputs": 1,
33
+ "aspect_ratio": "1:1",
34
+ "output_format": "png",
35
+ "num_inference_steps": 28,
36
+ },
37
+ )
38
+ end_time = time.time()
39
+ if result and len(result) > 0:
40
+ self._save_image_from_result(result[0], save_path)
41
+ else:
42
+ raise Exception("No result returned from Replicate API")
43
+ return end_time - start_time
44
+
45
+ def _save_image_from_result(self, result: Any, save_path: Path):
46
+ save_path.parent.mkdir(parents=True, exist_ok=True)
47
+ with open(save_path, "wb") as f:
48
+ f.write(result.read())
api/together.py CHANGED
@@ -33,10 +33,10 @@ class TogetherAPI(FluxAPI):
33
  response_format="b64_json",
34
  )
35
  end_time = time.time()
36
- if result and hasattr(result, 'data') and len(result.data) > 0:
37
  self._save_image_from_result(result, save_path)
38
  else:
39
- raise Exception("No result returned from Together API")
40
  return end_time - start_time
41
 
42
  def _save_image_from_result(self, result: Any, save_path: Path):
 
33
  response_format="b64_json",
34
  )
35
  end_time = time.time()
36
+ if result and hasattr(result, "data") and len(result.data) > 0:
37
  self._save_image_from_result(result, save_path)
38
  else:
39
+ raise Exception("No result returned from Together API")
40
  return end_time - start_time
41
 
42
  def _save_image_from_result(self, result: Any, save_path: Path):
benchmark/__init__.py CHANGED
@@ -7,10 +7,14 @@ from benchmark.hps import HPSPrompts
7
  from benchmark.parti import PartiPrompts
8
 
9
 
10
- def create_benchmark(benchmark_type: str) -> Type[DrawBenchPrompts | GenAIBenchPrompts | GenEvalPrompts | HPSPrompts | PartiPrompts]:
 
 
 
 
11
  """
12
  Factory function to create benchmark instances.
13
-
14
  Args:
15
  benchmark_type (str): The type of benchmark to create. Must be one of:
16
  - "draw_bench"
@@ -18,10 +22,10 @@ def create_benchmark(benchmark_type: str) -> Type[DrawBenchPrompts | GenAIBenchP
18
  - "geneval"
19
  - "hps"
20
  - "parti"
21
-
22
  Returns:
23
  An instance of the requested benchmark implementation
24
-
25
  Raises:
26
  ValueError: If an invalid benchmark type is provided
27
  """
@@ -32,8 +36,10 @@ def create_benchmark(benchmark_type: str) -> Type[DrawBenchPrompts | GenAIBenchP
32
  "hps": HPSPrompts,
33
  "parti": PartiPrompts,
34
  }
35
-
36
  if benchmark_type not in benchmark_map:
37
- raise ValueError(f"Invalid benchmark type: {benchmark_type}. Must be one of {list(benchmark_map.keys())}")
38
-
 
 
39
  return benchmark_map[benchmark_type]()
 
7
  from benchmark.parti import PartiPrompts
8
 
9
 
10
+ def create_benchmark(
11
+ benchmark_type: str,
12
+ ) -> Type[
13
+ DrawBenchPrompts | GenAIBenchPrompts | GenEvalPrompts | HPSPrompts | PartiPrompts
14
+ ]:
15
  """
16
  Factory function to create benchmark instances.
17
+
18
  Args:
19
  benchmark_type (str): The type of benchmark to create. Must be one of:
20
  - "draw_bench"
 
22
  - "geneval"
23
  - "hps"
24
  - "parti"
25
+
26
  Returns:
27
  An instance of the requested benchmark implementation
28
+
29
  Raises:
30
  ValueError: If an invalid benchmark type is provided
31
  """
 
36
  "hps": HPSPrompts,
37
  "parti": PartiPrompts,
38
  }
39
+
40
  if benchmark_type not in benchmark_map:
41
+ raise ValueError(
42
+ f"Invalid benchmark type: {benchmark_type}. Must be one of {list(benchmark_map.keys())}"
43
+ )
44
+
45
  return benchmark_map[benchmark_type]()
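A minimal sketch of how the benchmark factory above is consumed, matching the iteration pattern in sample.py; note that "geneval" is the exception and yields a metadata dict plus a folder name instead of a plain prompt:

```
from benchmark import create_benchmark

# Prompt benchmarks are iterable and yield (prompt, relative image path).
bench = create_benchmark("parti")        # any key from benchmark_map
print(bench.name, bench.size)

for i, (prompt, rel_path) in enumerate(bench):
    print(rel_path, prompt)
    if i == 2:                           # just peek at the first few prompts
        break
```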
benchmark/draw_bench.py CHANGED
@@ -7,15 +7,15 @@ from datasets import load_dataset
7
  class DrawBenchPrompts:
8
  def __init__(self):
9
  self.dataset = load_dataset("shunk031/DrawBench")["test"]
10
-
11
  def __iter__(self) -> Iterator[Tuple[str, Path]]:
12
  for i, row in enumerate(self.dataset):
13
  yield row["prompts"], Path(f"{i}.png")
14
-
15
  @property
16
  def name(self) -> str:
17
  return "draw_bench"
18
-
19
  @property
20
  def size(self) -> int:
21
  return len(self.dataset)
 
7
  class DrawBenchPrompts:
8
  def __init__(self):
9
  self.dataset = load_dataset("shunk031/DrawBench")["test"]
10
+
11
  def __iter__(self) -> Iterator[Tuple[str, Path]]:
12
  for i, row in enumerate(self.dataset):
13
  yield row["prompts"], Path(f"{i}.png")
14
+
15
  @property
16
  def name(self) -> str:
17
  return "draw_bench"
18
+
19
  @property
20
  def size(self) -> int:
21
  return len(self.dataset)
benchmark/genai_bench.py CHANGED
@@ -8,8 +8,8 @@ class GenAIBenchPrompts:
8
  def __init__(self):
9
  super().__init__()
10
  self._download_genai_bench_files()
11
- prompts_path = Path('downloads/genai_bench/prompts.txt')
12
- with open(prompts_path, 'r') as f:
13
  self.prompts = [line.strip() for line in f if line.strip()]
14
 
15
  def __iter__(self) -> Iterator[Tuple[str, Path]]:
@@ -17,13 +17,13 @@ class GenAIBenchPrompts:
17
  yield prompt, Path(f"{i}.png")
18
 
19
  def _download_genai_bench_files(self) -> None:
20
- folder_name = Path('downloads/genai_bench')
21
  folder_name.mkdir(parents=True, exist_ok=True)
22
  prompts_url = "https://huggingface.co/datasets/zhiqiulin/GenAI-Bench-527/raw/main/prompts.txt"
23
  prompts_path = folder_name / "prompts.txt"
24
  if not prompts_path.exists():
25
  response = requests.get(prompts_url)
26
- with open(prompts_path, 'w') as f:
27
  f.write(response.text)
28
 
29
  @property
 
8
  def __init__(self):
9
  super().__init__()
10
  self._download_genai_bench_files()
11
+ prompts_path = Path("downloads/genai_bench/prompts.txt")
12
+ with open(prompts_path, "r") as f:
13
  self.prompts = [line.strip() for line in f if line.strip()]
14
 
15
  def __iter__(self) -> Iterator[Tuple[str, Path]]:
 
17
  yield prompt, Path(f"{i}.png")
18
 
19
  def _download_genai_bench_files(self) -> None:
20
+ folder_name = Path("downloads/genai_bench")
21
  folder_name.mkdir(parents=True, exist_ok=True)
22
  prompts_url = "https://huggingface.co/datasets/zhiqiulin/GenAI-Bench-527/raw/main/prompts.txt"
23
  prompts_path = folder_name / "prompts.txt"
24
  if not prompts_path.exists():
25
  response = requests.get(prompts_url)
26
+ with open(prompts_path, "w") as f:
27
  f.write(response.text)
28
 
29
  @property
benchmark/geneval.py CHANGED
@@ -9,32 +9,32 @@ class GenEvalPrompts:
9
  def __init__(self):
10
  super().__init__()
11
  self._download_geneval_file()
12
- metadata_path = Path('downloads/geneval/evaluation_metadata.jsonl')
13
  self.entries: List[Dict[str, Any]] = []
14
- with open(metadata_path, 'r') as f:
15
  for line in f:
16
  if line.strip():
17
  self.entries.append(json.loads(line))
18
-
19
  def __iter__(self) -> Iterator[Tuple[Dict[str, Any], Path]]:
20
  for i, entry in enumerate(self.entries):
21
  folder_name = f"{i:05d}"
22
  yield entry, folder_name
23
 
24
  def _download_geneval_file(self) -> None:
25
- folder_name = Path('downloads/geneval')
26
  folder_name.mkdir(parents=True, exist_ok=True)
27
  metadata_url = "https://raw.githubusercontent.com/djghosh13/geneval/main/prompts/evaluation_metadata.jsonl"
28
  metadata_path = folder_name / "evaluation_metadata.jsonl"
29
  if not metadata_path.exists():
30
  response = requests.get(metadata_url)
31
- with open(metadata_path, 'w') as f:
32
  f.write(response.text)
33
-
34
  @property
35
  def name(self) -> str:
36
  return "geneval"
37
-
38
  @property
39
  def size(self) -> int:
40
  return len(self.entries)
 
9
  def __init__(self):
10
  super().__init__()
11
  self._download_geneval_file()
12
+ metadata_path = Path("downloads/geneval/evaluation_metadata.jsonl")
13
  self.entries: List[Dict[str, Any]] = []
14
+ with open(metadata_path, "r") as f:
15
  for line in f:
16
  if line.strip():
17
  self.entries.append(json.loads(line))
18
+
19
  def __iter__(self) -> Iterator[Tuple[Dict[str, Any], Path]]:
20
  for i, entry in enumerate(self.entries):
21
  folder_name = f"{i:05d}"
22
  yield entry, folder_name
23
 
24
  def _download_geneval_file(self) -> None:
25
+ folder_name = Path("downloads/geneval")
26
  folder_name.mkdir(parents=True, exist_ok=True)
27
  metadata_url = "https://raw.githubusercontent.com/djghosh13/geneval/main/prompts/evaluation_metadata.jsonl"
28
  metadata_path = folder_name / "evaluation_metadata.jsonl"
29
  if not metadata_path.exists():
30
  response = requests.get(metadata_url)
31
+ with open(metadata_path, "w") as f:
32
  f.write(response.text)
33
+
34
  @property
35
  def name(self) -> str:
36
  return "geneval"
37
+
38
  @property
39
  def size(self) -> int:
40
  return len(self.entries)
benchmark/hps.py CHANGED
@@ -9,13 +9,18 @@ import huggingface_hub
9
  class HPSPrompts:
10
  def __init__(self):
11
  super().__init__()
12
- self.hps_prompt_files = ['anime.json', 'concept-art.json', 'paintings.json', 'photo.json']
 
 
 
 
 
13
  self._download_benchmark_prompts()
14
  self.prompts: Dict[str, str] = {}
15
  self._size = 0
16
  for file in self.hps_prompt_files:
17
- category = file.replace('.json', '')
18
- with open(os.path.join('downloads/hps', file), 'r') as f:
19
  prompts = json.load(f)
20
  for i, prompt in enumerate(prompts):
21
  if i == 100:
@@ -23,24 +28,26 @@ class HPSPrompts:
23
  filename = f"{category}_{i:03d}.png"
24
  self.prompts[filename] = prompt
25
  self._size += 1
26
-
27
  def __iter__(self) -> Iterator[Tuple[str, Path]]:
28
  for filename, prompt in self.prompts.items():
29
  yield prompt, Path(filename)
30
-
31
  @property
32
  def name(self) -> str:
33
  return "hps"
34
-
35
  @property
36
  def size(self) -> int:
37
  return self._size
38
 
39
  def _download_benchmark_prompts(self) -> None:
40
- folder_name = Path('downloads/hps')
41
  folder_name.mkdir(parents=True, exist_ok=True)
42
  for file in self.hps_prompt_files:
43
- file_name = huggingface_hub.hf_hub_download("zhwang/HPDv2", file, subfolder="benchmark", repo_type="dataset")
 
 
44
  if not os.path.exists(os.path.join(folder_name, file)):
45
  os.symlink(file_name, os.path.join(folder_name, file))
46
 
 
9
  class HPSPrompts:
10
  def __init__(self):
11
  super().__init__()
12
+ self.hps_prompt_files = [
13
+ "anime.json",
14
+ "concept-art.json",
15
+ "paintings.json",
16
+ "photo.json",
17
+ ]
18
  self._download_benchmark_prompts()
19
  self.prompts: Dict[str, str] = {}
20
  self._size = 0
21
  for file in self.hps_prompt_files:
22
+ category = file.replace(".json", "")
23
+ with open(os.path.join("downloads/hps", file), "r") as f:
24
  prompts = json.load(f)
25
  for i, prompt in enumerate(prompts):
26
  if i == 100:
 
28
  filename = f"{category}_{i:03d}.png"
29
  self.prompts[filename] = prompt
30
  self._size += 1
31
+
32
  def __iter__(self) -> Iterator[Tuple[str, Path]]:
33
  for filename, prompt in self.prompts.items():
34
  yield prompt, Path(filename)
35
+
36
  @property
37
  def name(self) -> str:
38
  return "hps"
39
+
40
  @property
41
  def size(self) -> int:
42
  return self._size
43
 
44
  def _download_benchmark_prompts(self) -> None:
45
+ folder_name = Path("downloads/hps")
46
  folder_name.mkdir(parents=True, exist_ok=True)
47
  for file in self.hps_prompt_files:
48
+ file_name = huggingface_hub.hf_hub_download(
49
+ "zhwang/HPDv2", file, subfolder="benchmark", repo_type="dataset"
50
+ )
51
  if not os.path.exists(os.path.join(folder_name, file)):
52
  os.symlink(file_name, os.path.join(folder_name, file))
53
 
benchmark/metrics/arniqa.py CHANGED
@@ -8,20 +8,29 @@ from torchmetrics.image.arniqa import ARNIQA
8
 
9
  class ARNIQAMetric:
10
  def __init__(self):
11
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
 
12
  self.metric = ARNIQA(
13
  regressor_dataset="koniq10k",
14
  reduction="mean",
15
  normalize=True,
16
- autocast=False
17
  )
18
  self.metric.to(self.device)
 
19
  @property
20
  def name(self) -> str:
21
  return "arniqa"
22
-
23
  def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
24
- image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float() / 255.0
 
 
25
  image_tensor = image_tensor.unsqueeze(0).to(self.device)
26
  score = self.metric(image_tensor)
27
  return {"arniqa": score.item()}
 
8
 
9
  class ARNIQAMetric:
10
  def __init__(self):
11
+ self.device = torch.device(
12
+ "cuda"
13
+ if torch.cuda.is_available()
14
+ else "mps"
15
+ if torch.backends.mps.is_available()
16
+ else "cpu"
17
+ )
18
  self.metric = ARNIQA(
19
  regressor_dataset="koniq10k",
20
  reduction="mean",
21
  normalize=True,
22
+ autocast=False,
23
  )
24
  self.metric.to(self.device)
25
+
26
  @property
27
  def name(self) -> str:
28
  return "arniqa"
29
+
30
  def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
31
+ image_tensor = (
32
+ torch.from_numpy(np.array(image)).permute(2, 0, 1).float() / 255.0
33
+ )
34
  image_tensor = image_tensor.unsqueeze(0).to(self.device)
35
  score = self.metric(image_tensor)
36
  return {"arniqa": score.item()}
benchmark/metrics/clip.py CHANGED
@@ -8,14 +8,20 @@ from torchmetrics.multimodal.clip_score import CLIPScore
8
 
9
  class CLIPMetric:
10
  def __init__(self, model_name_or_path: str = "openai/clip-vit-large-patch14"):
11
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
 
12
  self.metric = CLIPScore(model_name_or_path="openai/clip-vit-large-patch14")
13
  self.metric.to(self.device)
14
-
15
  @property
16
  def name(self) -> str:
17
  return "clip"
18
-
19
  def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
20
  image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float()
21
  image_tensor = image_tensor.to(self.device)
 
8
 
9
  class CLIPMetric:
10
  def __init__(self, model_name_or_path: str = "openai/clip-vit-large-patch14"):
11
+ self.device = torch.device(
12
+ "cuda"
13
+ if torch.cuda.is_available()
14
+ else "mps"
15
+ if torch.backends.mps.is_available()
16
+ else "cpu"
17
+ )
18
  self.metric = CLIPScore(model_name_or_path="openai/clip-vit-large-patch14")
19
  self.metric.to(self.device)
20
+
21
  @property
22
  def name(self) -> str:
23
  return "clip"
24
+
25
  def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
26
  image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float()
27
  image_tensor = image_tensor.to(self.device)
benchmark/metrics/clip_iqa.py CHANGED
@@ -8,18 +8,22 @@ from torchmetrics.multimodal import CLIPImageQualityAssessment
8
 
9
  class CLIPIQAMetric:
10
  def __init__(self):
11
- self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
 
12
  self.metric = CLIPImageQualityAssessment(
13
- model_name_or_path="clip_iqa",
14
- data_range=255.0,
15
- prompts=("quality",)
16
  )
17
  self.metric.to(self.device)
18
 
19
  @property
20
  def name(self) -> str:
21
  return "clip_iqa"
22
-
23
  def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
24
  image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float()
25
  image_tensor = image_tensor.unsqueeze(0)
 
8
 
9
  class CLIPIQAMetric:
10
  def __init__(self):
11
+ self.device = torch.device(
12
+ "cuda"
13
+ if torch.cuda.is_available()
14
+ else "mps"
15
+ if torch.backends.mps.is_available()
16
+ else "cpu"
17
+ )
18
  self.metric = CLIPImageQualityAssessment(
19
+ model_name_or_path="clip_iqa", data_range=255.0, prompts=("quality",)
 
 
20
  )
21
  self.metric.to(self.device)
22
 
23
  @property
24
  def name(self) -> str:
25
  return "clip_iqa"
26
+
27
  def compute_score(self, image: Image.Image, prompt: str) -> Dict[str, float]:
28
  image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float()
29
  image_tensor = image_tensor.unsqueeze(0)
benchmark/metrics/hps.py CHANGED
@@ -1,26 +1,32 @@
1
  import os
2
  from typing import Dict
3
 
 
4
  import torch
5
- from PIL import Image
6
  from hpsv2.src.open_clip import create_model_and_transforms, get_tokenizer
7
- import huggingface_hub
8
- from hpsv2.utils import root_path, hps_version_map
9
 
10
 
11
  class HPSMetric:
12
  def __init__(self):
13
  self.hps_version = "v2.1"
14
- self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 
 
 
 
 
15
  self.model_dict = {}
16
  self._initialize_model()
17
-
18
  def _initialize_model(self):
19
  if not self.model_dict:
20
  model, preprocess_train, preprocess_val = create_model_and_transforms(
21
- 'ViT-H-14',
22
- 'laion2B-s32B-b79K',
23
- precision='amp',
24
  device=self.device,
25
  jit=False,
26
  force_quick_gelu=False,
@@ -34,44 +40,53 @@ class HPSMetric:
34
  aug_cfg={},
35
  output_dict=True,
36
  with_score_predictor=False,
37
- with_region_predictor=False
38
  )
39
- self.model_dict['model'] = model
40
- self.model_dict['preprocess_val'] = preprocess_val
41
-
42
  # Load checkpoint
43
  if not os.path.exists(root_path):
44
  os.makedirs(root_path)
45
- cp = huggingface_hub.hf_hub_download("xswu/HPSv2", hps_version_map[self.hps_version])
46
-
 
 
47
  checkpoint = torch.load(cp, map_location=self.device)
48
- model.load_state_dict(checkpoint['state_dict'])
49
- self.tokenizer = get_tokenizer('ViT-H-14')
50
  model = model.to(self.device)
51
  model.eval()
52
-
53
  @property
54
  def name(self) -> str:
55
  return "hps"
56
-
57
  def compute_score(
58
  self,
59
  image: Image.Image,
60
  prompt: str,
61
  ) -> Dict[str, float]:
62
- model = self.model_dict['model']
63
- preprocess_val = self.model_dict['preprocess_val']
64
-
65
  with torch.no_grad():
66
  # Process the image
67
- image_tensor = preprocess_val(image).unsqueeze(0).to(device=self.device, non_blocking=True)
 
 
 
 
68
  # Process the prompt
69
  text = self.tokenizer([prompt]).to(device=self.device, non_blocking=True)
70
  # Calculate the HPS
71
  with torch.cuda.amp.autocast():
72
  outputs = model(image_tensor, text)
73
- image_features, text_features = outputs["image_features"], outputs["text_features"]
 
 
 
74
  logits_per_image = image_features @ text_features.T
75
  hps_score = torch.diagonal(logits_per_image).cpu().numpy()
76
-
77
  return {"hps": float(hps_score[0])}
 
1
  import os
2
  from typing import Dict
3
 
4
+ import huggingface_hub
5
  import torch
 
6
  from hpsv2.src.open_clip import create_model_and_transforms, get_tokenizer
7
+ from hpsv2.utils import hps_version_map, root_path
8
+ from PIL import Image
9
 
10
 
11
  class HPSMetric:
12
  def __init__(self):
13
  self.hps_version = "v2.1"
14
+ self.device = torch.device(
15
+ "cuda"
16
+ if torch.cuda.is_available()
17
+ else "mps"
18
+ if torch.backends.mps.is_available()
19
+ else "cpu"
20
+ )
21
  self.model_dict = {}
22
  self._initialize_model()
23
+
24
  def _initialize_model(self):
25
  if not self.model_dict:
26
  model, preprocess_train, preprocess_val = create_model_and_transforms(
27
+ "ViT-H-14",
28
+ "laion2B-s32B-b79K",
29
+ precision="amp",
30
  device=self.device,
31
  jit=False,
32
  force_quick_gelu=False,
 
40
  aug_cfg={},
41
  output_dict=True,
42
  with_score_predictor=False,
43
+ with_region_predictor=False,
44
  )
45
+ self.model_dict["model"] = model
46
+ self.model_dict["preprocess_val"] = preprocess_val
47
+
48
  # Load checkpoint
49
  if not os.path.exists(root_path):
50
  os.makedirs(root_path)
51
+ cp = huggingface_hub.hf_hub_download(
52
+ "xswu/HPSv2", hps_version_map[self.hps_version]
53
+ )
54
+
55
  checkpoint = torch.load(cp, map_location=self.device)
56
+ model.load_state_dict(checkpoint["state_dict"])
57
+ self.tokenizer = get_tokenizer("ViT-H-14")
58
  model = model.to(self.device)
59
  model.eval()
60
+
61
  @property
62
  def name(self) -> str:
63
  return "hps"
64
+
65
  def compute_score(
66
  self,
67
  image: Image.Image,
68
  prompt: str,
69
  ) -> Dict[str, float]:
70
+ model = self.model_dict["model"]
71
+ preprocess_val = self.model_dict["preprocess_val"]
72
+
73
  with torch.no_grad():
74
  # Process the image
75
+ image_tensor = (
76
+ preprocess_val(image)
77
+ .unsqueeze(0)
78
+ .to(device=self.device, non_blocking=True)
79
+ )
80
  # Process the prompt
81
  text = self.tokenizer([prompt]).to(device=self.device, non_blocking=True)
82
  # Calculate the HPS
83
  with torch.cuda.amp.autocast():
84
  outputs = model(image_tensor, text)
85
+ image_features, text_features = (
86
+ outputs["image_features"],
87
+ outputs["text_features"],
88
+ )
89
  logits_per_image = image_features @ text_features.T
90
  hps_score = torch.diagonal(logits_per_image).cpu().numpy()
91
+
92
  return {"hps": float(hps_score[0])}
benchmark/metrics/image_reward.py CHANGED
@@ -3,17 +3,26 @@ import tempfile
3
  from typing import Dict
4
 
5
  import ImageReward as RM
 
6
  from PIL import Image
7
 
8
 
9
  class ImageRewardMetric:
10
  def __init__(self):
11
- self.model = RM.load("ImageReward-v1.0")
12
-
 
 
 
 
 
 
 
 
13
  @property
14
  def name(self) -> str:
15
  return "image_reward"
16
-
17
  def compute_score(
18
  self,
19
  image: Image.Image,
 
3
  from typing import Dict
4
 
5
  import ImageReward as RM
6
+ import torch
7
  from PIL import Image
8
 
9
 
10
  class ImageRewardMetric:
11
  def __init__(self):
12
+ self.device = torch.device(
13
+ "cuda"
14
+ if torch.cuda.is_available()
15
+ else "mps"
16
+ if torch.backends.mps.is_available()
17
+ else "cpu"
18
+ )
19
+
20
+ self.model = RM.load("ImageReward-v1.0", device=str(self.device))
21
+
22
  @property
23
  def name(self) -> str:
24
  return "image_reward"
25
+
26
  def compute_score(
27
  self,
28
  image: Image.Image,
benchmark/metrics/vqa.py CHANGED
@@ -2,15 +2,26 @@ from pathlib import Path
2
  from typing import Dict
3
 
4
  import t2v_metrics
 
 
5
 
6
  class VQAMetric:
7
  def __init__(self):
8
- self.metric = t2v_metrics.VQAScore(model="clip-flant5-xxl")
9
-
 
 
 
 
 
 
 
 
 
10
  @property
11
  def name(self) -> str:
12
  return "vqa_score"
13
-
14
  def compute_score(
15
  self,
16
  image_path: Path,
 
2
  from typing import Dict
3
 
4
  import t2v_metrics
5
+ import torch
6
+
7
 
8
  class VQAMetric:
9
  def __init__(self):
10
+ self.device = torch.device(
11
+ "cuda"
12
+ if torch.cuda.is_available()
13
+ else "mps"
14
+ if torch.backends.mps.is_available()
15
+ else "cpu"
16
+ )
17
+ self.metric = t2v_metrics.VQAScore(
18
+ model="clip-flant5-xxl", device=str(self.device)
19
+ )
20
+
21
  @property
22
  def name(self) -> str:
23
  return "vqa_score"
24
+
25
  def compute_score(
26
  self,
27
  image_path: Path,
benchmark/parti.py CHANGED
@@ -14,11 +14,11 @@ class PartiPrompts:
14
  def __iter__(self) -> Iterator[Tuple[str, Path]]:
15
  for i, prompt in enumerate(self.prompts):
16
  yield prompt, Path(f"{i}.png")
17
-
18
  @property
19
  def name(self) -> str:
20
  return "parti"
21
-
22
  @property
23
  def size(self) -> int:
24
  return len(self.prompts)
 
14
  def __iter__(self) -> Iterator[Tuple[str, Path]]:
15
  for i, prompt in enumerate(self.prompts):
16
  yield prompt, Path(f"{i}.png")
17
+
18
  @property
19
  def name(self) -> str:
20
  return "parti"
21
+
22
  @property
23
  def size(self) -> int:
24
  return len(self.prompts)
environment.yml CHANGED
@@ -12,7 +12,7 @@ dependencies:
12
  - tqdm
13
  - pip
14
  - pip:
15
- - datasets>=3.5.0
16
  - fal-client>=0.5.9
17
  - hpsv2>=1.2.0
18
  - huggingface-hub>=0.30.2
 
12
  - tqdm
13
  - pip
14
  - pip:
15
+ - datasets==3.6.0
16
  - fal-client>=0.5.9
17
  - hpsv2>=1.2.0
18
  - huggingface-hub>=0.30.2
evaluate.py CHANGED
@@ -1,59 +1,65 @@
1
  import argparse
2
  import json
 
3
  from pathlib import Path
4
  from typing import Dict
5
- import warnings
6
 
7
- from benchmark import create_benchmark
8
- from benchmark.metrics import create_metric
9
  import numpy as np
10
  from PIL import Image
11
  from tqdm import tqdm
12
 
 
 
13
 
14
  warnings.filterwarnings("ignore", category=FutureWarning)
15
 
16
 
17
- def evaluate_benchmark(benchmark_type: str, api_type: str, images_dir: Path = Path("images")) -> Dict:
 
 
18
  """
19
  Evaluate a benchmark's images using its specific metrics.
20
-
21
  Args:
22
  benchmark_type (str): Type of benchmark to evaluate
23
  api_type (str): Type of API used to generate images
24
  images_dir (Path): Base directory containing generated images
25
-
26
  Returns:
27
  Dict containing evaluation results
28
  """
29
  benchmark = create_benchmark(benchmark_type)
30
-
31
  benchmark_dir = images_dir / api_type / benchmark_type
32
  metadata_file = benchmark_dir / "metadata.jsonl"
33
-
34
  if not metadata_file.exists():
35
- raise FileNotFoundError(f"No metadata file found for {api_type}/{benchmark_type}. Please run sample.py first.")
36
-
 
 
37
  metadata = []
38
  with open(metadata_file, "r") as f:
39
  for line in f:
40
  metadata.append(json.loads(line))
41
-
42
- metrics = {metric_type: create_metric(metric_type) for metric_type in benchmark.metrics}
43
-
 
 
44
  results = {
45
  "api": api_type,
46
  "benchmark": benchmark_type,
47
  "metrics": {metric: 0.0 for metric in benchmark.metrics},
48
- "total_images": len(metadata)
49
  }
50
  inference_times = []
51
-
52
  for entry in tqdm(metadata):
53
  image_path = benchmark_dir / entry["filepath"]
54
  if not image_path.exists():
55
  continue
56
-
57
  for metric_type, metric in metrics.items():
58
  try:
59
  if metric_type == "vqa":
@@ -64,26 +70,30 @@ def evaluate_benchmark(benchmark_type: str, api_type: str, images_dir: Path = Pa
64
  results["metrics"][metric_type] += score[metric_type]
65
  except Exception as e:
66
  print(f"Error computing {metric_type} for {image_path}: {str(e)}")
67
-
68
  inference_times.append(entry["inference_time"])
69
-
70
  for metric in results["metrics"]:
71
  results["metrics"][metric] /= len(metadata)
72
  results["median_inference_time"] = np.median(inference_times).item()
73
-
74
  return results
75
 
76
 
77
  def main():
78
- parser = argparse.ArgumentParser(description="Evaluate generated images using benchmark-specific metrics")
 
 
79
  parser.add_argument("api_type", help="Type of API to evaluate")
80
- parser.add_argument("benchmarks", nargs="+", help="List of benchmark types to evaluate")
81
-
 
 
82
  args = parser.parse_args()
83
-
84
  results_dir = Path("evaluation_results")
85
  results_dir.mkdir(exist_ok=True)
86
-
87
  results_file = results_dir / f"{args.api_type}.jsonl"
88
  existing_results = set()
89
 
@@ -97,15 +107,15 @@ def main():
97
  if benchmark_type in existing_results:
98
  print(f"Skipping {args.api_type}/{benchmark_type} - already evaluated")
99
  continue
100
-
101
  try:
102
  print(f"Evaluating {args.api_type}/{benchmark_type}")
103
  results = evaluate_benchmark(benchmark_type, args.api_type)
104
-
105
  # Append results to file
106
  with open(results_file, "a") as f:
107
  f.write(json.dumps(results) + "\n")
108
-
109
  except Exception as e:
110
  print(f"Error evaluating {args.api_type}/{benchmark_type}: {str(e)}")
111
 
 
1
  import argparse
2
  import json
3
+ import warnings
4
  from pathlib import Path
5
  from typing import Dict
 
6
 
 
 
7
  import numpy as np
8
  from PIL import Image
9
  from tqdm import tqdm
10
 
11
+ from benchmark import create_benchmark
12
+ from benchmark.metrics import create_metric
13
 
14
  warnings.filterwarnings("ignore", category=FutureWarning)
15
 
16
 
17
+ def evaluate_benchmark(
18
+ benchmark_type: str, api_type: str, images_dir: Path = Path("images")
19
+ ) -> Dict:
20
  """
21
  Evaluate a benchmark's images using its specific metrics.
22
+
23
  Args:
24
  benchmark_type (str): Type of benchmark to evaluate
25
  api_type (str): Type of API used to generate images
26
  images_dir (Path): Base directory containing generated images
27
+
28
  Returns:
29
  Dict containing evaluation results
30
  """
31
  benchmark = create_benchmark(benchmark_type)
32
+
33
  benchmark_dir = images_dir / api_type / benchmark_type
34
  metadata_file = benchmark_dir / "metadata.jsonl"
35
+
36
  if not metadata_file.exists():
37
+ raise FileNotFoundError(
38
+ f"No metadata file found for {api_type}/{benchmark_type}. Please run sample.py first."
39
+ )
40
+
41
  metadata = []
42
  with open(metadata_file, "r") as f:
43
  for line in f:
44
  metadata.append(json.loads(line))
45
+
46
+ metrics = {
47
+ metric_type: create_metric(metric_type) for metric_type in benchmark.metrics
48
+ }
49
+
50
  results = {
51
  "api": api_type,
52
  "benchmark": benchmark_type,
53
  "metrics": {metric: 0.0 for metric in benchmark.metrics},
54
+ "total_images": len(metadata),
55
  }
56
  inference_times = []
57
+
58
  for entry in tqdm(metadata):
59
  image_path = benchmark_dir / entry["filepath"]
60
  if not image_path.exists():
61
  continue
62
+
63
  for metric_type, metric in metrics.items():
64
  try:
65
  if metric_type == "vqa":
 
70
  results["metrics"][metric_type] += score[metric_type]
71
  except Exception as e:
72
  print(f"Error computing {metric_type} for {image_path}: {str(e)}")
73
+
74
  inference_times.append(entry["inference_time"])
75
+
76
  for metric in results["metrics"]:
77
  results["metrics"][metric] /= len(metadata)
78
  results["median_inference_time"] = np.median(inference_times).item()
79
+
80
  return results
81
 
82
 
83
  def main():
84
+ parser = argparse.ArgumentParser(
85
+ description="Evaluate generated images using benchmark-specific metrics"
86
+ )
87
  parser.add_argument("api_type", help="Type of API to evaluate")
88
+ parser.add_argument(
89
+ "benchmarks", nargs="+", help="List of benchmark types to evaluate"
90
+ )
91
+
92
  args = parser.parse_args()
93
+
94
  results_dir = Path("evaluation_results")
95
  results_dir.mkdir(exist_ok=True)
96
+
97
  results_file = results_dir / f"{args.api_type}.jsonl"
98
  existing_results = set()
99
 
 
107
  if benchmark_type in existing_results:
108
  print(f"Skipping {args.api_type}/{benchmark_type} - already evaluated")
109
  continue
110
+
111
  try:
112
  print(f"Evaluating {args.api_type}/{benchmark_type}")
113
  results = evaluate_benchmark(benchmark_type, args.api_type)
114
+
115
  # Append results to file
116
  with open(results_file, "a") as f:
117
  f.write(json.dumps(results) + "\n")
118
+
119
  except Exception as e:
120
  print(f"Error evaluating {args.api_type}/{benchmark_type}: {str(e)}")
121
 
nils_installs.txt DELETED
@@ -1,177 +0,0 @@
1
- accelerate==1.7.0
2
- aiohappyeyeballs==2.6.1
3
- aiohttp==3.12.12
4
- aiosignal==1.3.2
5
- annotated-types==0.7.0
6
- antlr4-python3-runtime==4.9.3
7
- anyio==4.9.0
8
- args==0.1.0
9
- asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1733250440834/work
10
- attrs==25.3.0
11
- beautifulsoup4==4.13.4
12
- boto3==1.38.33
13
- botocore==1.38.33
14
- braceexpand==0.1.7
15
- Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1749229842835/work
16
- certifi @ file:///home/conda/feedstock_root/build_artifacts/certifi_1746569525376/work/certifi
17
- cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1725560558132/work
18
- charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1746214863626/work
19
- click==8.1.8
20
- clint==0.5.1
21
- clip==0.2.0
22
- colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1733218098505/work
23
- comm @ file:///home/conda/feedstock_root/build_artifacts/comm_1733502965406/work
24
- contourpy==1.3.2
25
- cycler==0.12.1
26
- datasets==3.6.0
27
- debugpy @ file:///home/conda/feedstock_root/build_artifacts/debugpy_1744321241074/work
28
- decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1740384970518/work
29
- diffusers==0.31.0
30
- dill==0.3.8
31
- distro==1.9.0
32
- einops==0.8.1
33
- eval_type_backport==0.2.2
34
- exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1746947292760/work
35
- executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1745502089858/work
36
- fairscale==0.4.13
37
- fal_client==0.7.0
38
- filelock==3.18.0
39
- fire==0.4.0
40
- fonttools==4.58.2
41
- frozenlist==1.7.0
42
- fsspec==2025.3.0
43
- ftfy==6.3.1
44
- gdown==5.2.0
45
- h11==0.16.0
46
- h2 @ file:///home/conda/feedstock_root/build_artifacts/h2_1738578511449/work
47
- hf-xet==1.1.3
48
- hpack @ file:///home/conda/feedstock_root/build_artifacts/hpack_1737618293087/work
49
- hpsv2==1.2.0
50
- httpcore==1.0.9
51
- httpx==0.28.1
52
- httpx-sse==0.4.0
53
- huggingface-hub==0.32.5
54
- hyperframe @ file:///home/conda/feedstock_root/build_artifacts/hyperframe_1737618333194/work
55
- idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1733211830134/work
56
- image-reward==1.5
57
- importlib_metadata==8.7.0
58
- iniconfig==2.1.0
59
- iopath==0.1.10
60
- ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1719845459717/work
61
- ipython @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_ipython_1748713870/work
62
- ipython_pygments_lexers @ file:///home/conda/feedstock_root/build_artifacts/ipython_pygments_lexers_1737123620466/work
63
- jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1733300866624/work
64
- Jinja2==3.1.6
65
- jiter==0.10.0
66
- jmespath==1.0.1
67
- joblib==1.5.1
68
- jupyter_client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1733440914442/work
69
- jupyter_core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1748333051527/work
70
- kiwisolver==1.4.8
71
- lightning-utilities==0.14.3
72
- markdown-it-py==3.0.0
73
- MarkupSafe==3.0.2
74
- matplotlib==3.10.3
75
- matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1733416936468/work
76
- mdurl==0.1.2
77
- mpmath==1.3.0
78
- multidict==6.4.4
79
- multiprocess==0.70.16
80
- nest_asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1733325553580/work
81
- networkx==3.5
82
- numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1749430504934/work/dist/numpy-2.3.0-cp312-cp312-linux_x86_64.whl#sha256=3c4437a0cbe50dbae872ad4cd8dc5316009165bce459c4ffe2c46cd30aba13d4
83
- nvidia-cublas-cu12==12.6.4.1
84
- nvidia-cuda-cupti-cu12==12.6.80
85
- nvidia-cuda-nvrtc-cu12==12.6.77
86
- nvidia-cuda-runtime-cu12==12.6.77
87
- nvidia-cudnn-cu12==9.5.1.17
88
- nvidia-cufft-cu12==11.3.0.4
89
- nvidia-cufile-cu12==1.11.1.6
90
- nvidia-curand-cu12==10.3.7.77
91
- nvidia-cusolver-cu12==11.7.1.2
92
- nvidia-cusparse-cu12==12.5.4.2
93
- nvidia-cusparselt-cu12==0.6.3
94
- nvidia-nccl-cu12==2.26.2
95
- nvidia-nvjitlink-cu12==12.6.85
96
- nvidia-nvtx-cu12==12.6.77
97
- omegaconf==2.3.0
98
- open_clip_torch==2.32.0
99
- openai==1.85.0
100
- opencv-python==4.11.0
101
- opencv-python-headless==4.11.0
102
- packaging==25.0
103
- pandas==2.3.0
104
- parso @ file:///home/conda/feedstock_root/build_artifacts/parso_1733271261340/work
105
- pexpect @ file:///home/conda/feedstock_root/build_artifacts/pexpect_1733301927746/work
106
- pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1733327343728/work
107
- pillow @ file:///home/conda/feedstock_root/build_artifacts/pillow_1746646208260/work
108
- piq==0.8.0
109
- platformdirs @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_platformdirs_1746710438/work
110
- pluggy==1.6.0
111
- portalocker==3.1.1
112
- prompt_toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1744724089886/work
113
- propcache==0.3.2
114
- protobuf==3.20.3
115
- psutil==7.0.0
116
- ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1733302279685/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl#sha256=92c32ff62b5fd8cf325bec5ab90d7be3d2a8ca8c8a3813ff487a8d2002630d1f
117
- pure_eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1733569405015/work
118
- pyarrow==20.0.0
119
- pycocoevalcap==1.2
120
- pycocotools==2.0.10
121
- pycparser @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pycparser_1733195786/work
122
- pydantic==2.11.5
123
- pydantic_core==2.33.2
124
- Pygments==2.19.1
125
- pyparsing==3.2.3
126
- PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1733217236728/work
127
- pytest==7.2.0
128
- pytest-split==0.8.0
129
- python-dateutil==2.9.0.post0
130
- python-dotenv @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_python-dotenv_1742948348/work
131
- pytz==2025.2
132
- PyYAML==6.0.2
133
- pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1743831245863/work
134
- regex==2024.11.6
135
- replicate==1.0.7
136
- requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1749498106507/work
137
- rich==14.0.0
138
- s3transfer==0.13.0
139
- safetensors==0.5.3
140
- scikit-learn==1.7.0
141
- scipy==1.15.3
142
- sentencepiece==0.2.0
143
- setuptools==80.9.0
144
- shellingham==1.5.4
145
- six==1.17.0
146
- sniffio==1.3.1
147
- soupsieve==2.7
148
- stack_data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1733569443808/work
149
- sympy==1.14.0
150
- t2v_metrics==1.2
151
- tabulate==0.9.0
152
- termcolor==3.1.0
153
- threadpoolctl==3.6.0
154
- tiktoken==0.9.0
155
- timm==0.6.13
156
- together==1.5.11
157
- tokenizers==0.15.2
158
- torch==2.7.1
159
- torchmetrics==1.7.2
160
- torchvision==0.22.1
161
- tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1748003300911/work
162
- tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1735661334605/work
163
- traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1733367359838/work
164
- transformers==4.36.1
165
- triton==3.3.1
166
- typer==0.15.4
167
- typing-inspection==0.4.1
168
- typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_typing_extensions_1748959427/work
169
- tzdata==2025.2
170
- urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1744323578849/work
171
- wcwidth==0.2.13
172
- webdataset==0.2.111
173
- wheel==0.45.1
174
- xxhash==3.5.0
175
- yarl==1.20.1
176
- zipp==3.23.0
177
- zstandard==0.23.0
sample.py CHANGED
@@ -12,19 +12,19 @@ from benchmark import create_benchmark
12
  def generate_images(api_type: str, benchmarks: List[str]):
13
  images_dir = Path("images")
14
  api = create_api(api_type)
15
-
16
  api_dir = images_dir / api_type
17
  api_dir.mkdir(parents=True, exist_ok=True)
18
-
19
  for benchmark_type in tqdm(benchmarks, desc="Processing benchmarks"):
20
  print(f"\nProcessing benchmark: {benchmark_type}")
21
-
22
  benchmark = create_benchmark(benchmark_type)
23
-
24
  if benchmark_type == "geneval":
25
  benchmark_dir = api_dir / benchmark_type
26
  benchmark_dir.mkdir(parents=True, exist_ok=True)
27
-
28
  metadata_file = benchmark_dir / "metadata.jsonl"
29
  existing_metadata = {}
30
  if metadata_file.exists():
@@ -32,35 +32,44 @@ def generate_images(api_type: str, benchmarks: List[str]):
32
  for line in f:
33
  entry = json.loads(line)
34
  existing_metadata[entry["filepath"]] = entry
35
-
36
- for metadata, folder_name in tqdm(benchmark, desc=f"Generating images for {benchmark_type}", leave=False):
37
- sample_path = benchmark_dir / folder_name
38
- samples_path = sample_path / "samples"
39
- samples_path.mkdir(parents=True, exist_ok=True)
40
- image_path = samples_path / "0000.png"
41
-
42
- if image_path.exists():
43
- continue
44
-
45
- try:
46
- inference_time = api.generate_image(metadata["prompt"], image_path)
47
-
48
- metadata_entry = {
49
- "filepath": str(image_path),
50
- "prompt": metadata["prompt"],
51
- "inference_time": inference_time
52
- }
53
-
54
- existing_metadata[str(image_path)] = metadata_entry
55
-
56
- except Exception as e:
57
- print(f"\nError generating image for prompt: {metadata['prompt']}")
58
- print(f"Error: {str(e)}")
59
- continue
 
 
 
 
 
 
 
 
 
60
  else:
61
  benchmark_dir = api_dir / benchmark_type
62
  benchmark_dir.mkdir(parents=True, exist_ok=True)
63
-
64
  metadata_file = benchmark_dir / "metadata.jsonl"
65
  existing_metadata = {}
66
  if metadata_file.exists():
@@ -68,41 +77,47 @@ def generate_images(api_type: str, benchmarks: List[str]):
68
  for line in f:
69
  entry = json.loads(line)
70
  existing_metadata[entry["filepath"]] = entry
71
-
72
- for prompt, image_path in tqdm(benchmark, desc=f"Generating images for {benchmark_type}", leave=False):
73
- full_image_path = benchmark_dir / image_path
74
-
75
- if full_image_path.exists():
76
- continue
77
-
78
- try:
79
- inference_time = api.generate_image(prompt, full_image_path)
80
-
81
- metadata_entry = {
82
- "filepath": str(image_path),
83
- "prompt": prompt,
84
- "inference_time": inference_time
85
- }
86
-
87
- existing_metadata[str(image_path)] = metadata_entry
88
-
89
- except Exception as e:
90
- print(f"\nError generating image for prompt: {prompt}")
91
- print(f"Error: {str(e)}")
92
- continue
93
-
94
- with open(metadata_file, "w") as f:
95
- for entry in existing_metadata.values():
96
- f.write(json.dumps(entry) + "\n")
 
 
 
 
97
 
98
 
99
  def main():
100
- parser = argparse.ArgumentParser(description="Generate images for specified benchmarks using a given API")
 
 
101
  parser.add_argument("api_type", help="Type of API to use for image generation")
102
  parser.add_argument("benchmarks", nargs="+", help="List of benchmark types to run")
103
-
104
  args = parser.parse_args()
105
-
106
  generate_images(args.api_type, args.benchmarks)
107
 
108
 
 
12
  def generate_images(api_type: str, benchmarks: List[str]):
13
  images_dir = Path("images")
14
  api = create_api(api_type)
15
+
16
  api_dir = images_dir / api_type
17
  api_dir.mkdir(parents=True, exist_ok=True)
18
+
19
  for benchmark_type in tqdm(benchmarks, desc="Processing benchmarks"):
20
  print(f"\nProcessing benchmark: {benchmark_type}")
21
+
22
  benchmark = create_benchmark(benchmark_type)
23
+
24
  if benchmark_type == "geneval":
25
  benchmark_dir = api_dir / benchmark_type
26
  benchmark_dir.mkdir(parents=True, exist_ok=True)
27
+
28
  metadata_file = benchmark_dir / "metadata.jsonl"
29
  existing_metadata = {}
30
  if metadata_file.exists():
 
32
  for line in f:
33
  entry = json.loads(line)
34
  existing_metadata[entry["filepath"]] = entry
35
+
36
+ with open(metadata_file, "a") as f:
37
+ for metadata, folder_name in tqdm(
38
+ benchmark,
39
+ desc=f"Generating images for {benchmark_type}",
40
+ leave=False,
41
+ ):
42
+ sample_path = benchmark_dir / folder_name
43
+ samples_path = sample_path / "samples"
44
+ samples_path.mkdir(parents=True, exist_ok=True)
45
+ image_path = samples_path / "0000.png"
46
+
47
+ if image_path.exists():
48
+ continue
49
+
50
+ try:
51
+ inference_time = api.generate_image(
52
+ metadata["prompt"], image_path
53
+ )
54
+
55
+ metadata_entry = {
56
+ "filepath": str(image_path),
57
+ "prompt": metadata["prompt"],
58
+ "inference_time": inference_time,
59
+ }
60
+
61
+ f.write(json.dumps(metadata_entry) + "\n")
62
+
63
+ except Exception as e:
64
+ print(
65
+ f"\nError generating image for prompt: {metadata['prompt']}"
66
+ )
67
+ print(f"Error: {str(e)}")
68
+ continue
69
  else:
70
  benchmark_dir = api_dir / benchmark_type
71
  benchmark_dir.mkdir(parents=True, exist_ok=True)
72
+
73
  metadata_file = benchmark_dir / "metadata.jsonl"
74
  existing_metadata = {}
75
  if metadata_file.exists():
 
77
  for line in f:
78
  entry = json.loads(line)
79
  existing_metadata[entry["filepath"]] = entry
80
+
81
+ with open(metadata_file, "a") as f:
82
+ for prompt, image_path in tqdm(
83
+ benchmark,
84
+ desc=f"Generating images for {benchmark_type}",
85
+ leave=False,
86
+ ):
87
+ if image_path in existing_metadata:
88
+ continue
89
+
90
+ full_image_path = benchmark_dir / image_path
91
+
92
+ if full_image_path.exists():
93
+ continue
94
+
95
+ try:
96
+ inference_time = api.generate_image(prompt, full_image_path)
97
+
98
+ metadata_entry = {
99
+ "filepath": str(image_path),
100
+ "prompt": prompt,
101
+ "inference_time": inference_time,
102
+ }
103
+
104
+ f.write(json.dumps(metadata_entry) + "\n")
105
+
106
+ except Exception as e:
107
+ print(f"\nError generating image for prompt: {prompt}")
108
+ print(f"Error: {str(e)}")
109
+ continue
110
 
111
 
112
  def main():
113
+ parser = argparse.ArgumentParser(
114
+ description="Generate images for specified benchmarks using a given API"
115
+ )
116
  parser.add_argument("api_type", help="Type of API to use for image generation")
117
  parser.add_argument("benchmarks", nargs="+", help="List of benchmark types to run")
118
+
119
  args = parser.parse_args()
120
+
121
  generate_images(args.api_type, args.benchmarks)
122
 
123
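Taken together, an end-to-end run with the two entry points touched by this PR might look as follows; the API and benchmark names are just one valid combination from the factories above, and the baseline API expects `REPLICATE_API_TOKEN` in `.env`:

```
# Generate images with one API across two benchmarks, then score them.
python sample.py baseline parti hps
python evaluate.py baseline parti hps
# sample.py writes images and metadata under images/baseline/<benchmark>/,
# evaluate.py appends one JSON line per benchmark to evaluation_results/baseline.jsonl.
```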