Spaces:
Running
Running
Commit
·
bcec9c2
1
Parent(s):
8c2887a
feat: add Gradio dashboard and leaderboard functionality with updated dependencies
Browse files- README.md +34 -0
- dashboard/app.py +195 -0
- dashboard/data/text_to_image.jsonl +9 -0
- pyproject.toml +2 -0
README.md
CHANGED
@@ -1,4 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# InferBench
|
|
|
2 |
Evaluate the quality and efficiency of image generation APIs.
|
3 |
|
4 |
## Installation
|
@@ -35,3 +51,21 @@ This is how you would evaluate the benchmarks once you have all images:
|
|
35 |
```
|
36 |
python evaluate.py replicate draw_bench genai_bench geneval hps parti
|
37 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: InferBench
|
3 |
+
emoji: 🥇
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: indigo
|
6 |
+
sdk: gradio
|
7 |
+
app_file: dashboard/app.py
|
8 |
+
pinned: true
|
9 |
+
license: apache-2.0
|
10 |
+
short_description: A cost/quality/speed Leaderboard for Inference Providers!
|
11 |
+
sdk_version: 5.19.0
|
12 |
+
tags:
|
13 |
+
- leaderboard
|
14 |
+
---
|
15 |
+
|
16 |
# InferBench
|
17 |
+
|
18 |
Evaluate the quality and efficiency of image generation APIs.
|
19 |
|
20 |
## Installation
|
|
|
51 |
```
|
52 |
python evaluate.py replicate draw_bench genai_bench geneval hps parti
|
53 |
```
|
54 |
+
|
55 |
+
## Dashboard
|
56 |
+
|
57 |
+
To run the dashboard, you can use the following command:
|
58 |
+
|
59 |
+
```
|
60 |
+
python dashboard/app.py
|
61 |
+
```
|
62 |
+
|
63 |
+
To deploy the dashboard, you can use the following commands:
|
64 |
+
|
65 |
+
```
|
66 |
+
git remote add hf https://huggingface.co/spaces/PrunaAI/InferBench
|
67 |
+
```
|
68 |
+
|
69 |
+
```
|
70 |
+
git push hf main
|
71 |
+
```
|
dashboard/app.py
ADDED
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Gradio dashboard for the InferBench text-to-image leaderboard.

Loads benchmark results from ``data/text_to_image.jsonl`` (relative to this
file) into a pandas DataFrame and renders them with
``gradio_leaderboard.Leaderboard``, alongside an "About" tab, community
links, and a citation block.
"""

from pathlib import Path

import gradio as gr
import pandas as pd
from gradio_leaderboard import Leaderboard

custom_css = """
.logo {
    width: 300px;
    height: auto;
    max-width: 100%;
    margin: 0 auto;
    object-fit: contain;
    padding-bottom: 0;
}
.text {
    font-size: 16px !important;
}
.tabs button {
    font-size: 20px;
}
.subtabs button {
    font-size: 20px;
}
h1, h2 {
    margin: 0;
    padding-top: 0;
}
"""

# Override method to avoid a config-validation bug in gradio_leaderboard.
Leaderboard.raise_error_if_incorrect_config = lambda self: None

abs_path = Path(__file__).parent / "data"

# Load the JSONL file into a pandas DataFrame.
df = pd.read_json(abs_path / "text_to_image.jsonl", lines=True)

# Render the raw URL column as a clickable "link" anchor.
df["URL"] = df.apply(
    lambda row: f'<a target="_blank" href="{row["URL"]}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>',
    axis=1,
)

# Columns pinned to the front of the table, in display order; every remaining
# metric column follows in its original order.  (Previously this set was
# spelled out twice -- once for the front of the table and once again as the
# exclusion filter -- so it is deduplicated into a single list here.)
LEADING_COLUMNS = [
    "URL",
    "Platform",
    "Owner",
    "Device",
    "Model",
    "Optimization",
    "Median Inference Time",
    "Price per Image",
]
df = df[LEADING_COLUMNS + [col for col in df.columns if col not in LEADING_COLUMNS]]
df = df.sort_values(by="GenEval", ascending=False)

# The first six columns (URL anchor + five categorical columns) are rendered
# as markdown; everything after them is numeric.
N_MARKDOWN_COLUMNS = 6

with gr.Blocks("ParityError/Interstellar", fill_width=True, css=custom_css) as demo:
    gr.HTML(
        """
<div style="text-align: center;">
<img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/inferbench/logo2-cropped.png" style="width: 200px; height: auto; max-width: 100%; margin: 0 auto;">
<h1>🏋️ InferBench 🏋️</h1>
<h2>A cost/quality/speed Leaderboard for Inference Providers!</h2>
</div>
"""
    )
    with gr.Tabs():
        with gr.TabItem("Text-to-Image Leaderboard"):
            Leaderboard(
                value=df,
                select_columns=df.columns.tolist(),
                datatype=["markdown"] * N_MARKDOWN_COLUMNS
                + ["number"] * (len(df.columns) - N_MARKDOWN_COLUMNS),
                filter_columns=[
                    "Platform",
                    "Owner",
                    "Device",
                    "Model",
                    "Optimization",
                ],
            )
            gr.Markdown(
                """
> **💡 Note:** Each efficiency metric and quality metric captures only one dimension of model capacity. Rankings may vary when considering other metrics.
"""
            )
        with gr.TabItem("About"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown(
                        """
# 📊 Text-to-Image Leaderboard

This leaderboard compares the performance of different text-to-image providers.

We started with a comprehensive benchmark comparing our very own FLUX-juiced with the “FLUX.1 [dev]” endpoints offered by:

- Replicate: https://replicate.com/black-forest-labs/flux-dev
- Fal: https://fal.ai/models/fal-ai/flux/dev
- Fireworks AI: https://fireworks.ai/models/fireworks/flux-1-dev-fp8
- Together AI: https://www.together.ai/models/flux-1-dev

We also included the following non-FLUX providers:

- AWS Nova Canvas: https://aws.amazon.com/ai/generative-ai/nova/creative/

All of these inference providers offer implementations but they don’t always communicate about the optimisation methods used in the background, and most endpoints have different response times and performance measures.

For comparison purposes we used the same generation set-up for all the providers.

- 28 inference steps
- 1024×1024 resolution
- Guidance scale of 3.5
- H100 GPU (80GB)—only reported by Replicate

Although we did test with this specific Pruna configuration and hardware, the applied compression methods work with different config and hardware too!

> We published a full blog post on [the creation of our FLUX-juiced endpoint](https://www.pruna.ai/blog/flux-juiced-the-fastest-image-generation-endpoint).
"""
                    )
                with gr.Column():
                    gr.Markdown(
                        """
# 🧃 FLUX.1-dev (juiced)

FLUX.1-dev (juiced) is our optimized version of FLUX.1-dev, delivering up to **2.6x faster inference** than the official Replicate API, **without sacrificing image quality**.

Under the hood, it uses a custom combination of:

- **Graph compilation** for optimized execution paths
- **Inference-time caching** for repeated operations

We won’t go deep into the internals here, but here’s the gist:

> We combine compiler-level execution graph optimization with selective caching of heavy operations (like attention layers), allowing inference to skip redundant computations without any loss in fidelity.

These techniques are generalized and plug-and-play via the **Pruna Pro** pipeline, and can be applied to nearly any diffusion-based image model—not just FLUX. For a free but still very juicy model you can use our open source solution.

> 🧪 Try FLUX-juiced now → [replicate.com/prunaai/flux.1-juiced](https://replicate.com/prunaai/flux.1-juiced)

## Sample Images

The prompts were randomly sampled from the [parti-prompts dataset](https://github.com/google-research/parti). The reported times represent the full duration of each API call.

> **For samples, check out the [Pruna Notion page](https://pruna.notion.site/FLUX-1-dev-vs-Pruna-s-FLUX-juiced-1d270a039e5f80c6a2a3c00fc0d75ef0)**
"""
                    )

    with gr.Accordion("🌍 Join the Pruna AI community!", open=False):
        gr.HTML(
            """
<a rel="nofollow" href="https://twitter.com/PrunaAI"><img alt="Twitter" src="https://img.shields.io/twitter/follow/PrunaAI?style=social"></a>
<a rel="nofollow" href="https://github.com/PrunaAI/pruna"><img alt="GitHub" src="https://img.shields.io/github/stars/prunaai/pruna"></a>
<a rel="nofollow" href="https://www.linkedin.com/company/93832878/admin/feed/posts/?feedType=following"><img alt="LinkedIn" src="https://img.shields.io/badge/LinkedIn-Connect-blue"></a>
<a rel="nofollow" href="https://discord.com/invite/rskEr4BZJx"><img alt="Discord" src="https://img.shields.io/badge/Discord-Join%20Us-blue?style=social&logo=discord"></a>
<a rel="nofollow" href="https://www.reddit.com/r/PrunaAI/"><img alt="Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/PrunaAI?style=social"></a>
"""
        )
    with gr.Accordion("Citation", open=True):
        gr.Markdown(
            """
```bibtex
@article{InferBench,
    title={InferBench: A Leaderboard for Inference Providers},
    author={PrunaAI},
    year={2025},
    howpublished={\\url{https://huggingface.co/spaces/PrunaAI/InferBench}}
}
```
"""
        )

if __name__ == "__main__":
    demo.launch()
|
dashboard/data/text_to_image.jsonl
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"Platform": "Replicate", "Owner": "Pruna AI", "Device": "1xH100", "Model": "FLUX.1-dev", "Optimization": "none", "URL": "https://huggingface.co/black-forest-labs/FLUX.1-dev?library=diffusers", "GenEval": 67.98, "HPS (v2.1)": 30.36, "GenAI-Bench (VQA)": 0.74, "DrawBench (Image Reward)": 1.0072, "PartiPromts (ARNIQA)": 0.6758, "PartiPromts (ClipIQA)": 0.8968, "PartiPromts (ClipScore)": 27.4, "PartiPromts (Sharpness - Laplacian Variance)": 6833, "Median Inference Time": 6.88, "Price per Image": 0.025}
|
2 |
+
{"Platform": "fal.ai", "Owner": "fal.ai", "Device": "Undisclosed", "Model": "FLUX.1-dev", "Optimization": "Undisclosed", "URL": "https://fal.ai/models/fal-ai/flux/dev", "GenEval": 68.72, "HPS (v2.1)": 29.97, "GenAI-Bench (VQA)": 0.7441, "DrawBench (Image Reward)": 1.0084, "PartiPromts (ARNIQA)": 0.6702, "PartiPromts (ClipIQA)": 0.8967, "PartiPromts (ClipScore)": 27.61, "PartiPromts (Sharpness - Laplacian Variance)": 7295, "Median Inference Time": 4.06, "Price per Image": 0.025}
|
3 |
+
{"Platform": "Fireworks AI", "Owner": "Fireworks AI", "Device": "Undisclosed", "Model": "FLUX.1-dev", "Optimization": "fp8", "URL": "https://fireworks.ai/models/fireworks/flux-1-dev-fp8", "GenEval": 65.55, "HPS (v2.1)": 30.26, "GenAI-Bench (VQA)": 0.7455, "DrawBench (Image Reward)": 0.9467, "PartiPromts (ARNIQA)": 0.6639, "PartiPromts (ClipIQA)": 0.8478, "PartiPromts (ClipScore)": 27.24, "PartiPromts (Sharpness - Laplacian Variance)": 5625, "Median Inference Time": 4.66, "Price per Image": 0.014}
|
4 |
+
{"Platform": "Replicate", "Owner": "Pruna AI", "Device": "1xH100", "Model": "FLUX.1-dev", "Optimization": "extra juiced", "URL": "https://replicate.com/prunaai/flux.1-juiced", "GenEval": 69.9, "HPS (v2.1)": 29.86, "GenAI-Bench (VQA)": 0.7466, "DrawBench (Image Reward)": 0.9458, "PartiPromts (ARNIQA)": 0.6591, "PartiPromts (ClipIQA)": 0.8887, "PartiPromts (ClipScore)": 27.6, "PartiPromts (Sharpness - Laplacian Variance)": 7997, "Median Inference Time": 2.6, "Price per Image": 0.004}
|
5 |
+
{"Platform": "Replicate", "Owner": "Pruna AI", "Device": "1xH100", "Model": "FLUX.1-dev", "Optimization": "juiced", "URL": "https://replicate.com/prunaai/flux.1-juiced", "GenEval": 68.64, "HPS (v2.1)": 30.38, "GenAI-Bench (VQA)": 0.7408, "DrawBench (Image Reward)": 0.9657, "PartiPromts (ARNIQA)": 0.6762, "PartiPromts (ClipIQA)": 0.9014, "PartiPromts (ClipScore)": 27.55, "PartiPromts (Sharpness - Laplacian Variance)": 7627, "Median Inference Time": 3.14, "Price per Image": 0.0048}
|
6 |
+
{"Platform": "Replicate", "Owner": "Pruna AI", "Device": "1xH100", "Model": "FLUX.1-dev", "Optimization": "lightly juiced", "URL": "https://replicate.com/prunaai/flux.1-lightly-juiced", "GenEval": 69.12, "HPS (v2.1)": 30.36, "GenAI-Bench (VQA)": 0.7405, "DrawBench (Image Reward)": 0.9972, "PartiPromts (ARNIQA)": 0.6789, "PartiPromts (ClipIQA)": 0.9031, "PartiPromts (ClipScore)": 27.56, "PartiPromts (Sharpness - Laplacian Variance)": 7849, "Median Inference Time": 3.57, "Price per Image": 0.0054}
|
7 |
+
{"Platform": "Replicate", "Owner": "Black Forest Labs", "Device": "1xH100", "Model": "FLUX.1-dev", "Optimization": "go_fast", "URL": "https://replicate.com/black-forest-labs/flux-dev", "GenEval": 67.41, "HPS (v2.1)": 29.25, "GenAI-Bench (VQA)": 0.7547, "DrawBench (Image Reward)": 0.9282, "PartiPromts (ARNIQA)": 0.6356, "PartiPromts (ClipIQA)": 0.8609, "PartiPromts (ClipScore)": 27.56, "PartiPromts (Sharpness - Laplacian Variance)": 4872, "Median Inference Time": 3.38, "Price per Image": 0.025}
|
8 |
+
{"Platform": "Together AI", "Owner": "Together AI", "Device": "Undisclosed", "Model": "FLUX.1-dev", "Optimization": "Undisclosed", "URL": "https://www.together.ai/models/flux-1-dev", "GenEval": 64.61, "HPS (v2.1)": 30.22, "GenAI-Bench (VQA)": 0.7339, "DrawBench (Image Reward)": 0.9463, "PartiPromts (ARNIQA)": 0.5752, "PartiPromts (ClipIQA)": 0.8709, "PartiPromts (ClipScore)": 27.31, "PartiPromts (Sharpness - Laplacian Variance)": 4501, "Median Inference Time": 3.38, "Price per Image": 0.025}
|
9 |
+
{"Platform": "AWS", "Owner": "AWS", "Device": "Undisclosed", "Model": "AWS Nova Canvas", "Optimization": "Undisclosed", "URL": "https://aws.amazon.com/ai/generative-ai/nova/creative/", "GenEval": null, "HPS (v2.1)": null, "GenAI-Bench (VQA)": null, "DrawBench (Image Reward)": 1.07, "PartiPromts (ARNIQA)": 0.65, "PartiPromts (ClipIQA)": 0.954, "PartiPromts (ClipScore)": 28.1, "PartiPromts (Sharpness - Laplacian Variance)": 10514, "Median Inference Time": 3.65, "Price per Image": null}
|
pyproject.toml
CHANGED
@@ -24,6 +24,8 @@ dependencies = [
|
|
24 |
"diffusers<=0.31",
|
25 |
"piq>=0.8.0",
|
26 |
"boto3>=1.39.4",
|
|
|
|
|
27 |
]
|
28 |
|
29 |
[tool.hatch.build.targets.wheel]
|
|
|
24 |
"diffusers<=0.31",
|
25 |
"piq>=0.8.0",
|
26 |
"boto3>=1.39.4",
|
27 |
+
"gradio>=5.37.0",
|
28 |
+
"gradio-leaderboard>=0.0.14",
|
29 |
]
|
30 |
|
31 |
[tool.hatch.build.targets.wheel]
|