Spaces:
Sleeping
Sleeping
Update content and formatting
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import gradio as gr
|
2 |
-
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
|
3 |
import pandas as pd
|
4 |
|
5 |
# Static data
|
@@ -21,56 +20,37 @@ STATIC_DATA = [
|
|
21 |
]
|
22 |
|
23 |
COLUMNS = ["Model Type", "Method", "Control Type", "Input Type", "#Param.", "Acc. β", "Mean Traj. β"]
|
24 |
-
LEADERBOARD_DF = pd.DataFrame(STATIC_DATA, columns=COLUMNS)
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
.gradio-container {
|
30 |
-
max-width: 1200px !important;
|
31 |
-
}
|
32 |
-
"""
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
value=dataframe,
|
39 |
-
datatype=["str", "str", "str", "str", "str", "number", "number"],
|
40 |
-
select_columns=SelectColumns(
|
41 |
-
default_selection=COLUMNS,
|
42 |
-
cant_deselect=["Model Type", "Method", "Acc. β"],
|
43 |
-
label="Select Columns to Display:",
|
44 |
-
),
|
45 |
-
search_columns=["Model Type", "Method"],
|
46 |
-
hide_columns=[],
|
47 |
-
filter_columns=[
|
48 |
-
ColumnFilter("Model Type", type="checkboxgroup", label="Model types"),
|
49 |
-
ColumnFilter("Control Type", type="checkboxgroup", label="Control types"),
|
50 |
-
ColumnFilter("Input Type", type="checkboxgroup", label="Input types"),
|
51 |
-
],
|
52 |
-
bool_checkboxgroup_label="Hide models",
|
53 |
-
interactive=False,
|
54 |
-
)
|
55 |
-
|
56 |
-
demo = gr.Blocks(css=custom_css, title="Model Performance Leaderboard")
|
57 |
-
with demo:
|
58 |
-
gr.HTML("<h1 style='text-align: center'>π Model Performance Leaderboard</h1>")
|
59 |
gr.Markdown("""
|
60 |
**Performance comparison across vision-language models, image generation, and video generation models.**
|
61 |
|
62 |
π **Metrics:** Acc. ↑ (Accuracy - higher is better) | Mean Traj. ↓ (Mean Trajectory error - lower is better)
|
63 |
-
"""
|
64 |
-
|
65 |
-
with gr.Tabs(
|
66 |
-
with gr.TabItem("
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
gr.Markdown("""
|
71 |
-
#
|
72 |
|
73 |
-
This leaderboard showcases performance metrics across different types of AI models:
|
74 |
|
75 |
## Model Categories
|
76 |
- **VLM**: Vision-Language Models
|
@@ -87,8 +67,8 @@ with demo:
|
|
87 |
- XXX indicates results pending/unavailable
|
88 |
- β indicates not applicable or not available
|
89 |
|
90 |
-
*Results may vary across different evaluation settings
|
91 |
-
"""
|
92 |
|
93 |
if __name__ == "__main__":
|
94 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
|
|
2 |
import pandas as pd
|
3 |
|
4 |
# Static data
|
|
|
20 |
]
|
21 |
|
22 |
COLUMNS = ["Model Type", "Method", "Control Type", "Input Type", "#Param.", "Acc. β", "Mean Traj. β"]
|
|
|
23 |
|
24 |
+
def create_leaderboard():
    """Build the table shown on the leaderboard tab.

    Returns:
        A ``pandas.DataFrame`` constructed from the module-level
        ``STATIC_DATA``, with its columns labelled by ``COLUMNS``.
    """
    return pd.DataFrame(STATIC_DATA, columns=COLUMNS)
|
|
|
|
|
|
|
|
|
27 |
|
28 |
+
# Create the Gradio interface
|
29 |
+
with gr.Blocks(title="World-in-World: Building a Closed-Loop World Interface to Evaluate World Models", theme=gr.themes.Soft()) as demo:
|
30 |
+
gr.HTML("<h1 style='text-align: center; margin-bottom: 1rem'>π World-in-World: Building a Closed-Loop World Interface to Evaluate World Models</h1>")
|
31 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
gr.Markdown("""
|
33 |
**Performance comparison across vision-language models, image generation, and video generation models.**
|
34 |
|
35 |
π **Metrics:** Acc. ↑ (Accuracy - higher is better) | Mean Traj. ↓ (Mean Trajectory error - lower is better)
|
36 |
+
""")
|
37 |
+
|
38 |
+
with gr.Tabs():
|
39 |
+
with gr.TabItem("π Leaderboard"):
|
40 |
+
leaderboard_table = gr.DataFrame(
|
41 |
+
value=create_leaderboard(),
|
42 |
+
headers=COLUMNS,
|
43 |
+
datatype=["str", "str", "str", "str", "str", "number", "number"],
|
44 |
+
interactive=False,
|
45 |
+
wrap=True,
|
46 |
+
height=600
|
47 |
+
)
|
48 |
+
|
49 |
+
with gr.TabItem("π About"):
|
50 |
gr.Markdown("""
|
51 |
+
# World-in-World: Building a Closed-Loop World Interface to Evaluate World Models
|
52 |
|
53 |
+
This leaderboard showcases performance metrics across different types of AI models in world modeling tasks:
|
54 |
|
55 |
## Model Categories
|
56 |
- **VLM**: Vision-Language Models
|
|
|
67 |
- XXX indicates results pending/unavailable
|
68 |
- β indicates not applicable or not available
|
69 |
|
70 |
+
*Results represent performance on world modeling evaluation benchmarks and may vary across different evaluation settings.*
|
71 |
+
""")
|
72 |
|
73 |
# Start the Gradio app only when this file is run directly as a script.
if __name__ == "__main__":
|
74 |
demo.launch()
|