# Captured from a Hugging Face Space page (Space status at capture time: "Sleeping").
"""Static Gradio leaderboard for the World-in-World benchmark.

The module builds a read-only table from hard-coded results and exposes it,
together with an explanatory "About" tab, as the Gradio app ``demo``.
"""

import gradio as gr
import pandas as pd

# Title shared by the browser tab, the page heading and the "About" tab.
APP_TITLE = (
    "World-in-World: Building a Closed-Loop World Interface to Evaluate World Models"
)

# Marker glyphs used in the table below and explained in the "About" tab:
#   "†"   post-training specialized model (suffix on the method name)
#   "–"   not applicable / not available
#   "XXX" result pending / unavailable
#   "↑" / "↓" in a column name: higher / lower is better
# NOTE(review): in the previous revision all of these glyphs had been collapsed
# into the single character "β" by a text-encoding mishap, which made the
# legend self-contradictory. The arrows follow directly from the
# "higher/lower is better" wording; "†" and "–" are the conventional choices
# for these meanings -- confirm against the paper's table.

# One row per evaluated method, in the same order as COLUMNS.
# The two metric columns hold floats, except for the "–" / "XXX" placeholders.
STATIC_DATA = [
    ["VLM", "w/o WM", "–", "RGB", "72B", 50.27, 6.24],
    ["Image Gen.", "PathDreamer [36]", "Viewpoint", "RGB-D; Sem; Pano", "0.69B", 56.99, 5.28],
    ["Image Gen.", "SE3DS [11]", "Viewpoint", "RGB-D; Pano", "1.1B", 57.53, 5.29],
    ["Video Gen.", "NWM [25]", "Trajectory", "RGB", "1B", 57.35, 5.68],
    ["Video Gen.", "SVD [6]", "Image", "RGB", "1.5B", 57.71, 5.29],
    ["Video Gen.", "LTX-Video [5]", "Text", "RGB", "2B", 56.08, 5.37],
    ["Video Gen.", "Hunyuan [4]", "Text", "RGB", "13B", 57.71, 5.21],
    ["Video Gen.", "Wan2.1 [23]", "Text", "RGB", "14B", 58.26, 5.24],
    ["Video Gen.", "Cosmos [1]", "Text", "RGB", "2B", 52.27, 5.898],
    ["Video Gen.", "Runway", "Text", "–", "–", "–", "–"],
    ["Video Gen. Post-Train", "SVD† [6]", "Action", "RGB; Pano", "1.5B", 60.98, 5.02],
    ["Video Gen. Post-Train", "LTX† [5]", "Action", "RGB; Pano", "2B", 57.53, 5.49],
    ["Video Gen. Post-Train", "WAN2.1† [23]", "Action", "RGB; Pano", "14B", "XXX", "XXX"],
    ["Video Gen. Post-Train", "Cosmos† [1]", "Action", "RGB; Pano", "2B", 60.25, 5.08],
]

COLUMNS = [
    "Model Type",
    "Method",
    "Control Type",
    "Input Type",
    "#Param.",
    "Acc. ↑",
    "Mean Traj. ↓",
]

# NOTE(review): the "π" prefixes in the UI strings below look like emoji lost
# to the same encoding mishap. The original emoji cannot be recovered from
# this file, so they are kept unchanged -- restore them if known.

# Markdown is kept flush-left at module level so that no source indentation
# leaks into the rendered text, and paragraphs are separated by blank lines so
# that headings, lists and the closing note render as distinct elements.
INTRO_MARKDOWN = """
**Performance comparison across vision-language models, image generation, and video generation models.**

π **Metrics:** Acc. ↑ (Accuracy - higher is better) | Mean Traj. ↓ (Mean Trajectory error - lower is better)
"""

ABOUT_MARKDOWN = """
# World-in-World: Building a Closed-Loop World Interface to Evaluate World Models

This leaderboard showcases performance metrics across different types of AI models in world modeling tasks:

## Model Categories

- **VLM**: Vision-Language Models
- **Image Gen.**: Image Generation Models
- **Video Gen.**: Video Generation Models
- **Video Gen. Post-Train**: Post-training specialized Video Generation Models

## Metrics Explained

- **Acc. ↑**: Accuracy score (higher values indicate better performance)
- **Mean Traj. ↓**: Mean trajectory error (lower values indicate better performance)

## Notes

- † indicates post-training specialized models
- XXX indicates results pending/unavailable
- – indicates not applicable or not available

*Results represent performance on world modeling evaluation benchmarks and may vary across different evaluation settings.*
"""


def create_leaderboard() -> pd.DataFrame:
    """Return the leaderboard as a DataFrame built from STATIC_DATA and COLUMNS."""
    return pd.DataFrame(STATIC_DATA, columns=COLUMNS)


# Create the Gradio interface
with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.HTML(
        "<h1 style='text-align: center; margin-bottom: 1rem'>"
        f"π {APP_TITLE}</h1>"
    )
    gr.Markdown(INTRO_MARKDOWN)

    with gr.Tabs():
        with gr.TabItem("π Leaderboard"):
            # Read-only table; the last two columns are declared numeric even
            # though a few rows carry "–" / "XXX" placeholder strings.
            leaderboard_table = gr.DataFrame(
                value=create_leaderboard(),
                headers=COLUMNS,
                datatype=["str", "str", "str", "str", "str", "number", "number"],
                interactive=False,
                wrap=True,
            )

        with gr.TabItem("π About"):
            gr.Markdown(ABOUT_MARKDOWN)


if __name__ == "__main__":
    demo.launch()