import gradio as gr
import pandas as pd
# Column labels of the leaderboard table; every STATIC_DATA row follows this order.
# The arrows mark the preferred direction of a metric (↓ lower is better,
# ↑ higher is better).
COLUMNS = [
    "Method",
    "#Param.",
    "Input Type",
    "Control Type",
    "Model Type",
    "Mean Traj. ↓",
    "Acc. ↑",
]

# Hard-coded benchmark results, one row per method, laid out as in COLUMNS.
# The two metric cells hold floats, or a placeholder string when no number is
# available: "–" (not applicable / not available) or "XXX" (result pending).
# Bracketed numbers in the method names are citation indices; "†" marks the
# post-trained variants.
STATIC_DATA = [
    # Baseline
    ["w/o WM", "72B", "RGB", "–", "VLM", 6.24, 50.27],
    # Image generation models
    ["PathDreamer [36]", "0.69B", "RGB-D; Sem; Pano", "Viewpoint", "Image Gen.", 5.28, 56.99],
    ["SE3DS [11]", "1.1B", "RGB-D; Pano", "Viewpoint", "Image Gen.", 5.29, 57.53],
    # Video generation models
    ["NWM [25]", "1B", "RGB", "Trajectory", "Video Gen.", 5.68, 57.35],
    ["SVD [6]", "1.5B", "RGB", "Image", "Video Gen.", 5.29, 57.71],
    ["LTX-Video [5]", "2B", "RGB", "Text", "Video Gen.", 5.37, 56.08],
    ["Hunyuan [4]", "13B", "RGB", "Text", "Video Gen.", 5.21, 57.71],
    ["Wan2.1 [23]", "14B", "RGB", "Text", "Video Gen.", 5.24, 58.26],
    # NOTE(review): three decimals here, two everywhere else - confirm the value.
    ["Cosmos [1]", "2B", "RGB", "Text", "Video Gen.", 5.898, 52.27],
    ["Runway", "–", "–", "Text", "Video Gen.", "–", "–"],
    # Post-trained video generation models
    ["SVD† [6]", "1.5B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.02, 60.98],
    ["LTX† [5]", "2B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.49, 57.53],
    ["WAN2.1† [23]", "14B", "RGB; Pano", "Action", "Video Gen. Post-Train", "XXX", "XXX"],
    ["Cosmos† [1]", "2B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.08, 60.25],
]
def create_leaderboard(data=None, columns=None):
    """Build the leaderboard table, ordered by accuracy (best first).

    Args:
        data: Row-major table, one sequence per method. Defaults to the
            module-level ``STATIC_DATA``.
        columns: Column labels for ``data``; must contain ``"Acc. ↑"``.
            Defaults to the module-level ``COLUMNS``.

    Returns:
        A new ``pandas.DataFrame`` with a fresh ``0..n-1`` index whose rows
        are sorted by descending ``"Acc. ↑"``. Cell values are returned
        exactly as supplied, so placeholder strings such as ``"–"`` or
        ``"XXX"`` stay visible. Rows whose accuracy is not numeric always
        come last; tied rows and placeholder rows keep their input order.

    Raises:
        KeyError: If the table has no ``"Acc. ↑"`` column.
    """
    df = pd.DataFrame(
        STATIC_DATA if data is None else data,
        columns=COLUMNS if columns is None else columns,
    )

    # Rank on a numeric view of the column; placeholders become NaN. NaN is
    # pushed to the end explicitly instead of being replaced by a sentinel
    # score, which would outrank any genuine value below the sentinel.
    accuracy = pd.to_numeric(df["Acc. ↑"], errors="coerce")

    # mergesort is pandas' stable sort kind, so equal scores are displayed in
    # a deterministic (input) order rather than whatever quicksort yields.
    order = accuracy.sort_values(
        ascending=False, na_position="last", kind="mergesort"
    ).index

    # Select from the untouched frame so the original cell values are shown.
    return df.loc[order].reset_index(drop=True)
# Title shared by the browser tab and the page heading.
_TITLE = "World-in-World: Building a Closed-Loop World Interface to Evaluate World Models"

# NOTE(review): the markup and colours of the two panels below are a minimal
# reconstruction (a blue instruction box and a grey planning box); tune them
# to match the reference figure. An explicit text colour keeps the panels
# readable on a dark theme.
_PANEL_STYLE = "border-radius: 8px; padding: 12px 16px; margin-bottom: 12px; color: #1f2937;"

# Task instruction given to the agent (blue box).
_INSTRUCTION_HTML = f"""
<div style="background-color: #dbeafe; {_PANEL_STYLE}">
  <p>🧠 <strong>Instruction:</strong></p>
  <p>Navigate to the Toaster in the room and be as close as possible to it.</p>
</div>
"""

# Action plan for the displayed environment steps (grey box). Explicit <br>
# tags keep one action per line; plain newlines are collapsed by HTML.
_PLANNING_HTML = f"""
<div style="background-color: #f3f4f6; {_PANEL_STYLE}">
  <p>🦾 <strong>Environment Step 4-7:</strong></p>
  <p><strong>Planning:</strong></p>
  <p>
    - Move leftward by 0.25.<br>
    - Move leftward by 0.25.<br>
    - Move forward by 0.25.<br>
    - Move forward by 0.25.
  </p>
</div>
"""

# Kept flush-left so that no line is indented far enough to be read as a
# Markdown code block. Blank lines separate the sections; in particular the
# closing footnote must not directly follow the last bullet, or it is
# rendered as part of that list item.
_ABOUT_MARKDOWN = """
# World-in-World: Building a Closed-Loop World Interface to Evaluate World Models

This leaderboard showcases performance metrics across different types of AI models in world modeling tasks:

## Model Categories

- **VLM**: Vision-Language Models
- **Image Gen.**: Image Generation Models
- **Video Gen.**: Video Generation Models
- **Video Gen. Post-Train**: Post-training specialized Video Generation Models

## Metrics Explained

- **Acc. ↑**: Accuracy score (higher values indicate better performance)
- **Mean Traj. ↓**: Mean trajectory error (lower values indicate better performance)

## Notes

- † indicates post-training specialized models
- XXX indicates results pending/unavailable
- – indicates not applicable or not available

*Results represent performance on world modeling evaluation benchmarks and may vary across different evaluation settings.*
"""


def _zone_heading(text):
    """Return the centred sub-heading HTML for one column of the demo tab."""
    return f'<h3 style="text-align: center;">{text}</h3>'


# Page layout: a title followed by three tabs (interactive demo, leaderboard,
# about). ``demo`` is the app object that the launch guard starts.
with gr.Blocks(title=_TITLE, theme=gr.themes.Soft()) as demo:
    gr.HTML(f'<h1 style="text-align: center;">🏆 {_TITLE}</h1>')

    with gr.Tabs():
        # The teacher emoji is "person" + zero-width joiner + "school"; the
        # joiner is written as an escape so tooling cannot silently drop it.
        with gr.TabItem("🧑\u200d🏫 Interactive Demo"):
            with gr.Row():
                # Left zone: what the agent is asked to do and its plan.
                with gr.Column(scale=2, min_width=350):
                    gr.HTML(_zone_heading("Agent's View"))
                    gr.HTML(_INSTRUCTION_HTML)
                    gr.HTML(_PLANNING_HTML)

                # Middle zone: observations returned by the environment.
                with gr.Column(scale=4, min_width=500):
                    gr.HTML(_zone_heading("Closed-Loop Environmental Feedback"))
                    with gr.Row():
                        # NOTE(review): this clip comes from scene
                        # X7HyMhZNoso / E145, whereas every other asset on
                        # this tab uses 5ZKStnWn8Zo / E014 - confirm that the
                        # mismatch is intended.
                        gr.Video(
                            "/home/user/app/demo_source_data/AR/FTwan21_lora/X7HyMhZNoso/E145/A001/world_model_gen/bbox_gen_video_1.mp4",
                            label="First Person View",
                            interactive=False,
                        )
                        gr.Image(
                            "/home/user/app/demo_source_data/AR/FTwan21_lora/5ZKStnWn8Zo/E014/A000/real_obs_bbox.png",
                            label="Bird's Eye View",
                            type="pil",
                            interactive=False,
                        )
                    # NOTE(review): placed below the two views; move it into
                    # the row above if all three should sit side by side.
                    gr.Model3D(
                        "/home/user/app/demo_source_data/scenes_glb/5ZKStnWn8Zo.glb",
                        label="3D Scene",
                        interactive=False,
                    )

                # Right zone: the world model's predicted rollout.
                with gr.Column(scale=3, min_width=400):
                    gr.HTML(_zone_heading("World Model's Generation"))
                    gr.Video(
                        "/home/user/app/demo_source_data/AR/FTwan21_lora/5ZKStnWn8Zo/E014/A005/world_model_gen/obj_centered_gen_video_1.mp4",
                        label="Generated View",
                        interactive=False,
                    )

        with gr.TabItem("📊 Leaderboard"):
            # NOTE(review): the last two columns are declared "number" but the
            # table also contains the placeholder strings "–" and "XXX";
            # confirm they render as intended.
            leaderboard_table = gr.DataFrame(
                value=create_leaderboard(),
                headers=COLUMNS,
                datatype=["str", "str", "str", "str", "str", "number", "number"],
                interactive=False,
                wrap=True,
            )

        with gr.TabItem("📝 About"):
            gr.Markdown(_ABOUT_MARKDOWN)
def main():
    """Start the Gradio server for the module-level ``demo`` app."""
    demo.launch()


# Launch only when executed as a script, so importing the module is enough to
# build the UI without starting a server.
if __name__ == "__main__":
    main()