# Gradio app for the World-in-World Hugging Face Space (file size: 7,249 bytes).
# The file-viewer page header, commit-hash column and line-number gutter that preceded the code were removed.
import gradio as gr
import pandas as pd
# Column headers in display order; every row of STATIC_DATA follows this order.
COLUMNS = [
    "Method",
    "#Param.",
    "Input Type",
    "Control Type",
    "Model Type",
    "Mean Traj. β",
    "Acc. β",
]

# Static leaderboard rows, grouped here by their "Model Type" value.
# Metric cells that are strings instead of numbers are placeholders.
# NOTE(review): the marker glyph used in headers, method names and placeholder
# cells looks like several distinct symbols collapsed into one by an encoding
# problem - verify against the source table before changing any of them.
STATIC_DATA = [
    # Model Type: VLM
    ["w/o WM", "72B", "RGB", "β", "VLM", 6.24, 50.27],
    # Model Type: Image Gen.
    ["PathDreamer [36]", "0.69B", "RGB-D; Sem; Pano", "Viewpoint", "Image Gen.", 5.28, 56.99],
    ["SE3DS [11]", "1.1B", "RGB-D; Pano", "Viewpoint", "Image Gen.", 5.29, 57.53],
    # Model Type: Video Gen.
    ["NWM [25]", "1B", "RGB", "Trajectory", "Video Gen.", 5.68, 57.35],
    ["SVD [6]", "1.5B", "RGB", "Image", "Video Gen.", 5.29, 57.71],
    ["LTX-Video [5]", "2B", "RGB", "Text", "Video Gen.", 5.37, 56.08],
    ["Hunyuan [4]", "13B", "RGB", "Text", "Video Gen.", 5.21, 57.71],
    ["Wan2.1 [23]", "14B", "RGB", "Text", "Video Gen.", 5.24, 58.26],
    ["Cosmos [1]", "2B", "RGB", "Text", "Video Gen.", 5.898, 52.27],
    ["Runway", "β", "β", "Text", "Video Gen.", "β", "β"],
    # Model Type: Video Gen. Post-Train
    ["SVDβ [6]", "1.5B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.02, 60.98],
    ["LTXβ [5]", "2B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.49, 57.53],
    ["WAN2.1β [23]", "14B", "RGB; Pano", "Action", "Video Gen. Post-Train", "XXX", "XXX"],
    ["Cosmosβ [1]", "2B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.08, 60.25],
]
def create_leaderboard(data=None, columns=None, sort_by="Acc. β") -> pd.DataFrame:
    """Build the leaderboard table with the best score first.

    Rows are ordered by the ``sort_by`` column in descending order. Cells in
    that column that cannot be parsed as numbers (placeholder strings) are
    kept verbatim and placed after every numeric row. Rows with equal scores,
    and the placeholder rows among themselves, keep their input order.

    Args:
        data: Row lists to display. Defaults to the module-level
            ``STATIC_DATA``.
        columns: Column names matching each row. Defaults to the module-level
            ``COLUMNS``.
        sort_by: Name of the column to rank by.

    Returns:
        A new ``pandas.DataFrame`` holding the original cell values, reordered
        and re-indexed from 0.
    """
    rows = STATIC_DATA if data is None else data
    header = COLUMNS if columns is None else columns
    df = pd.DataFrame(rows, columns=header)

    # Parse a numeric copy of the ranking column; placeholders become NaN.
    scores = pd.to_numeric(df[sort_by], errors="coerce")

    # NaN + na_position="last" (rather than a -1 sentinel) keeps placeholders
    # at the bottom even when a real score is below -1. A stable sort makes
    # the order of tied rows deterministic.
    order = scores.sort_values(
        ascending=False, na_position="last", kind="stable"
    ).index

    # `order` holds index labels, so select with .loc; this returns the
    # untouched original cells (placeholders included) in ranked order.
    return df.loc[order].reset_index(drop=True)
# Top-level Gradio layout, bound to `demo`: a page title followed by three tabs
# (interactive demo, leaderboard, about). All media components are created with
# interactive=False and all text is static.
# NOTE(review): the source this was recovered from had lost its indentation, so
# the nesting below is reconstructed from the zone comments - in particular,
# whether gr.Model3D sits inside the middle gr.Row() or below it should be
# confirmed against the running app. Contents of the triple-quoted strings are
# left exactly as found.
with gr.Blocks(title="World-in-World: Building a Closed-Loop World Interface to Evaluate World Models", theme=gr.themes.Soft()) as demo:
    # Centered page heading.
    gr.HTML("<h1 style='text-align: center; margin-bottom: 1rem'>π World-in-World: Building a Closed-Loop World Interface to Evaluate World Models</h1>")
    with gr.Tabs():
        # Tab 1: a three-column demo (agent view | environment feedback | world-model output).
        with gr.TabItem("π§βπ« Interactive Demo"):
            with gr.Row():
                # Left Zone: Agent's View
                with gr.Column(scale=2, min_width=350):
                    gr.HTML("<h2 style='text-align: center;'>Agent's View</h2>")
                    # Light-blue card with the hard-coded task instruction.
                    gr.HTML("""
<div style='background-color: #e6f3ff; border: 1px solid #b3d9ff; border-radius: 8px; padding: 15px; font-family: sans-serif;'>
<div style='display: flex; align-items: center; margin-bottom: 10px;'>
<span style='font-size: 24px; margin-right: 10px;'>π§ </span>
<h3 style='margin: 0; color: #333;'>Instruction:</h3>
</div>
<p style='margin: 0; color: #555;'>Navigate to the Toaster in the room and be as close as possible to it.</p>
</div>
""")
                    # Grey card with the hard-coded plan; the ordered list starts at 4
                    # so the items are numbered 4-7, matching the card heading.
                    gr.HTML("""
<div style='background-color: #f5f5f5; border: 1px solid #e0e0e0; border-radius: 8px; padding: 15px; margin-top: 20px; font-family: sans-serif;'>
<div style='display: flex; align-items: center; margin-bottom: 10px;'>
<span style='font-size: 24px; margin-right: 10px;'>π¦Ύ</span>
<h3 style='margin: 0; color: #333;'>Environment Step 4-7:</h3>
</div>
<h4 style='margin-top: 10px; margin-bottom: 5px; color: #444;'>Planning:</h4>
<ol start="4" style='padding-left: 20px; margin: 0; color: #555;'>
<li>Move leftward by 0.25.</li>
<li>Move leftward by 0.25.</li>
<li>Move forward by 0.25.</li>
<li>Move forward by 0.25.</li>
</ol>
</div>
""")
                # Middle Zone: Closed-Loop Environmental Feedback
                with gr.Column(scale=4, min_width=500):
                    gr.HTML("<h2 style='text-align: center; color: #db83b5;'>Closed-Loop Environmental Feedback</h2>")
                    # NOTE(review): the first-person video path is under X7HyMhZNoso/E145,
                    # while the bird's-eye image and the 3D scene use 5ZKStnWn8Zo -
                    # confirm this mix of scenes is intended.
                    # All paths are absolute under /home/user/app, so the assets
                    # must exist at exactly that location.
                    with gr.Row():
                        gr.Video("/home/user/app/demo_source_data/AR/FTwan21_lora/X7HyMhZNoso/E145/A001/world_model_gen/bbox_gen_video_1.mp4", label="First Person View", interactive=False)
                        gr.Image("/home/user/app/demo_source_data/AR/FTwan21_lora/5ZKStnWn8Zo/E014/A000/real_obs_bbox.png", label="Bird's Eye View", type="pil", interactive=False)
                        gr.Model3D("/home/user/app/demo_source_data/scenes_glb/5ZKStnWn8Zo.glb", label="3D Scene", interactive=False)
                # Right Zone: World Model's Generation
                with gr.Column(scale=3, min_width=400):
                    gr.HTML("<h2 style='text-align: center;'>World Model's Generation</h2>")
                    # Clip displayed as the world model's generated view.
                    gr.Video("/home/user/app/demo_source_data/AR/FTwan21_lora/5ZKStnWn8Zo/E014/A005/world_model_gen/obj_centered_gen_video_1.mp4", label="Generated View", interactive=False)
        # Tab 2: read-only results table, filled once at build time from
        # create_leaderboard(); the datatype list has one entry per COLUMNS entry.
        with gr.TabItem("π Leaderboard"):
            leaderboard_table = gr.DataFrame(
                value=create_leaderboard(),
                headers=COLUMNS,
                datatype=["str", "str", "str", "str", "str", "number", "number"],
                interactive=False,
                wrap=True
            )
        # Tab 3: static Markdown describing the model categories, metrics and table markers.
        with gr.TabItem("π About"):
            gr.Markdown("""
# World-in-World: Building a Closed-Loop World Interface to Evaluate World Models
This leaderboard showcases performance metrics across different types of AI models in world modeling tasks:
## Model Categories
- **VLM**: Vision-Language Models
- **Image Gen.**: Image Generation Models
- **Video Gen.**: Video Generation Models
- **Video Gen. Post-Train**: Post-training specialized Video Generation Models
## Metrics Explained
- **Acc. β**: Accuracy score (higher values indicate better performance)
- **Mean Traj. β**: Mean trajectory error (lower values indicate better performance)
## Notes
- β indicates post-training specialized models
- XXX indicates results pending/unavailable
- β indicates not applicable or not available
*Results represent performance on world modeling evaluation benchmarks and may vary across different evaluation settings.*
""")
# Launch the Gradio app only when this file is executed as a script, so that
# importing the module does not start a server.
if __name__ == "__main__":
    demo.launch()