Spaces:

TaiMingLu
/

wiw-prototype

Sleeping

App Files Files Community

wiw-prototype / app.py

TaiMingLu

Update and fix

ff2ee62 about 1 month ago

raw

history blame contribute delete

7.25 kB

	import gradio as gr
	import pandas as pd

	# Column headers in display order; every row of STATIC_DATA follows this layout.
	# The arrows mark the preferred direction of each metric (↓ lower, ↑ higher).
	COLUMNS = [
	    "Method",
	    "#Param.",
	    "Input Type",
	    "Control Type",
	    "Model Type",
	    "Mean Traj. ↓",
	    "Acc. ↑",
	]

	# Hard-coded leaderboard rows, one list per method, laid out as in COLUMNS.
	# Metric cells are floats when a result exists; "–" and "XXX" are string
	# placeholders for entries without a numeric result.
	STATIC_DATA = [
	    # Baseline without a world model (Model Type: VLM).
	    ["w/o WM", "72B", "RGB", "–", "VLM", 6.24, 50.27],
	    # Image generation models.
	    ["PathDreamer [36]", "0.69B", "RGB-D; Sem; Pano", "Viewpoint", "Image Gen.", 5.28, 56.99],
	    ["SE3DS [11]", "1.1B", "RGB-D; Pano", "Viewpoint", "Image Gen.", 5.29, 57.53],
	    # Video generation models.
	    ["NWM [25]", "1B", "RGB", "Trajectory", "Video Gen.", 5.68, 57.35],
	    ["SVD [6]", "1.5B", "RGB", "Image", "Video Gen.", 5.29, 57.71],
	    ["LTX-Video [5]", "2B", "RGB", "Text", "Video Gen.", 5.37, 56.08],
	    ["Hunyuan [4]", "13B", "RGB", "Text", "Video Gen.", 5.21, 57.71],
	    ["Wan2.1 [23]", "14B", "RGB", "Text", "Video Gen.", 5.24, 58.26],
	    ["Cosmos [1]", "2B", "RGB", "Text", "Video Gen.", 5.898, 52.27],
	    ["Runway", "–", "–", "Text", "Video Gen.", "–", "–"],
	    # Post-trained video generation models (marked with †).
	    ["SVD† [6]", "1.5B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.02, 60.98],
	    ["LTX† [5]", "2B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.49, 57.53],
	    ["WAN2.1† [23]", "14B", "RGB; Pano", "Action", "Video Gen. Post-Train", "XXX", "XXX"],
	    ["Cosmos† [1]", "2B", "RGB; Pano", "Action", "Video Gen. Post-Train", 5.08, 60.25],
	]

	def create_leaderboard(data=None, columns=None, sort_by="Acc. ↑"):
	    """Return the leaderboard as a DataFrame ranked by ``sort_by``, best first.

	    Rows are ordered by the numeric value of the ``sort_by`` column in
	    descending order. Cells that cannot be parsed as numbers (for example
	    the "–" and "XXX" placeholders) are placed after every numeric row.
	    Ties, and the non-numeric rows among themselves, keep their input order.
	    The returned frame holds the original cell values, placeholders included.

	    Args:
	        data: Row lists to rank. Defaults to the module-level STATIC_DATA.
	        columns: Column names for ``data``. Defaults to the module-level COLUMNS.
	        sort_by: Name of the column to rank by.

	    Returns:
	        A new pandas DataFrame with a fresh 0..n-1 index.
	    """
	    rows = STATIC_DATA if data is None else data
	    header = COLUMNS if columns is None else columns
	    df = pd.DataFrame(rows, columns=header)

	    # Parse the ranking column separately so the displayed values stay untouched.
	    # Unparseable cells become NaN instead of a sentinel such as -1, which would
	    # collide with genuine scores below the sentinel.
	    sort_key = pd.to_numeric(df[sort_by], errors="coerce")

	    # A stable sort makes the order of tied rows deterministic; NaN goes last.
	    ranked_labels = sort_key.sort_values(
	        ascending=False, kind="stable", na_position="last"
	    ).index

	    # The sorted index holds labels, so select with .loc rather than .iloc.
	    return df.loc[ranked_labels].reset_index(drop=True)

	# Build the Gradio UI: one Blocks app, bound to `demo`, with three tabs
	# (Interactive Demo, Leaderboard, About).
	# NOTE(review): the nested indentation under the `with` statements is missing
	# in this copy of the file; the intended nesting must be restored before the
	# module can run — confirm against the original app.py.
	with gr.Blocks(title="World-in-World: Building a Closed-Loop World Interface to Evaluate World Models", theme=gr.themes.Soft()) as demo:
	# Centered page heading.
	gr.HTML("<h1 style='text-align: center; margin-bottom: 1rem'>🏆 World-in-World: Building a Closed-Loop World Interface to Evaluate World Models</h1>")

	with gr.Tabs():
	# Tab 1: static showcase of one episode (instruction, plan, and media files).
	with gr.TabItem("🧑‍🏫 Interactive Demo"):
	with gr.Row():
	# Left Zone: Agent's View
	with gr.Column(scale=2, min_width=350):
	gr.HTML("<h2 style='text-align: center;'>Agent's View</h2>")
	# Mimicking the blue instruction box from the image
	gr.HTML("""
	<div style='background-color: #e6f3ff; border: 1px solid #b3d9ff; border-radius: 8px; padding: 15px; font-family: sans-serif;'>
	<div style='display: flex; align-items: center; margin-bottom: 10px;'>
	<span style='font-size: 24px; margin-right: 10px;'>🧠</span>
	<h3 style='margin: 0; color: #333;'>Instruction:</h3>
	</div>
	<p style='margin: 0; color: #555;'>Navigate to the Toaster in the room and be as close as possible to it.</p>
	</div>
	""")
	# Mimicking the grey planning box from the image
	# The ordered list starts at 4 so its numbering matches "Environment Step 4-7".
	gr.HTML("""
	<div style='background-color: #f5f5f5; border: 1px solid #e0e0e0; border-radius: 8px; padding: 15px; margin-top: 20px; font-family: sans-serif;'>
	<div style='display: flex; align-items: center; margin-bottom: 10px;'>
	<span style='font-size: 24px; margin-right: 10px;'>🦾</span>
	<h3 style='margin: 0; color: #333;'>Environment Step 4-7:</h3>
	</div>
	<h4 style='margin-top: 10px; margin-bottom: 5px; color: #444;'>Planning:</h4>
	<ol start="4" style='padding-left: 20px; margin: 0; color: #555;'>
	<li>Move leftward by 0.25.</li>
	<li>Move leftward by 0.25.</li>
	<li>Move forward by 0.25.</li>
	<li>Move forward by 0.25.</li>
	</ol>
	</div>
	""")

	# Middle Zone: Closed-Loop Environmental Feedback
	with gr.Column(scale=4, min_width=500):
	gr.HTML("<h2 style='text-align: center; color: #db83b5;'>Closed-Loop Environmental Feedback</h2>")
	# Non-interactive viewers for pre-recorded assets. The paths are absolute
	# and point under /home/user/app/demo_source_data.
	# NOTE(review): the video path uses scene X7HyMhZNoso while the image and
	# 3D model use 5ZKStnWn8Zo — confirm this mix is intended.
	with gr.Row():
	gr.Video("/home/user/app/demo_source_data/AR/FTwan21_lora/X7HyMhZNoso/E145/A001/world_model_gen/bbox_gen_video_1.mp4", label="First Person View", interactive=False)
	gr.Image("/home/user/app/demo_source_data/AR/FTwan21_lora/5ZKStnWn8Zo/E014/A000/real_obs_bbox.png", label="Bird's Eye View", type="pil", interactive=False)
	gr.Model3D("/home/user/app/demo_source_data/scenes_glb/5ZKStnWn8Zo.glb", label="3D Scene", interactive=False)

	# Right Zone: World Model's Generation
	with gr.Column(scale=3, min_width=400):
	gr.HTML("<h2 style='text-align: center;'>World Model's Generation</h2>")
	# Using the new video path provided by the user
	gr.Video("/home/user/app/demo_source_data/AR/FTwan21_lora/5ZKStnWn8Zo/E014/A005/world_model_gen/obj_centered_gen_video_1.mp4", label="Generated View", interactive=False)

	# Tab 2: read-only table whose value is computed once by create_leaderboard().
	# NOTE(review): the last two columns are declared "number", but some
	# STATIC_DATA rows hold the strings "–" or "XXX" in them — confirm the
	# table renders these as intended.
	with gr.TabItem("📊 Leaderboard"):
	leaderboard_table = gr.DataFrame(
	value=create_leaderboard(),
	headers=COLUMNS,
	datatype=["str", "str", "str", "str", "str", "number", "number"],
	interactive=False,
	wrap=True
	)

	# Tab 3: Markdown text explaining model categories, metrics, and table notation.
	with gr.TabItem("📝 About"):
	gr.Markdown("""
	# World-in-World: Building a Closed-Loop World Interface to Evaluate World Models

	This leaderboard showcases performance metrics across different types of AI models in world modeling tasks:

	## Model Categories
	- VLM: Vision-Language Models
	- Image Gen.: Image Generation Models
	- Video Gen.: Video Generation Models
	- Video Gen. Post-Train: Post-training specialized Video Generation Models

	## Metrics Explained
	- Acc. ↑: Accuracy score (higher values indicate better performance)
	- Mean Traj. ↓: Mean trajectory error (lower values indicate better performance)

	## Notes
	- † indicates post-training specialized models
	- XXX indicates results pending/unavailable
	- – indicates not applicable or not available

	Results represent performance on world modeling evaluation benchmarks and may vary across different evaluation settings.
	""")

	# Start the Gradio server only when this file is run as a script, so that
	# importing the module builds `demo` without launching it.
	if __name__ == "__main__":
	    demo.launch()