Spaces:
Running
Running
Add 1F1B-overlap implementation.
Browse files- .gitignore +1 -0
- README.md +7 -0
- assets/1f1b.png +2 -2
- assets/1f1b_overlap.png +3 -0
- main.py +43 -10
- src/strategies.py +36 -0
- src/visualizer.py +225 -165
.gitignore
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
./venv
|
3 |
uv.lock
|
4 |
outputs/
|
|
|
5 |
|
6 |
# Uncomment below if you want to include these files
|
7 |
# !assets/*.png
|
|
|
2 |
./venv
|
3 |
uv.lock
|
4 |
outputs/
|
5 |
+
.cursor/*
|
6 |
|
7 |
# Uncomment below if you want to include these files
|
8 |
# !assets/*.png
|
README.md
CHANGED
@@ -50,6 +50,13 @@ uv run python main.py strategy=zb1p num_devices=4 num_stages=4 num_batches=8
|
|
50 |
```
|
51 |

|
52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
## Configuration
|
54 |
|
55 |
The default configuration is in `conf/config.yaml`. You can override any parameter on the command line or create configuration groups for different scenarios.
|
|
|
50 |
```
|
51 |

|
52 |
|
53 |
+
|
54 |
+
Running for 1F1B-batch-overlap strategy:
|
55 |
+
```bash
|
56 |
+
uv run python main.py strategy=1f1b_overlap num_devices=4 num_stages=4 num_batches=8
|
57 |
+
```
|
58 |
+

|
59 |
+
|
60 |
## Configuration
|
61 |
|
62 |
The default configuration is in `conf/config.yaml`. You can override any parameter on the command line or create configuration groups for different scenarios.
|
assets/1f1b.png
CHANGED
![]() |
Git LFS Details
|
![]() |
Git LFS Details
|
assets/1f1b_overlap.png
ADDED
![]() |
Git LFS Details
|
main.py
CHANGED
@@ -1,5 +1,10 @@
|
|
1 |
from src.execution_model import ScheduleConfig
|
2 |
-
from src.strategies import
|
|
|
|
|
|
|
|
|
|
|
3 |
from src.visualizer import visualize_pipeline_parallelism_dash
|
4 |
import hydra
|
5 |
from omegaconf import DictConfig, OmegaConf
|
@@ -16,6 +21,8 @@ def main(cfg: DictConfig) -> None:
|
|
16 |
run_interleave(cfg)
|
17 |
elif cfg.strategy == "zb1p":
|
18 |
run_zero_bubble_1p(cfg)
|
|
|
|
|
19 |
else:
|
20 |
raise ValueError(f"Unknown strategy: {cfg.strategy}")
|
21 |
|
@@ -23,7 +30,9 @@ def main(cfg: DictConfig) -> None:
|
|
23 |
def run_1f1b(cfg: DictConfig) -> None:
|
24 |
"""Run 1F1B pipeline parallelism simulation."""
|
25 |
# Convert OmegaConf to dict for op_times if it exists
|
26 |
-
op_times =
|
|
|
|
|
27 |
|
28 |
schedule_config = ScheduleConfig(
|
29 |
num_devices=cfg.num_devices,
|
@@ -31,7 +40,7 @@ def run_1f1b(cfg: DictConfig) -> None:
|
|
31 |
num_batches=cfg.num_batches,
|
32 |
p2p_latency=cfg.p2p_latency,
|
33 |
op_times=op_times,
|
34 |
-
placement_strategy="standard"
|
35 |
)
|
36 |
schedule = generate_1f1b_schedule(schedule_config)
|
37 |
schedule.execute()
|
@@ -42,15 +51,17 @@ def run_1f1b(cfg: DictConfig) -> None:
|
|
42 |
def run_interleave(cfg: DictConfig) -> None:
|
43 |
"""Run interleaved pipeline parallelism simulation."""
|
44 |
# Convert OmegaConf to dict for op_times if it exists
|
45 |
-
op_times =
|
46 |
-
|
|
|
|
|
47 |
schedule_config = ScheduleConfig(
|
48 |
num_devices=cfg.num_devices,
|
49 |
num_stages=cfg.num_stages,
|
50 |
num_batches=cfg.num_batches,
|
51 |
p2p_latency=cfg.p2p_latency,
|
52 |
placement_strategy="interleave",
|
53 |
-
op_times=op_times
|
54 |
)
|
55 |
schedule = generate_1f1b_interleave_schedule(schedule_config)
|
56 |
schedule.execute()
|
@@ -60,20 +71,42 @@ def run_interleave(cfg: DictConfig) -> None:
|
|
60 |
def run_zero_bubble_1p(cfg: DictConfig) -> None:
|
61 |
"""Run zero bubble 1P pipeline parallelism simulation."""
|
62 |
# Convert OmegaConf to dict for op_times if it exists
|
63 |
-
op_times =
|
64 |
-
|
|
|
|
|
65 |
schedule_config = ScheduleConfig(
|
66 |
num_devices=cfg.num_devices,
|
67 |
num_stages=cfg.num_stages,
|
68 |
num_batches=cfg.num_batches,
|
69 |
p2p_latency=cfg.p2p_latency,
|
70 |
op_times=op_times,
|
71 |
-
split_backward=True
|
72 |
)
|
73 |
schedule = generate_zero_bubble_1p_schedule(schedule_config)
|
74 |
schedule.execute()
|
75 |
visualize_pipeline_parallelism_dash(schedule, port=cfg.visualization_port)
|
76 |
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
if __name__ == "__main__":
|
79 |
-
main()
|
|
|
1 |
from src.execution_model import ScheduleConfig
|
2 |
+
from src.strategies import (
|
3 |
+
generate_1f1b_interleave_schedule,
|
4 |
+
generate_1f1b_overlap_schedule,
|
5 |
+
generate_1f1b_schedule,
|
6 |
+
generate_zero_bubble_1p_schedule,
|
7 |
+
)
|
8 |
from src.visualizer import visualize_pipeline_parallelism_dash
|
9 |
import hydra
|
10 |
from omegaconf import DictConfig, OmegaConf
|
|
|
21 |
run_interleave(cfg)
|
22 |
elif cfg.strategy == "zb1p":
|
23 |
run_zero_bubble_1p(cfg)
|
24 |
+
elif cfg.strategy == "1f1b_overlap":
|
25 |
+
run_1f1b_overlap(cfg)
|
26 |
else:
|
27 |
raise ValueError(f"Unknown strategy: {cfg.strategy}")
|
28 |
|
|
|
30 |
def run_1f1b(cfg: DictConfig) -> None:
|
31 |
"""Run 1F1B pipeline parallelism simulation."""
|
32 |
# Convert OmegaConf to dict for op_times if it exists
|
33 |
+
op_times = (
|
34 |
+
OmegaConf.to_container(cfg.op_times) if hasattr(cfg, "op_times") else None
|
35 |
+
)
|
36 |
|
37 |
schedule_config = ScheduleConfig(
|
38 |
num_devices=cfg.num_devices,
|
|
|
40 |
num_batches=cfg.num_batches,
|
41 |
p2p_latency=cfg.p2p_latency,
|
42 |
op_times=op_times,
|
43 |
+
placement_strategy="standard",
|
44 |
)
|
45 |
schedule = generate_1f1b_schedule(schedule_config)
|
46 |
schedule.execute()
|
|
|
51 |
def run_interleave(cfg: DictConfig) -> None:
|
52 |
"""Run interleaved pipeline parallelism simulation."""
|
53 |
# Convert OmegaConf to dict for op_times if it exists
|
54 |
+
op_times = (
|
55 |
+
OmegaConf.to_container(cfg.op_times) if hasattr(cfg, "op_times") else None
|
56 |
+
)
|
57 |
+
|
58 |
schedule_config = ScheduleConfig(
|
59 |
num_devices=cfg.num_devices,
|
60 |
num_stages=cfg.num_stages,
|
61 |
num_batches=cfg.num_batches,
|
62 |
p2p_latency=cfg.p2p_latency,
|
63 |
placement_strategy="interleave",
|
64 |
+
op_times=op_times,
|
65 |
)
|
66 |
schedule = generate_1f1b_interleave_schedule(schedule_config)
|
67 |
schedule.execute()
|
|
|
71 |
def run_zero_bubble_1p(cfg: DictConfig) -> None:
|
72 |
"""Run zero bubble 1P pipeline parallelism simulation."""
|
73 |
# Convert OmegaConf to dict for op_times if it exists
|
74 |
+
op_times = (
|
75 |
+
OmegaConf.to_container(cfg.op_times) if hasattr(cfg, "op_times") else None
|
76 |
+
)
|
77 |
+
|
78 |
schedule_config = ScheduleConfig(
|
79 |
num_devices=cfg.num_devices,
|
80 |
num_stages=cfg.num_stages,
|
81 |
num_batches=cfg.num_batches,
|
82 |
p2p_latency=cfg.p2p_latency,
|
83 |
op_times=op_times,
|
84 |
+
split_backward=True,
|
85 |
)
|
86 |
schedule = generate_zero_bubble_1p_schedule(schedule_config)
|
87 |
schedule.execute()
|
88 |
visualize_pipeline_parallelism_dash(schedule, port=cfg.visualization_port)
|
89 |
|
90 |
|
91 |
+
def run_1f1b_overlap(cfg: DictConfig) -> None:
    """Run 1F1B overlap pipeline parallelism simulation."""
    # cfg.op_times is optional; convert the OmegaConf node to a plain
    # container when present so ScheduleConfig receives ordinary dicts.
    if hasattr(cfg, "op_times"):
        op_times = OmegaConf.to_container(cfg.op_times)
    else:
        op_times = None

    schedule_config = ScheduleConfig(
        num_devices=cfg.num_devices,
        num_stages=cfg.num_stages,
        num_batches=cfg.num_batches,
        p2p_latency=cfg.p2p_latency,
        op_times=op_times,
        split_backward=False,
    )
    schedule = generate_1f1b_overlap_schedule(schedule_config)
    schedule.execute()
    visualize_pipeline_parallelism_dash(schedule, port=cfg.visualization_port)
|
109 |
+
|
110 |
+
|
111 |
if __name__ == "__main__":
|
112 |
+
main()
|
src/strategies.py
CHANGED
@@ -94,6 +94,42 @@ def generate_zero_bubble_1p_schedule(config: ScheduleConfig):
|
|
94 |
return schedule
|
95 |
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
# Some codes are copied from Megatron-LM
|
98 |
def generate_1f1b_interleave_schedule(config: ScheduleConfig):
|
99 |
schedule = Schedule(config)
|
|
|
94 |
return schedule
|
95 |
|
96 |
|
97 |
+
def generate_1f1b_overlap_schedule(config: ScheduleConfig):
    """Build a 1F1B schedule whose steady phase overlaps forward and backward.

    Each device's queue is filled in three phases:
      1. warmup   — forward passes only (deeper devices need more of them),
      2. steady   — alternating one forward and one backward per iteration,
      3. cooldown — the remaining backward passes.

    Args:
        config: Schedule configuration; ``num_stages`` must equal
            ``num_devices`` (one stage per device).

    Returns:
        The populated ``Schedule`` (operations queued per device; call
        ``execute()`` on the result to assign start/end times).
    """
    schedule = Schedule(config)

    assert config.num_devices == config.num_stages, "num_devices must be equal to num_stages for 1F1B"

    for i in range(config.num_devices):
        fwd_batch_id = 0
        bwd_batch_id = 0
        # Device i (0-indexed from the first stage) needs 2*(D-i-1)+1 warmup
        # forwards; the last device starts with exactly one. Clamp to
        # num_batches so a small micro-batch count cannot make the warmup
        # loop request forward batch ids that do not exist (the original
        # code would enqueue out-of-range batches and leave steady_batches
        # negative).
        cooldown_batches = warmup_batches = min(
            2 * (config.num_devices - i - 1) + 1, config.num_batches
        )
        steady_batches = config.num_batches - warmup_batches

        for _ in range(warmup_batches):
            schedule.dev_queues[i].add_operation(
                schedule.get_op(fwd_batch_id, i, "forward")
            )
            fwd_batch_id += 1

        for _ in range(steady_batches):
            schedule.dev_queues[i].add_operation(
                schedule.get_op(fwd_batch_id, i, "forward")
            )
            fwd_batch_id += 1
            schedule.dev_queues[i].add_operation(
                schedule.get_op(bwd_batch_id, i, "backward")
            )
            bwd_batch_id += 1

        for _ in range(cooldown_batches):
            schedule.dev_queues[i].add_operation(
                schedule.get_op(bwd_batch_id, i, "backward")
            )
            bwd_batch_id += 1

    return schedule
|
131 |
+
|
132 |
+
|
133 |
# Some codes are copied from Megatron-LM
|
134 |
def generate_1f1b_interleave_schedule(config: ScheduleConfig):
|
135 |
schedule = Schedule(config)
|
src/visualizer.py
CHANGED
@@ -12,30 +12,34 @@ from src.execution_model import Schedule
|
|
12 |
def convert_schedule_to_visualization_format(schedule: Schedule):
|
13 |
"""
|
14 |
Converts a Schedule object to the format needed for visualization.
|
15 |
-
|
16 |
Returns:
|
17 |
Dict[int, List[Dict]]: Dictionary mapping device_id to a list of operation dictionaries
|
18 |
"""
|
19 |
# Make sure all operations have start and end times
|
20 |
for op in schedule.ops.values():
|
21 |
if op.start_time is None or op.end_time is None:
|
22 |
-
raise ValueError(
|
23 |
-
|
|
|
|
|
24 |
visualization_data = {}
|
25 |
-
|
26 |
# Organize operations by device
|
27 |
for device_id, device_queue in enumerate(schedule.dev_queues):
|
28 |
visualization_data[device_id] = []
|
29 |
-
|
30 |
for op in device_queue.ops:
|
31 |
-
visualization_data[device_id].append(
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
|
|
|
|
39 |
return visualization_data
|
40 |
|
41 |
|
@@ -44,58 +48,58 @@ def convert_schedule_to_visualization_format(schedule: Schedule):
|
|
44 |
def get_color(op_type: str, stage_id: int, num_devices: int):
|
45 |
# A more harmonious blue palette with better progression for forward operations
|
46 |
forward_colors = [
|
47 |
-
"#5c88f2",
|
48 |
-
"#1a53ff",
|
49 |
-
"#b3c6ff",
|
50 |
-
"#4d79ff",
|
51 |
-
"#809fff",
|
52 |
-
"#0039e6",
|
53 |
-
"#002db3",
|
54 |
-
"#264db3",
|
55 |
-
"#7094db",
|
56 |
-
"#99b3e6"
|
57 |
]
|
58 |
-
|
59 |
# Orange palette for backward operations
|
60 |
backward_colors = [
|
61 |
-
"#ff9933",
|
62 |
-
"#ffad5c",
|
63 |
-
"#ffc285",
|
64 |
-
"#ffd6ad",
|
65 |
-
"#ff8000",
|
66 |
-
"#cc6600",
|
67 |
-
"#ff9933",
|
68 |
-
"#ffb366",
|
69 |
-
"#cc9966",
|
70 |
-
"#ffd699"
|
71 |
]
|
72 |
-
|
73 |
# Improved teal/turquoise palette with better progression for backward_D operations
|
74 |
backward_d_colors = [
|
75 |
-
"#80ffff",
|
76 |
-
"#00cccc",
|
77 |
-
"#00e6e6",
|
78 |
-
"#33ffff",
|
79 |
-
"#00b3b3",
|
80 |
-
"#008080",
|
81 |
-
"#00e6cc",
|
82 |
-
"#4ddbbd",
|
83 |
-
"#80d4c8",
|
84 |
-
"#b3e6e0"
|
85 |
]
|
86 |
-
|
87 |
# Improved green palette with better progression for backward_W operations
|
88 |
backward_w_colors = [
|
89 |
-
"#00cc66",
|
90 |
-
"#00e673",
|
91 |
-
"#33ff99",
|
92 |
-
"#80ffbf",
|
93 |
-
"#009933",
|
94 |
-
"#006622",
|
95 |
-
"#33cc33",
|
96 |
-
"#66cc66",
|
97 |
-
"#99cc99",
|
98 |
-
"#c6e6c6"
|
99 |
]
|
100 |
|
101 |
virtual_stage = stage_id // num_devices
|
@@ -115,7 +119,9 @@ def get_color(op_type: str, stage_id: int, num_devices: int):
|
|
115 |
raise ValueError(f"Invalid operation type: {op_type}")
|
116 |
|
117 |
|
118 |
-
def create_pipeline_figure(
|
|
|
|
|
119 |
"""
|
120 |
Create a Plotly figure for pipeline parallelism scheduling.
|
121 |
|
@@ -126,9 +132,9 @@ def create_pipeline_figure(schedule_data: Dict[int, List[Dict]], max_time=None,
|
|
126 |
"""
|
127 |
# Find the number of devices
|
128 |
num_devices = len(schedule_data)
|
129 |
-
|
130 |
empty_color = "whitesmoke"
|
131 |
-
|
132 |
# Find the maximum time in the schedule if not provided
|
133 |
if max_time is None:
|
134 |
max_time = 0
|
@@ -146,7 +152,9 @@ def create_pipeline_figure(schedule_data: Dict[int, List[Dict]], max_time=None,
|
|
146 |
tasks_processed = 0
|
147 |
|
148 |
if show_progress:
|
149 |
-
progress_bar = tqdm(
|
|
|
|
|
150 |
|
151 |
# Create a custom y-axis with no gaps between devices
|
152 |
y_spacing = 1.0 # Use 1.0 for no gaps
|
@@ -159,7 +167,7 @@ def create_pipeline_figure(schedule_data: Dict[int, List[Dict]], max_time=None,
|
|
159 |
# Add rectangles for each task
|
160 |
for device_idx, device in enumerate(schedule_data):
|
161 |
device_idx_reversed = num_devices - device_idx - 1
|
162 |
-
|
163 |
# Sort tasks by start time to ensure correct rendering
|
164 |
sorted_tasks = sorted(schedule_data[device], key=lambda t: t["start_time"])
|
165 |
|
@@ -189,44 +197,50 @@ def create_pipeline_figure(schedule_data: Dict[int, List[Dict]], max_time=None,
|
|
189 |
# Add rectangle for the task
|
190 |
start_time = task["start_time"]
|
191 |
duration = task["duration"]
|
192 |
-
|
193 |
# Calculate y positions with no gaps
|
194 |
y_pos = device_idx_reversed * y_spacing
|
195 |
-
|
196 |
# Create rectangle using shape (batch-add later)
|
197 |
-
shapes.append(
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
|
|
|
|
208 |
# Add batch number text (batch-add later)
|
209 |
-
annotations.append(
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
|
|
|
|
217 |
# Prepare hover data (add traces in batches later)
|
218 |
hover_text = f"Batch: {task['batch']}<br>Stage: {task['stage']}<br>Type: {name}<br>Start: {task['start_time']:.2f}<br>End: {task['start_time'] + task['duration']:.2f}<br>Duration: {task['duration']:.2f}"
|
219 |
-
|
220 |
-
hover_traces.append(
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
|
|
|
|
230 |
# Update progress
|
231 |
if show_progress:
|
232 |
tasks_processed += 1
|
@@ -234,63 +248,83 @@ def create_pipeline_figure(schedule_data: Dict[int, List[Dict]], max_time=None,
|
|
234 |
|
235 |
# Add all shapes at once for better performance
|
236 |
fig.update_layout(shapes=shapes)
|
237 |
-
|
238 |
# Add all annotations at once
|
239 |
fig.update_layout(annotations=annotations)
|
240 |
-
|
241 |
# Add all hover traces at once
|
242 |
for trace in hover_traces:
|
243 |
fig.add_trace(go.Scatter(**trace))
|
244 |
|
245 |
# Add custom legend
|
246 |
legend_items = []
|
247 |
-
|
248 |
# Find the maximum virtual stage in the data
|
249 |
max_virtual_stage = 0
|
250 |
for device in schedule_data:
|
251 |
for task in schedule_data[device]:
|
252 |
virtual_stage = task["stage"] // num_devices
|
253 |
max_virtual_stage = max(max_virtual_stage, virtual_stage)
|
254 |
-
|
255 |
# Add forward and backward items for each virtual stage
|
256 |
for vs in range(max_virtual_stage + 1):
|
257 |
-
legend_items.append(
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
|
|
|
|
|
|
|
|
265 |
# Add entries for split backward operations if this is a zb1p schedule
|
266 |
-
if any(
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
legend_items.append(
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
# If no tasks found, add default legend items
|
277 |
if not legend_items:
|
278 |
legend_items = [
|
279 |
dict(name="Forward (VS 0)", color=get_color("forward", 0, num_devices)),
|
280 |
dict(name="Backward (VS 0)", color=get_color("backward", 0, num_devices)),
|
281 |
-
dict(
|
282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
]
|
284 |
-
|
285 |
for i, item in enumerate(legend_items):
|
286 |
-
fig.add_trace(
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
|
|
|
|
294 |
if show_progress and i < len(legend_items) - 1:
|
295 |
progress_bar.update(1)
|
296 |
|
@@ -299,11 +333,15 @@ def create_pipeline_figure(schedule_data: Dict[int, List[Dict]], max_time=None,
|
|
299 |
# Modify the ordering to put Device 1 at the top, then Device 0, then the rest
|
300 |
if num_devices >= 2:
|
301 |
# Move Device 1 to the top, followed by Device 0
|
302 |
-
device_labels =
|
303 |
-
|
|
|
|
|
|
|
|
|
304 |
# Calculate tick positions with no gaps
|
305 |
tick_positions = [(num_devices - i - 1) * y_spacing for i in range(num_devices)]
|
306 |
-
|
307 |
# Adjust the range to ensure there are no empty spaces at the end
|
308 |
x_end = max_time * 1.05 # Add a small margin
|
309 |
|
@@ -323,17 +361,17 @@ def create_pipeline_figure(schedule_data: Dict[int, List[Dict]], max_time=None,
|
|
323 |
text=title_text,
|
324 |
x=0.5,
|
325 |
y=0.98, # Move title position closer to the top
|
326 |
-
font=dict(size=20)
|
327 |
),
|
328 |
legend=dict(
|
329 |
orientation="v", # Changed from horizontal to vertical
|
330 |
yanchor="top",
|
331 |
y=1.02, # Position at the top
|
332 |
xanchor="right",
|
333 |
-
x=1.20,
|
334 |
title=dict(text="<b>Operation Types:</b>"),
|
335 |
itemsizing="constant",
|
336 |
-
tracegroupgap=0
|
337 |
),
|
338 |
width=2000, # Increase width to accommodate the expanded legend
|
339 |
height=400, # Maintain current height
|
@@ -351,10 +389,13 @@ def create_pipeline_figure(schedule_data: Dict[int, List[Dict]], max_time=None,
|
|
351 |
# Cache for storing processed schedule data
|
352 |
_schedule_data_cache = {}
|
353 |
|
354 |
-
|
|
|
|
|
|
|
355 |
"""
|
356 |
Create a Dash app to visualize the pipeline schedule.
|
357 |
-
|
358 |
Args:
|
359 |
schedule: Schedule object to visualize
|
360 |
schedule_type: Type of schedule ("1f1b", "zb1p", or custom description)
|
@@ -363,7 +404,7 @@ def create_dash_app(schedule: Schedule, schedule_type="1f1b", enable_caching: bo
|
|
363 |
# Process schedule data only once and cache it
|
364 |
global _schedule_data_cache
|
365 |
cache_key = id(schedule)
|
366 |
-
|
367 |
if enable_caching and cache_key in _schedule_data_cache:
|
368 |
schedule_data = _schedule_data_cache[cache_key]
|
369 |
print("Using cached schedule data")
|
@@ -372,7 +413,7 @@ def create_dash_app(schedule: Schedule, schedule_type="1f1b", enable_caching: bo
|
|
372 |
if enable_caching:
|
373 |
_schedule_data_cache[cache_key] = schedule_data
|
374 |
print("Cached schedule data")
|
375 |
-
|
376 |
total_tasks = sum(len(tasks) for tasks in schedule_data.values())
|
377 |
print(f"Total tasks in schedule: {total_tasks}")
|
378 |
|
@@ -380,31 +421,48 @@ def create_dash_app(schedule: Schedule, schedule_type="1f1b", enable_caching: bo
|
|
380 |
app.title = f"Pipeline Parallelism Visualization - {schedule_type}"
|
381 |
|
382 |
# Create a more informative layout with data size information
|
383 |
-
app.layout = html.Div(
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
405 |
# Cache for storing figure to avoid regenerating it
|
406 |
figure_cache = {}
|
407 |
-
|
408 |
@app.callback(
|
409 |
Output("pipeline-graph", "figure"),
|
410 |
Input("graph-container", "children"),
|
@@ -416,15 +474,15 @@ def create_dash_app(schedule: Schedule, schedule_type="1f1b", enable_caching: bo
|
|
416 |
if enable_caching and cache_key in figure_cache:
|
417 |
print("Using cached figure")
|
418 |
return figure_cache[cache_key]
|
419 |
-
|
420 |
# Create the figure
|
421 |
figure = create_pipeline_figure(schedule_data, show_progress=True)
|
422 |
-
|
423 |
# Cache the figure
|
424 |
if enable_caching:
|
425 |
figure_cache[cache_key] = figure
|
426 |
print("Cached figure")
|
427 |
-
|
428 |
return figure
|
429 |
|
430 |
return app
|
@@ -435,11 +493,11 @@ def visualize_pipeline_parallelism_dash(
|
|
435 |
port: int = 8050,
|
436 |
debug: bool = False,
|
437 |
enable_caching: bool = True,
|
438 |
-
schedule_type="1f1b"
|
439 |
):
|
440 |
"""
|
441 |
Launch a Dash app to visualize the pipeline schedule interactively.
|
442 |
-
|
443 |
Args:
|
444 |
schedule: Schedule object to visualize
|
445 |
port: Port to run the Dash app on
|
@@ -447,6 +505,8 @@ def visualize_pipeline_parallelism_dash(
|
|
447 |
enable_caching: Whether to cache schedule data and figures
|
448 |
schedule_type: Type of schedule ("1f1b", "zb1p", or custom description)
|
449 |
"""
|
450 |
-
app = create_dash_app(
|
|
|
|
|
451 |
print(f"Starting Dash app on http://localhost:{port}/")
|
452 |
app.run_server(debug=debug, port=port)
|
|
|
12 |
def convert_schedule_to_visualization_format(schedule: Schedule):
|
13 |
"""
|
14 |
Converts a Schedule object to the format needed for visualization.
|
15 |
+
|
16 |
Returns:
|
17 |
Dict[int, List[Dict]]: Dictionary mapping device_id to a list of operation dictionaries
|
18 |
"""
|
19 |
# Make sure all operations have start and end times
|
20 |
for op in schedule.ops.values():
|
21 |
if op.start_time is None or op.end_time is None:
|
22 |
+
raise ValueError(
|
23 |
+
"Operations must have start and end times. Run ScheduleExecutor.execute() first."
|
24 |
+
)
|
25 |
+
|
26 |
visualization_data = {}
|
27 |
+
|
28 |
# Organize operations by device
|
29 |
for device_id, device_queue in enumerate(schedule.dev_queues):
|
30 |
visualization_data[device_id] = []
|
31 |
+
|
32 |
for op in device_queue.ops:
|
33 |
+
visualization_data[device_id].append(
|
34 |
+
{
|
35 |
+
"type": op.op_type,
|
36 |
+
"batch": op.batch_id + 1, # +1 because batch_id is 0-indexed
|
37 |
+
"stage": op.stage_id,
|
38 |
+
"start_time": op.start_time,
|
39 |
+
"duration": op.end_time - op.start_time,
|
40 |
+
}
|
41 |
+
)
|
42 |
+
|
43 |
return visualization_data
|
44 |
|
45 |
|
|
|
48 |
def get_color(op_type: str, stage_id: int, num_devices: int):
|
49 |
# A more harmonious blue palette with better progression for forward operations
|
50 |
forward_colors = [
|
51 |
+
"#5c88f2", # Periwinkle blue
|
52 |
+
"#1a53ff", # Deep blue
|
53 |
+
"#b3c6ff", # Light blue
|
54 |
+
"#4d79ff", # Strong blue
|
55 |
+
"#809fff", # Medium blue
|
56 |
+
"#0039e6", # Rich navy
|
57 |
+
"#002db3", # Dark navy
|
58 |
+
"#264db3", # Royal blue
|
59 |
+
"#7094db", # Steel blue
|
60 |
+
"#99b3e6", # Pale blue
|
61 |
]
|
62 |
+
|
63 |
# Orange palette for backward operations
|
64 |
backward_colors = [
|
65 |
+
"#ff9933", # Bright orange
|
66 |
+
"#ffad5c", # Medium orange
|
67 |
+
"#ffc285", # Light orange
|
68 |
+
"#ffd6ad", # Pale orange
|
69 |
+
"#ff8000", # Deep orange
|
70 |
+
"#cc6600", # Dark orange
|
71 |
+
"#ff9933", # Vivid orange
|
72 |
+
"#ffb366", # Soft orange
|
73 |
+
"#cc9966", # Muted orange
|
74 |
+
"#ffd699", # Light amber
|
75 |
]
|
76 |
+
|
77 |
# Improved teal/turquoise palette with better progression for backward_D operations
|
78 |
backward_d_colors = [
|
79 |
+
"#80ffff", # Light cyan
|
80 |
+
"#00cccc", # Teal
|
81 |
+
"#00e6e6", # Bright teal
|
82 |
+
"#33ffff", # Cyan
|
83 |
+
"#00b3b3", # Medium teal
|
84 |
+
"#008080", # Dark teal
|
85 |
+
"#00e6cc", # Turquoise
|
86 |
+
"#4ddbbd", # Aqua
|
87 |
+
"#80d4c8", # Pale teal
|
88 |
+
"#b3e6e0", # Ice
|
89 |
]
|
90 |
+
|
91 |
# Improved green palette with better progression for backward_W operations
|
92 |
backward_w_colors = [
|
93 |
+
"#00cc66", # Medium green
|
94 |
+
"#00e673", # Bright green
|
95 |
+
"#33ff99", # Mint green
|
96 |
+
"#80ffbf", # Light green
|
97 |
+
"#009933", # Forest green
|
98 |
+
"#006622", # Dark green
|
99 |
+
"#33cc33", # True green
|
100 |
+
"#66cc66", # Sage green
|
101 |
+
"#99cc99", # Pale green
|
102 |
+
"#c6e6c6", # Pastel green
|
103 |
]
|
104 |
|
105 |
virtual_stage = stage_id // num_devices
|
|
|
119 |
raise ValueError(f"Invalid operation type: {op_type}")
|
120 |
|
121 |
|
122 |
+
def create_pipeline_figure(
|
123 |
+
schedule_data: Dict[int, List[Dict]], max_time=None, show_progress=True
|
124 |
+
):
|
125 |
"""
|
126 |
Create a Plotly figure for pipeline parallelism scheduling.
|
127 |
|
|
|
132 |
"""
|
133 |
# Find the number of devices
|
134 |
num_devices = len(schedule_data)
|
135 |
+
|
136 |
empty_color = "whitesmoke"
|
137 |
+
|
138 |
# Find the maximum time in the schedule if not provided
|
139 |
if max_time is None:
|
140 |
max_time = 0
|
|
|
152 |
tasks_processed = 0
|
153 |
|
154 |
if show_progress:
|
155 |
+
progress_bar = tqdm(
|
156 |
+
total=total_tasks + num_devices + 3, desc="Creating visualization"
|
157 |
+
)
|
158 |
|
159 |
# Create a custom y-axis with no gaps between devices
|
160 |
y_spacing = 1.0 # Use 1.0 for no gaps
|
|
|
167 |
# Add rectangles for each task
|
168 |
for device_idx, device in enumerate(schedule_data):
|
169 |
device_idx_reversed = num_devices - device_idx - 1
|
170 |
+
|
171 |
# Sort tasks by start time to ensure correct rendering
|
172 |
sorted_tasks = sorted(schedule_data[device], key=lambda t: t["start_time"])
|
173 |
|
|
|
197 |
# Add rectangle for the task
|
198 |
start_time = task["start_time"]
|
199 |
duration = task["duration"]
|
200 |
+
|
201 |
# Calculate y positions with no gaps
|
202 |
y_pos = device_idx_reversed * y_spacing
|
203 |
+
|
204 |
# Create rectangle using shape (batch-add later)
|
205 |
+
shapes.append(
|
206 |
+
dict(
|
207 |
+
type="rect",
|
208 |
+
x0=start_time,
|
209 |
+
y0=y_pos - 0.5,
|
210 |
+
x1=start_time + duration,
|
211 |
+
y1=y_pos + 0.5,
|
212 |
+
line=dict(color="black", width=0.5),
|
213 |
+
fillcolor=color,
|
214 |
+
layer="above",
|
215 |
+
)
|
216 |
+
)
|
217 |
+
|
218 |
# Add batch number text (batch-add later)
|
219 |
+
annotations.append(
|
220 |
+
dict(
|
221 |
+
x=start_time + duration / 2,
|
222 |
+
y=y_pos,
|
223 |
+
text=f"{task['batch']}",
|
224 |
+
showarrow=False,
|
225 |
+
font=dict(color=text_color, size=12, family="Arial, bold"),
|
226 |
+
)
|
227 |
+
)
|
228 |
+
|
229 |
# Prepare hover data (add traces in batches later)
|
230 |
hover_text = f"Batch: {task['batch']}<br>Stage: {task['stage']}<br>Type: {name}<br>Start: {task['start_time']:.2f}<br>End: {task['start_time'] + task['duration']:.2f}<br>Duration: {task['duration']:.2f}"
|
231 |
+
|
232 |
+
hover_traces.append(
|
233 |
+
dict(
|
234 |
+
x=[start_time + duration / 2],
|
235 |
+
y=[y_pos],
|
236 |
+
mode="markers",
|
237 |
+
marker=dict(opacity=0), # Invisible marker
|
238 |
+
hoverinfo="text",
|
239 |
+
text=hover_text,
|
240 |
+
showlegend=False,
|
241 |
+
)
|
242 |
+
)
|
243 |
+
|
244 |
# Update progress
|
245 |
if show_progress:
|
246 |
tasks_processed += 1
|
|
|
248 |
|
249 |
# Add all shapes at once for better performance
|
250 |
fig.update_layout(shapes=shapes)
|
251 |
+
|
252 |
# Add all annotations at once
|
253 |
fig.update_layout(annotations=annotations)
|
254 |
+
|
255 |
# Add all hover traces at once
|
256 |
for trace in hover_traces:
|
257 |
fig.add_trace(go.Scatter(**trace))
|
258 |
|
259 |
# Add custom legend
|
260 |
legend_items = []
|
261 |
+
|
262 |
# Find the maximum virtual stage in the data
|
263 |
max_virtual_stage = 0
|
264 |
for device in schedule_data:
|
265 |
for task in schedule_data[device]:
|
266 |
virtual_stage = task["stage"] // num_devices
|
267 |
max_virtual_stage = max(max_virtual_stage, virtual_stage)
|
268 |
+
|
269 |
# Add forward and backward items for each virtual stage
|
270 |
for vs in range(max_virtual_stage + 1):
|
271 |
+
legend_items.append(
|
272 |
+
dict(
|
273 |
+
name=f"Forward (VS {vs})",
|
274 |
+
color=get_color("forward", vs * num_devices, num_devices),
|
275 |
+
)
|
276 |
+
)
|
277 |
+
legend_items.append(
|
278 |
+
dict(
|
279 |
+
name=f"Backward (VS {vs})",
|
280 |
+
color=get_color("backward", vs * num_devices, num_devices),
|
281 |
+
)
|
282 |
+
)
|
283 |
# Add entries for split backward operations if this is a zb1p schedule
|
284 |
+
if any(
|
285 |
+
task["type"] in ["backward_D", "backward_W"]
|
286 |
+
for device in schedule_data
|
287 |
+
for task in schedule_data[device]
|
288 |
+
):
|
289 |
+
legend_items.append(
|
290 |
+
dict(
|
291 |
+
name=f"Backward Grad (VS {vs})",
|
292 |
+
color=get_color("backward_D", vs * num_devices, num_devices),
|
293 |
+
)
|
294 |
+
)
|
295 |
+
legend_items.append(
|
296 |
+
dict(
|
297 |
+
name=f"Backward Weight (VS {vs})",
|
298 |
+
color=get_color("backward_W", vs * num_devices, num_devices),
|
299 |
+
)
|
300 |
+
)
|
301 |
+
|
302 |
# If no tasks found, add default legend items
|
303 |
if not legend_items:
|
304 |
legend_items = [
|
305 |
dict(name="Forward (VS 0)", color=get_color("forward", 0, num_devices)),
|
306 |
dict(name="Backward (VS 0)", color=get_color("backward", 0, num_devices)),
|
307 |
+
dict(
|
308 |
+
name="Backward Grad (VS 0)",
|
309 |
+
color=get_color("backward_D", 0, num_devices),
|
310 |
+
),
|
311 |
+
dict(
|
312 |
+
name="Backward Weight (VS 0)",
|
313 |
+
color=get_color("backward_W", 0, num_devices),
|
314 |
+
),
|
315 |
]
|
316 |
+
|
317 |
for i, item in enumerate(legend_items):
|
318 |
+
fig.add_trace(
|
319 |
+
go.Scatter(
|
320 |
+
x=[None],
|
321 |
+
y=[None],
|
322 |
+
mode="markers",
|
323 |
+
marker=dict(size=10, color=item["color"]),
|
324 |
+
name=item["name"],
|
325 |
+
showlegend=True,
|
326 |
+
)
|
327 |
+
)
|
328 |
if show_progress and i < len(legend_items) - 1:
|
329 |
progress_bar.update(1)
|
330 |
|
|
|
333 |
# Modify the ordering to put Device 1 at the top, then Device 0, then the rest
|
334 |
if num_devices >= 2:
|
335 |
# Move Device 1 to the top, followed by Device 0
|
336 |
+
device_labels = (
|
337 |
+
[device_labels[1], device_labels[0]] + device_labels[2:]
|
338 |
+
if num_devices > 1
|
339 |
+
else device_labels
|
340 |
+
)
|
341 |
+
|
342 |
# Calculate tick positions with no gaps
|
343 |
tick_positions = [(num_devices - i - 1) * y_spacing for i in range(num_devices)]
|
344 |
+
|
345 |
# Adjust the range to ensure there are no empty spaces at the end
|
346 |
x_end = max_time * 1.05 # Add a small margin
|
347 |
|
|
|
361 |
text=title_text,
|
362 |
x=0.5,
|
363 |
y=0.98, # Move title position closer to the top
|
364 |
+
font=dict(size=20),
|
365 |
),
|
366 |
legend=dict(
|
367 |
orientation="v", # Changed from horizontal to vertical
|
368 |
yanchor="top",
|
369 |
y=1.02, # Position at the top
|
370 |
xanchor="right",
|
371 |
+
x=1.20, # Position further to the right to accommodate more items
|
372 |
title=dict(text="<b>Operation Types:</b>"),
|
373 |
itemsizing="constant",
|
374 |
+
tracegroupgap=0,
|
375 |
),
|
376 |
width=2000, # Increase width to accommodate the expanded legend
|
377 |
height=400, # Maintain current height
|
|
|
389 |
# Cache for storing processed schedule data
|
390 |
_schedule_data_cache = {}
|
391 |
|
392 |
+
|
393 |
+
def create_dash_app(
|
394 |
+
schedule: Schedule, schedule_type="1f1b", enable_caching: bool = True
|
395 |
+
):
|
396 |
"""
|
397 |
Create a Dash app to visualize the pipeline schedule.
|
398 |
+
|
399 |
Args:
|
400 |
schedule: Schedule object to visualize
|
401 |
schedule_type: Type of schedule ("1f1b", "zb1p", or custom description)
|
|
|
404 |
# Process schedule data only once and cache it
|
405 |
global _schedule_data_cache
|
406 |
cache_key = id(schedule)
|
407 |
+
|
408 |
if enable_caching and cache_key in _schedule_data_cache:
|
409 |
schedule_data = _schedule_data_cache[cache_key]
|
410 |
print("Using cached schedule data")
|
|
|
413 |
if enable_caching:
|
414 |
_schedule_data_cache[cache_key] = schedule_data
|
415 |
print("Cached schedule data")
|
416 |
+
|
417 |
total_tasks = sum(len(tasks) for tasks in schedule_data.values())
|
418 |
print(f"Total tasks in schedule: {total_tasks}")
|
419 |
|
|
|
421 |
app.title = f"Pipeline Parallelism Visualization - {schedule_type}"
|
422 |
|
423 |
# Create a more informative layout with data size information
|
424 |
+
app.layout = html.Div(
|
425 |
+
[
|
426 |
+
html.H1(
|
427 |
+
f"Pipeline Parallelism Visualization - {schedule_type}",
|
428 |
+
style={"textAlign": "center"},
|
429 |
+
),
|
430 |
+
html.Div(
|
431 |
+
[
|
432 |
+
html.P(
|
433 |
+
f"Number of devices: {len(schedule_data)}",
|
434 |
+
style={"display": "inline-block", "marginRight": "20px"},
|
435 |
+
),
|
436 |
+
html.P(
|
437 |
+
f"Total tasks: {total_tasks}",
|
438 |
+
style={"display": "inline-block", "marginRight": "20px"},
|
439 |
+
),
|
440 |
+
],
|
441 |
+
style={"marginBottom": "20px"},
|
442 |
+
),
|
443 |
+
html.Div(id="graph-container", children=[]),
|
444 |
+
dcc.Loading(
|
445 |
+
id="loading-graph",
|
446 |
+
type="circle",
|
447 |
+
children=[
|
448 |
+
dcc.Graph(
|
449 |
+
id="pipeline-graph",
|
450 |
+
config={
|
451 |
+
"displayModeBar": True,
|
452 |
+
"toImageButtonOptions": {
|
453 |
+
"format": "png",
|
454 |
+
"filename": "pipeline_visualization",
|
455 |
+
},
|
456 |
+
},
|
457 |
+
),
|
458 |
+
],
|
459 |
+
),
|
460 |
+
]
|
461 |
+
)
|
462 |
+
|
463 |
# Cache for storing figure to avoid regenerating it
|
464 |
figure_cache = {}
|
465 |
+
|
466 |
@app.callback(
|
467 |
Output("pipeline-graph", "figure"),
|
468 |
Input("graph-container", "children"),
|
|
|
474 |
if enable_caching and cache_key in figure_cache:
|
475 |
print("Using cached figure")
|
476 |
return figure_cache[cache_key]
|
477 |
+
|
478 |
# Create the figure
|
479 |
figure = create_pipeline_figure(schedule_data, show_progress=True)
|
480 |
+
|
481 |
# Cache the figure
|
482 |
if enable_caching:
|
483 |
figure_cache[cache_key] = figure
|
484 |
print("Cached figure")
|
485 |
+
|
486 |
return figure
|
487 |
|
488 |
return app
|
|
|
493 |
port: int = 8050,
|
494 |
debug: bool = False,
|
495 |
enable_caching: bool = True,
|
496 |
+
schedule_type="1f1b",
|
497 |
):
|
498 |
"""
|
499 |
Launch a Dash app to visualize the pipeline schedule interactively.
|
500 |
+
|
501 |
Args:
|
502 |
schedule: Schedule object to visualize
|
503 |
port: Port to run the Dash app on
|
|
|
505 |
enable_caching: Whether to cache schedule data and figures
|
506 |
schedule_type: Type of schedule ("1f1b", "zb1p", or custom description)
|
507 |
"""
|
508 |
+
app = create_dash_app(
|
509 |
+
schedule, schedule_type=schedule_type, enable_caching=enable_caching
|
510 |
+
)
|
511 |
print(f"Starting Dash app on http://localhost:{port}/")
|
512 |
app.run_server(debug=debug, port=port)
|