Victarry committed on
Commit
6688c1a
·
1 Parent(s): 2623d17

Update formula for 1F1B-interleave-overlap.

Browse files
Files changed (1) hide show
  1. formula.py +37 -3
formula.py CHANGED
@@ -1,18 +1,18 @@
1
  # PP schedule config
2
  from src.execution_model import ScheduleConfig
3
- from src.strategies import generate_dualpipe_v_schedule
4
 
5
 
6
  p = 4 # PP size
7
  v = 2 # number of virtual stages
8
- m = 10 # total microbatches
9
 
10
  # stage time config
11
  F = 2.0 # forward time in one PP rank for all stages
12
  W = 2.0 # backward_W time in one PP rank for all stages
13
  D = 2.0 # backward_D time in one PP rank for all stages
14
  B = W + D # backward time in one PP rank for all stages
15
- FwB = 6 # overlapped forward backward time in one PP rank for all stages
16
 
17
  op_times = {
18
  "forward": F,
@@ -78,5 +78,39 @@ def dualpipe_v_execution_time_by_emulate():
78
 
79
  return dual_pipe_schedule.get_total_execution_time()
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  print(f"DualPipe-V by emulate: {dualpipe_v_execution_time_by_emulate()}")
82
  print(f"DualPipe-V by formula detailed: {dualpipe_v_execution_time_by_formula_detailed()}")
 
 
 
1
  # PP schedule config
2
  from src.execution_model import ScheduleConfig
3
+ from src.strategies import generate_1f1b_interleave_overlap_schedule, generate_dualpipe_v_schedule
4
 
5
 
6
  p = 4 # PP size
7
  v = 2 # number of virtual stages
8
+ m = 16 # total microbatches
9
 
10
  # stage time config
11
  F = 2.0 # forward time in one PP rank for all stages
12
  W = 2.0 # backward_W time in one PP rank for all stages
13
  D = 2.0 # backward_D time in one PP rank for all stages
14
  B = W + D # backward time in one PP rank for all stages
15
+ FwB = 5.5 # overlapped forward backward time in one PP rank for all stages
16
 
17
  op_times = {
18
  "forward": F,
 
78
 
79
  return dual_pipe_schedule.get_total_execution_time()
80
 
81
def overlap_1f1b_execution_time_by_emulate():
    """Emulate the 1F1B-interleave-overlap schedule and return its total time.

    The per-rank op times (F, B, D, W, FwB) are divided by the number of
    virtual stages ``v`` to obtain per-stage times. A schedule is then
    generated for ``p`` devices, ``p * v`` stages and ``m`` microbatches,
    executed, and queried for its total execution time.

    Returns:
        Whatever ``get_total_execution_time()`` reports for the executed
        schedule.
    """
    # Module-level times are per PP rank; each rank hosts v stages, so a
    # single stage takes 1/v of the per-rank time.
    per_rank_times = (
        ("forward", F),
        ("backward", B),
        ("backward_D", D),
        ("backward_W", W),
        ("overlapped_forward_backward", FwB),
    )
    per_stage_times = {op_name: duration / v for op_name, duration in per_rank_times}

    total_stages = p * v
    config = ScheduleConfig(
        num_devices=p,
        num_stages=total_stages,
        num_batches=m,
        p2p_latency=0.0,  # communication latency is not modelled here
        op_times=per_stage_times,
        split_backward=False,
        placement_strategy="interleave",
    )

    schedule = generate_1f1b_interleave_overlap_schedule(config)
    schedule.execute()
    return schedule.get_total_execution_time()
101
+
102
def overlap_1f1b_execution_time_by_formula():
    """Return the closed-form time estimate for 1F1B-interleave-overlap.

    The estimate is the sum of four terms, all built from the module-level
    configuration (``p``, ``v``, ``m``, ``F``, ``B``, ``FwB``):

    * a forward bubble of ``(p - 1)`` per-stage forward times,
    * a backward bubble of ``(p - 1)`` per-stage backward times,
    * ``p * (v - 1) + 1`` stage slots charged at the separate
      forward + backward per-stage cost,
    * the remaining ``m * v`` minus that many slots charged at the
      overlapped forward-backward per-stage cost.

    Returns:
        The total estimated execution time.
    """
    idle_steps = p - 1
    fwd_bubble = idle_steps * F / v
    bwd_bubble = idle_steps * B / v

    # Slots that run forward and backward without overlap.
    # NOTE(review): presumably the warm-up/cool-down portion of the
    # schedule; confirm against the emulator.
    separate_slots = p * (v - 1) + 1
    separate_cost = separate_slots * (F + B) / v

    # Every other (microbatch, virtual stage) slot uses the overlapped op.
    fused_slots = m * v - separate_slots
    fused_cost = fused_slots * FwB / v

    # Same left-to-right summation order as before to keep float results stable.
    return fwd_bubble + bwd_bubble + separate_cost + fused_cost
112
+
113
  print(f"DualPipe-V by emulate: {dualpipe_v_execution_time_by_emulate()}")
114
  print(f"DualPipe-V by formula detailed: {dualpipe_v_execution_time_by_formula_detailed()}")
115
+ print(f"Overlap-1f1b by emulate: {overlap_1f1b_execution_time_by_emulate()}")
116
+ print(f"Overlap-1f1b by formula: {overlap_1f1b_execution_time_by_formula()}")