Spaces:

ma7583
/

flops_calc

Sleeping

App Files Files Community

ma7583 committed on May 19

Commit

1b94cd6

verified ·

1 Parent(s): f16abf0

Create app.py

Browse files

Files changed (1) hide show

app.py +66 -0

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import gradio as gr
+def estimate_transformer_stats(batch_size, seq_len, num_layers, hidden_dim, vocab_size, show_breakdown):
+    B = batch_size
+    S = seq_len
+    L = num_layers
+    D = hidden_dim
+    V = vocab_size
+    # --- Parameters ---
+    num_params = L * 12 * (D ** 2) + D * V
+    # --- FLOPs --- (using 2 * m * n * p per matmul)
+    attn_proj_flops = 2 * 3 * S * D * D
+    attn_score_flops = 2 * S * D * S
+    attn_out_proj_flops = 2 * S * D * D
+    ffn_flops = 2 * 2 * S * D * 4 * D
+    logit_flops = 2 * S * D * V / L
+    total_layer_flops = attn_proj_flops + attn_score_flops + attn_out_proj_flops + ffn_flops + logit_flops
+    total_flops = 6 * B * L * total_layer_flops
+    output_lines = [
+        f"Parameters: P = 12 * L * D^2 + D * V",
+        f"           = 12 * {L} * {D}^2 + {D} * {V} = {num_params:.2e}",
+        f"",
+        f"FLOPs per layer (per sequence):",
+        f"  Attention Projections (QKV): 2 * 3 * S * D^2 = 2 * 3 * {S} * {D}^2 = {attn_proj_flops:.2e}",
+        f"  Attention Scores (QKᵀ):      2 * S * D * S = 2 * {S} * {D} * {S} = {attn_score_flops:.2e}",
+        f"  Attention Output Proj:       2 * S * D^2   = 2 * {S} * {D}^2 = {attn_out_proj_flops:.2e}",
+        f"  Feedforward Network:         2 * 2 * S * D * 4D = 2*2*{S}*{D}*{4*D} = {ffn_flops:.2e}",
+        f"  Logits:                      2 * S * D * V / L = 2*{S}*{D}*{V} / {L} = {logit_flops:.2e}",
+        f"",
+        f"Layer Total FLOPs = {total_layer_flops:.2e}",
+        f"",
+        f"Total Training FLOPs = 6 * B * L * Layer_FLOPs",
+        f"                    = 6 * {B} * {L} * {total_layer_flops:.2e} = {total_flops:.2e}"
+    ]
+    if show_breakdown:
+        output_lines.append("\nComponent-wise totals across training batch:")
+        output_lines.append(f"  - QKV Projections: {attn_proj_flops * B * L:.2e}")
+        output_lines.append(f"  - Attention Scores: {attn_score_flops * B * L:.2e}")
+        output_lines.append(f"  - Attention Output: {attn_out_proj_flops * B * L:.2e}")
+        output_lines.append(f"  - FFN: {ffn_flops * B * L:.2e}")
+        output_lines.append(f"  - Logits: {logit_flops * B * L:.2e}")
+    return "\n".join(output_lines)
# Gradio UI: five numeric fields and a checkbox are passed positionally to
# estimate_transformer_stats; the returned report is shown in one textbox.
iface = gr.Interface(
    fn=estimate_transformer_stats,
    inputs=[
        # precision=0 rounds each field to an integer, so the formulas in the
        # report read "24" rather than "24.0".
        gr.Number(label="Batch Size", value=32, precision=0),
        gr.Number(label="Sequence Length", value=2048, precision=0),
        gr.Number(label="Number of Layers", value=24, precision=0),
        gr.Number(label="Hidden Size (d_model)", value=2048, precision=0),
        gr.Number(label="Vocabulary Size", value=50272, precision=0),
        gr.Checkbox(label="Show FLOPs Breakdown", value=True),
    ],
    outputs=gr.Textbox(label="Estimates"),
    title="Transformer Parameter and FLOPs Estimator",
    description="Estimates parameter count and training FLOPs for decoder-only Transformers (like OPT/GPT). Shows formulas and per-component breakdown.",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()