delightfulrachel committed on
Commit c309035 · verified · 1 Parent(s): 29a68b1

Update app.py

Files changed (1)
  1. app.py +95 -548
app.py CHANGED
@@ -2,10 +2,8 @@ import gradio as gr
  import pandas as pd
  import numpy as np
  import plotly.express as px
- import plotly.graph_objects as go
 
- # Initialize pricing data
- # AWS pricing - Instance types and their properties
  aws_instances = {
  "g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB"},
  "g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB"},
@@ -15,7 +13,6 @@ aws_instances = {
  "p4d.24xlarge": {"vcpus": 96, "memory": 1152, "gpu": "8x NVIDIA A100", "hourly_rate": 32.77, "gpu_memory": "8x40GB"}
  }
 
- # GCP pricing - Instance types and their properties
  gcp_instances = {
  "a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB"},
  "a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB"},
@@ -25,7 +22,6 @@ gcp_instances = {
  "g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB"}
  }
 
- # API pricing - Models and their prices
  api_pricing = {
  "OpenAI": {
  "GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},
@@ -46,564 +42,115 @@ api_pricing = {
  }
  }
 
- # Model sizes and memory requirements
  model_sizes = {
- "Small (7B parameters)": {"memory_required": 14, "throughput_factor": 1.0},
- "Medium (13B parameters)": {"memory_required": 26, "throughput_factor": 0.7},
- "Large (70B parameters)": {"memory_required": 140, "throughput_factor": 0.3},
- "XL (180B parameters)": {"memory_required": 360, "throughput_factor": 0.15},
  }
 
- # Calculate costs
- def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
- instance_data = aws_instances[instance]
- base_hourly = instance_data["hourly_rate"]
-
- # Apply discounts for reservation or spot
  if spot:
- hourly_rate = base_hourly * 0.3 # 70% discount for spot
  elif reserved:
- discount_factors = {1: 0.6, 3: 0.4} # 40% for 1 year, 60% for 3 years
- hourly_rate = base_hourly * discount_factors.get(years, 0.6)
- else:
- hourly_rate = base_hourly
-
- compute_cost = hourly_rate * hours
- storage_cost = storage * 0.10 # $0.10 per GB for EBS
-
- return {
- "compute_cost": compute_cost,
- "storage_cost": storage_cost,
- "total_cost": compute_cost + storage_cost,
- "instance_details": instance_data
- }
-
- def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
- instance_data = gcp_instances[instance]
- base_hourly = instance_data["hourly_rate"]
-
- # Apply discounts
- if spot:
- hourly_rate = base_hourly * 0.2 # 80% discount for preemptible
- elif reserved:
- discount_factors = {1: 0.7, 3: 0.5} # 30% for 1 year, 50% for 3 years
- hourly_rate = base_hourly * discount_factors.get(years, 0.7)
- else:
- hourly_rate = base_hourly
-
- compute_cost = hourly_rate * hours
- storage_cost = storage * 0.04 # $0.04 per GB for Standard SSD
-
- return {
- "compute_cost": compute_cost,
- "storage_cost": storage_cost,
- "total_cost": compute_cost + storage_cost,
- "instance_details": instance_data
- }
 
- def calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls):
- model_data = api_pricing[provider][model]
-
- input_cost = (input_tokens * model_data["input_per_1M"]) / 1
- output_cost = (output_tokens * model_data["output_per_1M"]) / 1
-
- # Add a small cost for API calls for some providers
- api_call_costs = 0
- if provider == "TogetherAI":
- api_call_costs = api_calls * 0.0001 # $0.0001 per request
-
- total_cost = input_cost + output_cost + api_call_costs
-
- return {
- "input_cost": input_cost,
- "output_cost": output_cost,
- "api_call_cost": api_call_costs,
- "total_cost": total_cost,
- "model_details": model_data
- }
 
- # Filter instances based on model size requirements
- def filter_compatible_instances(instances_dict, min_memory_required):
- compatible = {}
- for name, data in instances_dict.items():
- # Parse GPU memory
- memory_str = data["gpu_memory"]
-
- # Handle multiple GPUs
- if "x" in memory_str and not memory_str.startswith(("1x", "2x", "4x", "8x")):
- # Format: "16GB"
- memory_val = int(memory_str.split("GB")[0])
- elif "x" in memory_str:
- # Format: "8x40GB"
- parts = memory_str.split("x")
- num_gpus = int(parts[0])
- memory_per_gpu = int(parts[1].split("GB")[0])
- memory_val = num_gpus * memory_per_gpu
  else:
- # Format: "40GB"
- memory_val = int(memory_str.split("GB")[0])
-
- if memory_val >= min_memory_required:
- compatible[name] = data
-
- return compatible
 
  def generate_cost_comparison(
- compute_hours,
- tokens_per_month,
- input_ratio,
- api_calls,
- model_size,
- storage_gb,
- reserved_instances,
- spot_instances,
- multi_year_commitment
  ):
- # Calculate input and output tokens
- input_tokens = tokens_per_month * (input_ratio / 100)
- output_tokens = tokens_per_month * (1 - (input_ratio / 100))
-
- # Check model memory requirements
- min_memory_required = model_sizes[model_size]["memory_required"]
-
- # Filter compatible instances
- compatible_aws = filter_compatible_instances(aws_instances, min_memory_required)
- compatible_gcp = filter_compatible_instances(gcp_instances, min_memory_required)
-
  results = []
-
- # Generate HTML for AWS options
- if compatible_aws:
- aws_results = "<h3>AWS Compatible Instances</h3>"
- aws_results += "<table width='100%'><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Hourly Rate</th><th>Monthly Cost</th></tr>"
-
- best_aws = None
- best_aws_cost = float('inf')
-
- for instance in compatible_aws:
- cost_result = calculate_aws_cost(instance, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
- total_cost = cost_result["total_cost"]
-
- if total_cost < best_aws_cost:
- best_aws = instance
- best_aws_cost = total_cost
-
- aws_results += f"<tr><td>{instance}</td><td>{compatible_aws[instance]['vcpus']}</td><td>{compatible_aws[instance]['memory']}GB</td><td>{compatible_aws[instance]['gpu']}</td><td>${compatible_aws[instance]['hourly_rate']:.3f}</td><td>${total_cost:.2f}</td></tr>"
-
- aws_results += "</table>"
-
- if best_aws:
- best_aws_data = calculate_aws_cost(best_aws, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
- results.append({
- "provider": f"AWS ({best_aws})",
- "cost": best_aws_data["total_cost"],
- "type": "Cloud"
- })
- else:
- aws_results = "<h3>AWS Compatible Instances</h3><p>No compatible AWS instances found for this model size.</p>"
- best_aws = None
- best_aws_cost = float('inf')
-
- # Generate HTML for GCP options
- if compatible_gcp:
- gcp_results = "<h3>Google Cloud Compatible Instances</h3>"
- gcp_results += "<table width='100%'><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Hourly Rate</th><th>Monthly Cost</th></tr>"
-
- best_gcp = None
- best_gcp_cost = float('inf')
-
- for instance in compatible_gcp:
- cost_result = calculate_gcp_cost(instance, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
- total_cost = cost_result["total_cost"]
-
- if total_cost < best_gcp_cost:
- best_gcp = instance
- best_gcp_cost = total_cost
-
- gcp_results += f"<tr><td>{instance}</td><td>{compatible_gcp[instance]['vcpus']}</td><td>{compatible_gcp[instance]['memory']}GB</td><td>{compatible_gcp[instance]['gpu']}</td><td>${compatible_gcp[instance]['hourly_rate']:.3f}</td><td>${total_cost:.2f}</td></tr>"
-
- gcp_results += "</table>"
-
- if best_gcp:
- best_gcp_data = calculate_gcp_cost(best_gcp, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
- results.append({
- "provider": f"GCP ({best_gcp})",
- "cost": best_gcp_data["total_cost"],
- "type": "Cloud"
- })
- else:
- gcp_results = "<h3>Google Cloud Compatible Instances</h3><p>No compatible Google Cloud instances found for this model size.</p>"
- best_gcp = None
- best_gcp_cost = float('inf')
-
- # Generate HTML for API options
- api_results = "<h3>API Options</h3>"
- api_results += "<table width='100%'><tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Total Cost</th><th>Context Length</th></tr>"
-
- api_costs = {}
-
- for provider in api_pricing:
- for model in api_pricing[provider]:
- cost_data = calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls)
- api_costs[(provider, model)] = cost_data
-
- api_results += f"<tr><td>{provider}</td><td>{model}</td><td>${cost_data['input_cost']:.2f}</td><td>${cost_data['output_cost']:.2f}</td><td>${cost_data['total_cost']:.2f}</td><td>{api_pricing[provider][model]['token_context']:,}</td></tr>"
-
- api_results += "</table>"
-
- # Find best API option
- best_api = min(api_costs.keys(), key=lambda x: api_costs[x]["total_cost"])
- best_api_cost = api_costs[best_api]
-
- results.append({
- "provider": f"{best_api[0]} ({best_api[1]})",
- "cost": best_api_cost["total_cost"],
- "type": "API"
  })
-
- # Create recommendation HTML
- recommendation = "<h3>Recommendation</h3>"
-
- # Find the cheapest option
- cheapest = min(results, key=lambda x: x["cost"])
-
- if cheapest["type"] == "API":
- recommendation += f"<p>Based on your usage parameters, the <strong>{cheapest['provider']}</strong> API endpoint is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"
-
- # Calculate API vs cloud cost ratio
- cheapest_cloud = None
- for result in results:
- if result["type"] == "Cloud":
- if cheapest_cloud is None or result["cost"] < cheapest_cloud["cost"]:
- cheapest_cloud = result
-
- if cheapest_cloud:
- ratio = cheapest_cloud["cost"] / cheapest["cost"]
- recommendation += f"<p>This is <strong>{ratio:.1f}x cheaper</strong> than the most affordable cloud option ({cheapest_cloud['provider']}).</p>"
- else:
- recommendation += f"<p>Based on your usage parameters, <strong>{cheapest['provider']}</strong> is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"
-
- # Find cheapest API
- cheapest_api = None
- for result in results:
- if result["type"] == "API":
- if cheapest_api is None or result["cost"] < cheapest_api["cost"]:
- cheapest_api = result
-
- if cheapest_api:
- ratio = cheapest_api["cost"] / cheapest["cost"]
- if ratio > 1:
- recommendation += f"<p>This is <strong>{1/ratio:.1f}x cheaper</strong> than the most affordable API option ({cheapest_api['provider']}).</p>"
- else:
- recommendation += f"<p>However, the API option ({cheapest_api['provider']}) is <strong>{ratio:.1f}x cheaper</strong>.</p>"
-
- # Additional recommendation text
- if tokens_per_month > 100 and cheapest["type"] == "Cloud":
- recommendation += "<p>With your high token volume, cloud hardware becomes more cost-effective despite the higher upfront costs.</p>"
- elif compute_hours < 50 and cheapest["type"] == "API":
- recommendation += "<p>With your low usage hours, API endpoints are more cost-effective as you only pay for what you use.</p>"
-
- # Create breakeven analysis HTML
- breakeven = "<h3>Breakeven Analysis</h3>"
-
- if best_aws is not None and best_api_cost["total_cost"] > 0:
- aws_hourly = aws_instances[best_aws]["hourly_rate"]
- breakeven_hours = best_api_cost["total_cost"] / aws_hourly
-
- breakeven += f"<p>API vs AWS: <strong>{breakeven_hours:.1f} hours</strong> is the breakeven point.</p>"
-
- if compute_hours > breakeven_hours:
- breakeven += "<p>You're past the breakeven point - AWS hardware is more cost-effective than API usage.</p>"
- else:
- breakeven += "<p>You're below the breakeven point - API usage is more cost-effective than AWS hardware.</p>"
-
- if best_gcp is not None and best_api_cost["total_cost"] > 0:
- gcp_hourly = gcp_instances[best_gcp]["hourly_rate"]
- breakeven_hours = best_api_cost["total_cost"] / gcp_hourly
-
- breakeven += f"<p>API vs GCP: <strong>{breakeven_hours:.1f} hours</strong> is the breakeven point.</p>"
-
- if compute_hours > breakeven_hours:
- breakeven += "<p>You're past the breakeven point - GCP hardware is more cost-effective than API usage.</p>"
- else:
- breakeven += "<p>You're below the breakeven point - API usage is more cost-effective than GCP hardware.</p>"
-
- # Generate cost comparison chart
- fig = px.bar(
- pd.DataFrame(results),
- x="provider",
- y="cost",
- color="type",
- color_discrete_map={"Cloud": "#3B82F6", "API": "#8B5CF6"},
- title="Monthly Cost Comparison",
- labels={"provider": "Provider & Instance", "cost": "Monthly Cost ($)"}
- )
-
- fig.update_layout(height=500)
-
- # Create HTML structure for the results
- html_output = f"""
- <div style="padding: 20px; font-family: Arial, sans-serif;">
- <h2>Cost Comparison Results</h2>
-
- <div style="margin-bottom: 20px;">
- {aws_results}
- </div>
-
- <div style="margin-bottom: 20px;">
- {gcp_results}
- </div>
-
- <div style="margin-bottom: 20px;">
- {api_results}
- </div>
-
- <div style="margin-bottom: 20px;">
- {recommendation}
- </div>
-
- <div style="margin-bottom: 20px;">
- {breakeven}
- </div>
-
- <div style="margin-bottom: 20px;">
- <h3>Additional Considerations</h3>
- <div style="display: flex; gap: 20px;">
- <div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
- <h4>Cloud Hardware Pros</h4>
- <ul>
- <li>Full control over infrastructure and customization</li>
- <li>Predictable costs for steady, high-volume workloads</li>
- <li>Can run multiple models simultaneously</li>
- <li>No token context limitations</li>
- <li>Data stays on your infrastructure</li>
- </ul>
- </div>
- <div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
- <h4>API Endpoints Pros</h4>
- <ul>
- <li>No infrastructure management overhead</li>
- <li>Pay-per-use model (ideal for sporadic usage)</li>
- <li>Instant scalability</li>
- <li>No upfront costs or commitment</li>
- <li>Automatic updates to newer model versions</li>
- </ul>
- </div>
- </div>
- </div>
-
- <div style="background-color: #FEF3C7; padding: 15px; border-radius: 8px; margin-bottom: 20px;">
- <p><strong>Note:</strong> These estimates are based on current pricing as of May 2025 and may vary based on regional pricing differences, discounts, and usage patterns.</p>
- </div>
- </div>
- """
-
- return html_output, fig
 
- # Main app function
  def app_function(
- compute_hours,
- tokens_per_month,
- input_ratio,
- api_calls,
- model_size,
- storage_gb,
- batch_size,
- reserved_instances,
- spot_instances,
- multi_year_commitment
  ):
- html_output, fig = generate_cost_comparison(
- compute_hours,
- tokens_per_month,
- input_ratio,
- api_calls,
- model_size,
- storage_gb,
- reserved_instances,
- spot_instances,
- multi_year_commitment
- )
-
- return html_output, fig
-
- # Define the Gradio interface
- with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
- gr.HTML("""
- <div style="text-align: center; margin-bottom: 20px;">
- <h1 style="color: #4F46E5; font-size: 2.5rem;">Cloud Cost Estimator</h1>
- <p style="font-size: 1.2rem;">Compare costs between cloud hardware configurations and inference API endpoints</p>
- </div>
- """)
-
- with gr.Row():
- with gr.Column(scale=1):
- gr.HTML("<h3>Usage Parameters</h3>")
-
- compute_hours = gr.Slider(
- label="Compute Hours per Month",
- minimum=1,
- maximum=730,
- value=100,
- info="Number of hours you'll run the model per month"
- )
-
- tokens_per_month = gr.Slider(
- label="Tokens Processed per Month (millions)",
- minimum=1,
- maximum=1000,
- value=10,
- info="Total number of tokens processed per month in millions"
- )
-
- input_ratio = gr.Slider(
- label="Input Token Ratio (%)",
- minimum=10,
- maximum=90,
- value=30,
- info="Percentage of total tokens that are input tokens"
- )
-
- api_calls = gr.Slider(
- label="API Calls per Month",
- minimum=100,
- maximum=1000000,
- value=10000,
- step=100,
- info="Number of API calls made per month"
- )
-
- model_size = gr.Dropdown(
- label="Model Size",
- choices=list(model_sizes.keys()),
- value="Medium (13B parameters)",
- info="Size of the language model you want to run"
- )
-
- storage_gb = gr.Slider(
- label="Storage Required (GB)",
- minimum=10,
- maximum=1000,
- value=100,
- info="Amount of storage required for models and data"
- )
-
- batch_size = gr.Slider(
- label="Batch Size",
- minimum=1,
- maximum=64,
- value=4,
- info="Batch size for inference (affects throughput)"
- )
-
- gr.HTML("<h3>Advanced Options</h3>")
-
- reserved_instances = gr.Checkbox(
- label="Use Reserved Instances",
- value=False,
- info="Reserved instances offer significant discounts with 1-3 year commitments"
- )
-
- spot_instances = gr.Checkbox(
- label="Use Spot/Preemptible Instances",
- value=False,
- info="Spot instances can be 70-90% cheaper but may be terminated with little notice"
- )
-
- multi_year_commitment = gr.Radio(
- label="Commitment Period (if using Reserved Instances)",
- choices=["1", "3"],
- value="1",
- info="Length of reserved instance commitment in years"
- )
-
- submit_button = gr.Button("Calculate Costs", variant="primary")
-
- with gr.Column(scale=2):
- results_html = gr.HTML(label="Results")
- plot_output = gr.Plot(label="Cost Comparison")
-
- submit_button.click(
- app_function,
- inputs=[
- compute_hours,
- tokens_per_month,
- input_ratio,
- api_calls,
- model_size,
- storage_gb,
- reserved_instances,
- spot_instances,
- multi_year_commitment
- ],
- outputs=[results_html, plot_output]
- )
-
- gr.HTML("""
- <div style="margin-top: 30px; border-top: 1px solid #e5e7eb; padding-top: 20px;">
- <h3>Help & Resources</h3>
- <p><strong>Cloud Provider Documentation:</strong>
- <a href="https://aws.amazon.com/ec2/pricing/" target="_blank">AWS EC2 Pricing</a> |
- <a href="https://cloud.google.com/compute/pricing" target="_blank">GCP Compute Engine Pricing</a>
- </p>
- <p><strong>API Provider Documentation:</strong>
- <a href="https://openai.com/pricing" target="_blank">OpenAI API Pricing</a> |
- <a href="https://www.anthropic.com/api" target="_blank">Anthropic Claude API Pricing</a> |
- <a href="https://www.together.ai/pricing" target="_blank">TogetherAI API Pricing</a>
- </p>
- <p>Made with ❤️ by Cloud Cost Estimator | Data last updated: May 2025</p>
- </div>
- """)
-
- demo.launch()
- value=False,
- info="Spot instances can be 70-90% cheaper but may be terminated with little notice"
- )
-
- multi_year_commitment = gr.Radio(
- label="Commitment Period (if using Reserved Instances)",
- choices=[1, 3],
- value=1,
- info="Length of reserved instance commitment in years"
- )
-
- submit_button = gr.Button("Calculate Costs", variant="primary")
-
- with gr.Column(scale=2):
- results_html = gr.HTML(label="Results")
- plot_output = gr.Plot(label="Cost Comparison")
-
- submit_button.click(
- app_function,
- inputs=[
- compute_hours,
- tokens_per_month,
- input_ratio,
- api_calls,
- model_size,
- storage_gb,
- batch_size,
- reserved_instances,
- spot_instances,
- multi_year_commitment
- ],
- outputs=[results_html, plot_output]
  )
-
- gr.HTML("""
- <div style="margin-top: 30px; border-top: 1px solid #e5e7eb; padding-top: 20px;">
- <h3>Help & Resources</h3>
- <p><strong>Cloud Provider Documentation:</strong>
- <a href="https://aws.amazon.com/ec2/pricing/" target="_blank">AWS EC2 Pricing</a> |
- <a href="https://cloud.google.com/compute/pricing" target="_blank">GCP Compute Engine Pricing</a>
- </p>
- <p><strong>API Provider Documentation:</strong>
- <a href="https://openai.com/pricing" target="_blank">OpenAI API Pricing</a> |
- <a href="https://www.anthropic.com/api" target="_blank">Anthropic Claude API Pricing</a> |
- <a href="https://www.together.ai/pricing" target="_blank">TogetherAI API Pricing</a>
- </p>
- <p>Made with ❤️ by Cloud Cost Estimator | Data last updated: May 2025</p>
- </div>
- """)
 
- demo.launch()
 
  import pandas as pd
  import numpy as np
  import plotly.express as px
 
+ # Pricing data
  aws_instances = {
  "g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB"},
  "g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB"},
 
  "p4d.24xlarge": {"vcpus": 96, "memory": 1152, "gpu": "8x NVIDIA A100", "hourly_rate": 32.77, "gpu_memory": "8x40GB"}
  }
 
  gcp_instances = {
  "a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB"},
  "a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB"},
 
  "g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB"}
  }
 
  api_pricing = {
  "OpenAI": {
  "GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},
 
  }
  }
 
  model_sizes = {
+ "Small (7B parameters)": {"memory_required": 14},
+ "Medium (13B parameters)": {"memory_required": 26},
+ "Large (70B parameters)": {"memory_required": 140},
+ "XL (180B parameters)": {"memory_required": 360},
  }
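The memory_required figures line up with roughly 2 bytes per parameter, i.e. fp16/bf16 weights with no allowance for KV cache or activations; a quick check of that reading, illustrative only and not part of the commit:

    # Illustrative check (not part of the commit): memory_required ≈ parameters * 2 bytes, in GB
    for params_b, expected_gb in [(7, 14), (13, 26), (70, 140), (180, 360)]:
        assert params_b * 2 == expected_gb  # ~2 bytes per parameter at fp16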
 
+ def calculate_costs(instance, hours, storage, reserved, spot, years, instances):
+ data = instances[instance]
+ rate = data['hourly_rate']
  if spot:
+ rate *= 0.3 if instances is aws_instances else 0.2
  elif reserved:
+ factors = {1: 0.6, 3: 0.4} if instances is aws_instances else {1: 0.7, 3: 0.5}
+ rate *= factors.get(years, factors[1])
+ return rate * hours + storage * (0.10 if instances is aws_instances else 0.04)
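For reference, a minimal sanity check of the consolidated cost helper against the g4dn.xlarge row above, assuming the 70% spot discount and the $0.10/GB storage rate encoded in the function (illustrative only, not part of the commit):

    # Illustrative check (not part of the commit): one month of spot g4dn.xlarge
    # 0.526 $/h * 0.3 (spot) * 100 h + 100 GB * 0.10 $/GB = 15.78 + 10.00 = 25.78
    cost = calculate_costs("g4dn.xlarge", hours=100, storage=100,
                           reserved=False, spot=True, years=1, instances=aws_instances)
    print(f"${cost:.2f}")  # expected ≈ $25.78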
 
+ def calculate_api_cost(provider, model, in_tokens, out_tokens, calls):
+ m = api_pricing[provider][model]
+ cost = in_tokens * m['input_per_1M'] + out_tokens * m['output_per_1M']
+ return cost + (calls * 0.0001 if provider == 'TogetherAI' else 0)
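Token volume is entered in millions, so the per-1M rates apply directly; a worked example against the GPT-3.5-Turbo row above (illustrative only, not part of the commit):

    # Illustrative check (not part of the commit): 10M tokens/month at a 30% input ratio
    # 3M input * $0.5/1M + 7M output * $1.5/1M = $1.50 + $10.50 = $12.00 (no per-call fee for OpenAI)
    cost = calculate_api_cost("OpenAI", "GPT-3.5-Turbo", in_tokens=3, out_tokens=7, calls=10000)
    print(f"${cost:.2f}")  # expected $12.00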
 
+ def filter_compatible(instances, min_mem):
+ res = {}
+ for name, data in instances.items():
+ mem_str = data['gpu_memory']
+ if 'x' in mem_str and not mem_str.startswith(('1x','2x','4x','8x')):
+ val = int(mem_str.replace('GB',''))
  else:
+ parts = mem_str.split('x')
+ val = int(parts[0]) * int(parts[1].replace('GB','')) if len(parts)>1 else int(parts[0].replace('GB',''))
+ if val >= min_mem:
+ res[name] = data
+ return res
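A quick check of the GPU-memory parsing: single-GPU strings such as '16GB' contain no 'x' and fall through to the else branch unchanged, while multi-GPU strings such as '8x40GB' are multiplied out to a total (illustrative only, not part of the commit):

    # Illustrative check (not part of the commit): a 70B model needs ~140 GB of GPU memory
    need = model_sizes["Large (70B parameters)"]["memory_required"]
    big_enough = filter_compatible(aws_instances, need)
    # The 16 GB T4 instances drop out; p4d.24xlarge (8x40GB = 320 GB) remains,
    # along with any other large multi-GPU entries in the full aws_instances table.
    print(list(big_enough))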
 
 
  def generate_cost_comparison(
+ compute_hours, tokens_per_month, input_ratio, api_calls,
+ model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
  ):
+ years = int(multi_year_commitment)
+ in_tokens = tokens_per_month * (input_ratio/100)
+ out_tokens = tokens_per_month - in_tokens
+ min_mem = model_sizes[model_size]['memory_required']
+
+ aws_comp = filter_compatible(aws_instances, min_mem)
+ gcp_comp = filter_compatible(gcp_instances, min_mem)
+
  results = []
+ if aws_comp:
+ best_aws = min(aws_comp, key=lambda x: calculate_costs(x, compute_hours, storage_gb, reserved_instances, spot_instances, years, aws_instances))
+ cost_aws = calculate_costs(best_aws, compute_hours, storage_gb, reserved_instances, spot_instances, years, aws_instances)
+ results.append({'provider': f'AWS ({best_aws})', 'cost': cost_aws, 'type': 'Cloud'})
+ if gcp_comp:
+ best_gcp = min(gcp_comp, key=lambda x: calculate_costs(x, compute_hours, storage_gb, reserved_instances, spot_instances, years, gcp_instances))
+ cost_gcp = calculate_costs(best_gcp, compute_hours, storage_gb, reserved_instances, spot_instances, years, gcp_instances)
+ results.append({'provider': f'GCP ({best_gcp})', 'cost': cost_gcp, 'type': 'Cloud'})
+
+ api_opts = {(prov, mdl): calculate_api_cost(prov, mdl, in_tokens, out_tokens, api_calls)
+ for prov in api_pricing for mdl in api_pricing[prov]}
+ best_api = min(api_opts, key=api_opts.get)
+ results.append({'provider': f'{best_api[0]} ({best_api[1]})', 'cost': api_opts[best_api], 'type': 'API'})
+
+ df = pd.DataFrame(results)
+ aws_label = df[df['type']=='Cloud']['provider'].iloc[0]
+ gcp_label = df[df['type']=='Cloud']['provider'].iloc[1] if len(df[df['type']=='Cloud'])>1 else aws_label
+ api_label = df[df['type']=='API']['provider'].iloc[0]
+
+ fig = px.bar(df, x='provider', y='cost', color='provider', color_discrete_map={
+ aws_label: '#FF9900',
+ gcp_label: '#4285F4',
+ api_label: '#D62828'
  })
+ fig.update_yaxes(tickprefix='$')
+ fig.update_layout(showlegend=False, height=500)
+
+ html = '<div></div>' # your tables here if needed
+ return html, fig
 
  def app_function(
+ compute_hours, tokens_per_month, input_ratio, api_calls,
+ model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
  ):
+ return generate_cost_comparison(
+ compute_hours, tokens_per_month, input_ratio, api_calls,
+ model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
  )
 
+ if __name__ == "__main__":
+ with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
+ gr.HTML('<h1 style="text-align:center;">Cloud Cost Estimator</h1>')
+ with gr.Row():
+ with gr.Column(scale=1):
+ compute_hours = gr.Slider(label="Compute Hours per Month", minimum=1, maximum=730, value=100)
+ tokens_per_month = gr.Slider(label="Tokens per Month (M)", minimum=1, maximum=1000, value=10)
+ input_ratio = gr.Slider(label="Input Ratio (%)", minimum=10, maximum=90, value=30)
+ api_calls = gr.Slider(label="API Calls per Month", minimum=100, maximum=1000000, value=10000, step=100)
+ model_size = gr.Dropdown(label="Model Size", choices=list(model_sizes.keys()), value="Medium (13B parameters)")
+ storage_gb = gr.Slider(label="Storage (GB)", minimum=10, maximum=1000, value=100)
+ reserved_instances = gr.Checkbox(label="Reserved Instances", value=False)
+ spot_instances = gr.Checkbox(label="Spot Instances", value=False)
+ multi_year_commitment = gr.Radio(label="Commitment Period (years)", choices=["1","3"], value="1")
+ submit = gr.Button("Calculate Costs")
+ with gr.Column(scale=2):
+ out_html = gr.HTML()
+ out_plot = gr.Plot()
+ submit.click(
+ app_function,
+ inputs=[compute_hours, tokens_per_month, input_ratio, api_calls,
+ model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment],
+ outputs=[out_html, out_plot]
+ )
+ demo.launch()
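The comparison can also be exercised headlessly, without launching the Gradio UI; the exact numbers depend on the pricing tables above (illustrative only, not part of the commit):

    # Illustrative check (not part of the commit): 100 compute hours, 10M tokens/month, on-demand pricing
    html, fig = app_function(
        compute_hours=100, tokens_per_month=10, input_ratio=30, api_calls=10000,
        model_size="Medium (13B parameters)", storage_gb=100,
        reserved_instances=False, spot_instances=False, multi_year_commitment="1",
    )
    fig.show()  # or fig.write_html("comparison.html") to inspect the chart offline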