delightfulrachel committed
Commit 41a2f73 · verified · 1 parent: 34b4396

Update app.py

Files changed (1): app.py (+159 -529)
app.py CHANGED
@@ -2,10 +2,8 @@ import gradio as gr
 import pandas as pd
 import numpy as np
 import plotly.express as px
-import plotly.graph_objects as go
 
 # Initialize pricing data
-# AWS pricing - Instance types and their properties
 aws_instances = {
     "g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB"},
     "g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB"},
@@ -15,7 +13,6 @@ aws_instances = {
     "p4d.24xlarge": {"vcpus": 96, "memory": 1152, "gpu": "8x NVIDIA A100", "hourly_rate": 32.77, "gpu_memory": "8x40GB"}
 }
 
-# GCP pricing - Instance types and their properties
 gcp_instances = {
     "a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB"},
     "a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB"},
@@ -25,7 +22,6 @@ gcp_instances = {
     "g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB"}
 }
 
-# API pricing - Models and their prices
 api_pricing = {
     "OpenAI": {
         "GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},
@@ -46,7 +42,6 @@ api_pricing = {
     }
 }
 
-# Model sizes and memory requirements
 model_sizes = {
     "Small (7B parameters)": {"memory_required": 14, "throughput_factor": 1.0},
     "Medium (13B parameters)": {"memory_required": 26, "throughput_factor": 0.7},
@@ -54,556 +49,191 @@ model_sizes = {
     "XL (180B parameters)": {"memory_required": 360, "throughput_factor": 0.15},
 }
 
-# Calculate costs
+
 def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
-    instance_data = aws_instances[instance]
-    base_hourly = instance_data["hourly_rate"]
-
-    # Apply discounts for reservation or spot
+    data = aws_instances[instance]
+    rate = data['hourly_rate']
     if spot:
-        hourly_rate = base_hourly * 0.3 # 70% discount for spot
+        rate *= 0.3
     elif reserved:
-        discount_factors = {1: 0.6, 3: 0.4} # 40% for 1 year, 60% for 3 years
-        hourly_rate = base_hourly * discount_factors.get(years, 0.6)
-    else:
-        hourly_rate = base_hourly
-
-    compute_cost = hourly_rate * hours
-    storage_cost = storage * 0.10 # $0.10 per GB for EBS
-
-    return {
-        "compute_cost": compute_cost,
-        "storage_cost": storage_cost,
-        "total_cost": compute_cost + storage_cost,
-        "instance_details": instance_data
-    }
+        factors = {1: 0.6, 3: 0.4}
+        rate *= factors.get(years, 0.6)
+    compute = rate * hours
+    storage_cost = storage * 0.10
+    return {'compute_cost': compute, 'storage_cost': storage_cost, 'total_cost': compute + storage_cost}
+
 
 def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
-    instance_data = gcp_instances[instance]
-    base_hourly = instance_data["hourly_rate"]
-
-    # Apply discounts
+    data = gcp_instances[instance]
+    rate = data['hourly_rate']
     if spot:
-        hourly_rate = base_hourly * 0.2 # 80% discount for preemptible
+        rate *= 0.2
     elif reserved:
-        discount_factors = {1: 0.7, 3: 0.5} # 30% for 1 year, 50% for 3 years
-        hourly_rate = base_hourly * discount_factors.get(years, 0.7)
-    else:
-        hourly_rate = base_hourly
-
-    compute_cost = hourly_rate * hours
-    storage_cost = storage * 0.04 # $0.04 per GB for Standard SSD
-
-    return {
-        "compute_cost": compute_cost,
-        "storage_cost": storage_cost,
-        "total_cost": compute_cost + storage_cost,
-        "instance_details": instance_data
-    }
+        factors = {1: 0.7, 3: 0.5}
+        rate *= factors.get(years, 0.7)
+    compute = rate * hours
+    storage_cost = storage * 0.04
+    return {'compute_cost': compute, 'storage_cost': storage_cost, 'total_cost': compute + storage_cost}
+
 
 def calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls):
-    model_data = api_pricing[provider][model]
-
-    input_cost = (input_tokens * model_data["input_per_1M"]) / 1
-    output_cost = (output_tokens * model_data["output_per_1M"]) / 1
-
-    # Add a small cost for API calls for some providers
-    api_call_costs = 0
-    if provider == "TogetherAI":
-        api_call_costs = api_calls * 0.0001 # $0.0001 per request
-
-    total_cost = input_cost + output_cost + api_call_costs
-
-    return {
-        "input_cost": input_cost,
-        "output_cost": output_cost,
-        "api_call_cost": api_call_costs,
-        "total_cost": total_cost,
-        "model_details": model_data
-    }
+    mdata = api_pricing[provider][model]
+    input_cost = (input_tokens * mdata['input_per_1M']) / 1
+    output_cost = (output_tokens * mdata['output_per_1M']) / 1
+    call_cost = api_calls * 0.0001 if provider == 'TogetherAI' else 0
+    total = input_cost + output_cost + call_cost
+    return {'input_cost': input_cost, 'output_cost': output_cost, 'api_call_cost': call_cost, 'total_cost': total}
+
 
-# Filter instances based on model size requirements
-def filter_compatible_instances(instances_dict, min_memory_required):
-    compatible = {}
-    for name, data in instances_dict.items():
-        # Parse GPU memory
-        memory_str = data["gpu_memory"]
-
-        # Handle multiple GPUs
-        if "x" in memory_str and not memory_str.startswith(("1x", "2x", "4x", "8x")):
-            # Format: "16GB"
-            memory_val = int(memory_str.split("GB")[0])
-        elif "x" in memory_str:
-            # Format: "8x40GB"
-            parts = memory_str.split("x")
-            num_gpus = int(parts[0])
-            memory_per_gpu = int(parts[1].split("GB")[0])
-            memory_val = num_gpus * memory_per_gpu
+def filter_compatible_instances(instances, min_mem):
+    result = {}
+    for name, data in instances.items():
+        mem_str = data['gpu_memory']
+        if 'x' in mem_str and not mem_str.startswith(('1x','2x','4x','8x')):
+            val = int(mem_str.replace('GB',''))
+        elif 'x' in mem_str:
+            parts = mem_str.split('x')
+            val = int(parts[0]) * int(parts[1].replace('GB',''))
         else:
-            # Format: "40GB"
-            memory_val = int(memory_str.split("GB")[0])
-
-        if memory_val >= min_memory_required:
-            compatible[name] = data
-
-    return compatible
+            val = int(mem_str.replace('GB',''))
+        if val >= min_mem:
+            result[name] = data
+    return result
+
 
 def generate_cost_comparison(
-    compute_hours,
-    tokens_per_month,
-    input_ratio,
-    api_calls,
-    model_size,
-    storage_gb,
-    reserved_instances,
-    spot_instances,
-    multi_year_commitment
+    compute_hours, tokens_per_month, input_ratio, api_calls,
+    model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
 ):
-    # Calculate input and output tokens
-    input_tokens = tokens_per_month * (input_ratio / 100)
-    output_tokens = tokens_per_month * (1 - (input_ratio / 100))
-
-    # Check model memory requirements
-    min_memory_required = model_sizes[model_size]["memory_required"]
-
-    # Filter compatible instances
-    compatible_aws = filter_compatible_instances(aws_instances, min_memory_required)
-    compatible_gcp = filter_compatible_instances(gcp_instances, min_memory_required)
-
+    years = int(multi_year_commitment)
+    in_tokens = tokens_per_month * (input_ratio/100)
+    out_tokens = tokens_per_month - in_tokens
+    min_mem = model_sizes[model_size]['memory_required']
+    aws_comp = filter_compatible_instances(aws_instances, min_mem)
+    gcp_comp = filter_compatible_instances(gcp_instances, min_mem)
     results = []
-
-    # Generate HTML for AWS options
-    if compatible_aws:
-        aws_results = "<h3>AWS Compatible Instances</h3>"
-        aws_results += "<table width='100%'><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Hourly Rate</th><th>Monthly Cost</th></tr>"
-
-        best_aws = None
-        best_aws_cost = float('inf')
-
-        for instance in compatible_aws:
-            cost_result = calculate_aws_cost(instance, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
-            total_cost = cost_result["total_cost"]
-
-            if total_cost < best_aws_cost:
-                best_aws = instance
-                best_aws_cost = total_cost
-
-            aws_results += f"<tr><td>{instance}</td><td>{compatible_aws[instance]['vcpus']}</td><td>{compatible_aws[instance]['memory']}GB</td><td>{compatible_aws[instance]['gpu']}</td><td>${compatible_aws[instance]['hourly_rate']:.3f}</td><td>${total_cost:.2f}</td></tr>"
-
-        aws_results += "</table>"
-
+
+    # AWS table
+    aws_html = '<h3>AWS Compatible Instances</h3>'
+    if aws_comp:
+        aws_html += '<table width="100%"><tr><th>Instance</th><th>Monthly Cost</th></tr>'
+        best_aws, best_cost = None, float('inf')
+        for inst in aws_comp:
+            c = calculate_aws_cost(inst, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost']
+            aws_html += f'<tr><td>{inst}</td><td>${c:.2f}</td></tr>'
+            if c < best_cost:
+                best_aws, best_cost = inst, c
+        aws_html += '</table>'
         if best_aws:
-            best_aws_data = calculate_aws_cost(best_aws, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
-            results.append({
-                "provider": f"AWS ({best_aws})",
-                "cost": best_aws_data["total_cost"],
-                "type": "Cloud"
-            })
+            results.append({'provider': f'AWS ({best_aws})', 'cost': best_cost, 'type':'Cloud'})
     else:
-        aws_results = "<h3>AWS Compatible Instances</h3><p>No compatible AWS instances found for this model size.</p>"
-        best_aws = None
-        best_aws_cost = float('inf')
-
-    # Generate HTML for GCP options
-    if compatible_gcp:
-        gcp_results = "<h3>Google Cloud Compatible Instances</h3>"
-        gcp_results += "<table width='100%'><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Hourly Rate</th><th>Monthly Cost</th></tr>"
-
-        best_gcp = None
-        best_gcp_cost = float('inf')
-
-        for instance in compatible_gcp:
-            cost_result = calculate_gcp_cost(instance, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
-            total_cost = cost_result["total_cost"]
-
-            if total_cost < best_gcp_cost:
-                best_gcp = instance
-                best_gcp_cost = total_cost
-
-            gcp_results += f"<tr><td>{instance}</td><td>{compatible_gcp[instance]['vcpus']}</td><td>{compatible_gcp[instance]['memory']}GB</td><td>{compatible_gcp[instance]['gpu']}</td><td>${compatible_gcp[instance]['hourly_rate']:.3f}</td><td>${total_cost:.2f}</td></tr>"
-
-        gcp_results += "</table>"
-
+        aws_html += '<p>No compatible AWS instances.</p>'
+
+    # GCP table
+    gcp_html = '<h3>GCP Compatible Instances</h3>'
+    if gcp_comp:
+        gcp_html += '<table width="100%"><tr><th>Instance</th><th>Monthly Cost</th></tr>'
+        best_gcp, best_gcp_cost = None, float('inf')
+        for inst in gcp_comp:
+            c = calculate_gcp_cost(inst, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost']
+            gcp_html += f'<tr><td>{inst}</td><td>${c:.2f}</td></tr>'
+            if c < best_gcp_cost:
+                best_gcp, best_gcp_cost = inst, c
+        gcp_html += '</table>'
         if best_gcp:
-            best_gcp_data = calculate_gcp_cost(best_gcp, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
-            results.append({
-                "provider": f"GCP ({best_gcp})",
-                "cost": best_gcp_data["total_cost"],
-                "type": "Cloud"
-            })
+            results.append({'provider': f'GCP ({best_gcp})', 'cost': best_gcp_cost, 'type':'Cloud'})
     else:
-        gcp_results = "<h3>Google Cloud Compatible Instances</h3><p>No compatible Google Cloud instances found for this model size.</p>"
-        best_gcp = None
-        best_gcp_cost = float('inf')
-
-    # Generate HTML for API options
-    api_results = "<h3>API Options</h3>"
-    api_results += "<table width='100%'><tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Total Cost</th><th>Context Length</th></tr>"
-
+        gcp_html += '<p>No compatible GCP instances.</p>'
+
+    # API table
+    api_html = '<h3>API Options</h3>'
+    api_html += '<table width="100%"><tr><th>Provider</th><th>Model</th><th>Total Cost</th></tr>'
     api_costs = {}
-
-    for provider in api_pricing:
-        for model in api_pricing[provider]:
-            cost_data = calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls)
-            api_costs[(provider, model)] = cost_data
-
-            api_results += f"<tr><td>{provider}</td><td>{model}</td><td>${cost_data['input_cost']:.2f}</td><td>${cost_data['output_cost']:.2f}</td><td>${cost_data['total_cost']:.2f}</td><td>{api_pricing[provider][model]['token_context']:,}</td></tr>"
-
-    api_results += "</table>"
-
-    # Find best API option
-    best_api = min(api_costs.keys(), key=lambda x: api_costs[x]["total_cost"])
-    best_api_cost = api_costs[best_api]
-
-    results.append({
-        "provider": f"{best_api[0]} ({best_api[1]})",
-        "cost": best_api_cost["total_cost"],
-        "type": "API"
-    })
-
-    # Create recommendation HTML
-    recommendation = "<h3>Recommendation</h3>"
-
-    # Find the cheapest option
-    cheapest = min(results, key=lambda x: x["cost"])
-
-    if cheapest["type"] == "API":
-        recommendation += f"<p>Based on your usage parameters, the <strong>{cheapest['provider']}</strong> API endpoint is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"
-
-        # Calculate API vs cloud cost ratio
-        cheapest_cloud = None
-        for result in results:
-            if result["type"] == "Cloud":
-                if cheapest_cloud is None or result["cost"] < cheapest_cloud["cost"]:
-                    cheapest_cloud = result
-
-        if cheapest_cloud:
-            ratio = cheapest_cloud["cost"] / cheapest["cost"]
-            recommendation += f"<p>This is <strong>{ratio:.1f}x cheaper</strong> than the most affordable cloud option ({cheapest_cloud['provider']}).</p>"
+    for prov in api_pricing:
+        for mdl in api_pricing[prov]:
+            cost_data = calculate_api_cost(prov, mdl, in_tokens, out_tokens, api_calls)
+            api_costs[(prov,mdl)] = cost_data['total_cost']
+            api_html += f'<tr><td>{prov}</td><td>{mdl}</td><td>${cost_data["total_cost"]:.2f}</td></tr>'
+    api_html += '</table>'
+    best_api = min(api_costs, key=api_costs.get)
+    results.append({'provider': f'{best_api[0]} ({best_api[1]})', 'cost': api_costs[best_api], 'type':'API'})
+
+    # Recommendation
+    cheapest = min(results, key=lambda x: x['cost'])
+    rec = '<h3>Recommendation</h3>'
+    if cheapest['type']=='API':
+        rec += f"<p>The API {cheapest['provider']} is cheapest at ${cheapest['cost']:.2f}.</p>"
     else:
-        recommendation += f"<p>Based on your usage parameters, <strong>{cheapest['provider']}</strong> is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"
-
-        # Find cheapest API
-        cheapest_api = None
-        for result in results:
-            if result["type"] == "API":
-                if cheapest_api is None or result["cost"] < cheapest_api["cost"]:
-                    cheapest_api = result
-
-        if cheapest_api:
-            ratio = cheapest_api["cost"] / cheapest["cost"]
-            if ratio > 1:
-                recommendation += f"<p>This is <strong>{1/ratio:.1f}x cheaper</strong> than the most affordable API option ({cheapest_api['provider']}).</p>"
-            else:
-                recommendation += f"<p>However, the API option ({cheapest_api['provider']}) is <strong>{ratio:.1f}x cheaper</strong>.</p>"
-
-    # Additional recommendation text
-    if tokens_per_month > 100 and cheapest["type"] == "Cloud":
-        recommendation += "<p>With your high token volume, cloud hardware becomes more cost-effective despite the higher upfront costs.</p>"
-    elif compute_hours < 50 and cheapest["type"] == "API":
-        recommendation += "<p>With your low usage hours, API endpoints are more cost-effective as you only pay for what you use.</p>"
-
-    # Create breakeven analysis HTML
-    breakeven = "<h3>Breakeven Analysis</h3>"
-
-    if best_aws is not None and best_api_cost["total_cost"] > 0:
-        aws_hourly = aws_instances[best_aws]["hourly_rate"]
-        breakeven_hours = best_api_cost["total_cost"] / aws_hourly
-
-        breakeven += f"<p>API vs AWS: <strong>{breakeven_hours:.1f} hours</strong> is the breakeven point.</p>"
-
-        if compute_hours > breakeven_hours:
-            breakeven += "<p>You're past the breakeven point - AWS hardware is more cost-effective than API usage.</p>"
-        else:
-            breakeven += "<p>You're below the breakeven point - API usage is more cost-effective than AWS hardware.</p>"
-
-    if best_gcp is not None and best_api_cost["total_cost"] > 0:
-        gcp_hourly = gcp_instances[best_gcp]["hourly_rate"]
-        breakeven_hours = best_api_cost["total_cost"] / gcp_hourly
-
-        breakeven += f"<p>API vs GCP: <strong>{breakeven_hours:.1f} hours</strong> is the breakeven point.</p>"
-
-        if compute_hours > breakeven_hours:
-            breakeven += "<p>You're past the breakeven point - GCP hardware is more cost-effective than API usage.</p>"
-        else:
-            breakeven += "<p>You're below the breakeven point - API usage is more cost-effective than GCP hardware.</p>"
-
-    # Generate cost comparison chart
-    fig = px.bar(
-        pd.DataFrame(results),
-        x="provider",
-        y="cost",
-        color="type",
-        color_discrete_map={"Cloud": "#3B82F6", "API": "#8B5CF6"},
-        title="Monthly Cost Comparison",
-        labels={"provider": "Provider & Instance", "cost": "Monthly Cost ($)"}
-    )
-
-    fig.update_layout(height=500)
-
-    # Create HTML structure for the results
-    html_output = f"""
-    <div style="padding: 20px; font-family: Arial, sans-serif;">
-        <h2>Cost Comparison Results</h2>
-
-        <div style="margin-bottom: 20px;">
-            {aws_results}
-        </div>
-
-        <div style="margin-bottom: 20px;">
-            {gcp_results}
-        </div>
-
-        <div style="margin-bottom: 20px;">
-            {api_results}
-        </div>
-
-        <div style="margin-bottom: 20px;">
-            {recommendation}
-        </div>
-
-        <div style="margin-bottom: 20px;">
-            {breakeven}
-        </div>
-
-        <div style="margin-bottom: 20px;">
-            <h3>Additional Considerations</h3>
-            <div style="display: flex; gap: 20px;">
-                <div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
-                    <h4>Cloud Hardware Pros</h4>
-                    <ul>
-                        <li>Full control over infrastructure and customization</li>
-                        <li>Predictable costs for steady, high-volume workloads</li>
-                        <li>Can run multiple models simultaneously</li>
-                        <li>No token context limitations</li>
-                        <li>Data stays on your infrastructure</li>
-                    </ul>
-                </div>
-                <div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
-                    <h4>API Endpoints Pros</h4>
-                    <ul>
-                        <li>No infrastructure management overhead</li>
-                        <li>Pay-per-use model (ideal for sporadic usage)</li>
-                        <li>Instant scalability</li>
-                        <li>No upfront costs or commitment</li>
-                        <li>Automatic updates to newer model versions</li>
-                    </ul>
-                </div>
-            </div>
-        </div>
-
-        <div style="background-color: #FEF3C7; padding: 15px; border-radius: 8px; margin-bottom: 20px;">
-            <p><strong>Note:</strong> These estimates are based on current pricing as of May 2025 and may vary based on regional pricing differences, discounts, and usage patterns.</p>
-        </div>
-    </div>
+        rec += f"<p>The Cloud {cheapest['provider']} is cheapest at ${cheapest['cost']:.2f}.</p>"
+
+    # Plot
+    df_res = pd.DataFrame(results)
+    fig = px.bar(df_res, x='provider', y='cost', color='type', title='Monthly Cost Comparison')
+
+    # HTML output
+    html = f"""
+    <div>{aws_html}</div>
+    <div>{gcp_html}</div>
+    <div>{api_html}</div>
+    <div>{rec}</div>
     """
-
-    return html_output, fig
+    return html, fig
+
 
-# Main app function
 def app_function(
-    compute_hours,
-    tokens_per_month,
-    input_ratio,
-    api_calls,
-    model_size,
-    storage_gb,
-    batch_size,
-    reserved_instances,
-    spot_instances,
-    multi_year_commitment
+    compute_hours, tokens_per_month, input_ratio, api_calls,
+    model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
 ):
-    html_output, fig = generate_cost_comparison(
-        compute_hours,
-        tokens_per_month,
-        input_ratio,
-        api_calls,
-        model_size,
-        storage_gb,
-        reserved_instances,
-        spot_instances,
-        multi_year_commitment
+    return generate_cost_comparison(
+        compute_hours, tokens_per_month, input_ratio, api_calls,
+        model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
     )
-
-    return html_output, fig
 
-# Define the Gradio interface
-with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
-    gr.HTML("""
-    <div style="text-align: center; margin-bottom: 20px;">
-        <h1 style="color: #4F46E5; font-size: 2.5rem;">Cloud Cost Estimator</h1>
-        <p style="font-size: 1.2rem;">Compare costs between cloud hardware configurations and inference API endpoints</p>
-    </div>
-    """)
-
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.HTML("<h3>Usage Parameters</h3>")
-
-            compute_hours = gr.Slider(
-                label="Compute Hours per Month",
-                minimum=1,
-                maximum=730,
-                value=100,
-                info="Number of hours you'll run the model per month"
-            )
-
-            tokens_per_month = gr.Slider(
-                label="Tokens Processed per Month (millions)",
-                minimum=1,
-                maximum=1000,
-                value=10,
-                info="Total number of tokens processed per month in millions"
-            )
-
-            input_ratio = gr.Slider(
-                label="Input Token Ratio (%)",
-                minimum=10,
-                maximum=90,
-                value=30,
-                info="Percentage of total tokens that are input tokens"
-            )
-
-            api_calls = gr.Slider(
-                label="API Calls per Month",
-                minimum=100,
-                maximum=1000000,
-                value=10000,
-                step=100,
-                info="Number of API calls made per month"
-            )
-
-            model_size = gr.Dropdown(
-                label="Model Size",
-                choices=list(model_sizes.keys()),
-                value="Medium (13B parameters)",
-                info="Size of the language model you want to run"
-            )
-
-            storage_gb = gr.Slider(
-                label="Storage Required (GB)",
-                minimum=10,
-                maximum=1000,
-                value=100,
-                info="Amount of storage required for models and data"
-            )
-
-            batch_size = gr.Slider(
-                label="Batch Size",
-                minimum=1,
-                maximum=64,
-                value=4,
-                info="Batch size for inference (affects throughput)"
-            )
-
-            gr.HTML("<h3>Advanced Options</h3>")
-
-            reserved_instances = gr.Checkbox(
-                label="Use Reserved Instances",
-                value=False,
-                info="Reserved instances offer significant discounts with 1-3 year commitments"
-            )
-
-            spot_instances = gr.Checkbox(
-                label="Use Spot/Preemptible Instances",
-                value=False,
-                info="Spot instances can be 70-90% cheaper but may be terminated with little notice"
-            )
-
-            multi_year_commitment = gr.Radio(
-                label="Commitment Period (if using Reserved Instances)",
-                choices=["1", "3"],
-                value="1",
-                info="Length of reserved instance commitment in years"
-            )
-
-            submit_button = gr.Button("Calculate Costs", variant="primary")
-
-        with gr.Column(scale=2):
-            results_html = gr.HTML(label="Results")
-            plot_output = gr.Plot(label="Cost Comparison")
-
-    submit_button.click(
-        app_function,
-        inputs=[
-            compute_hours,
-            tokens_per_month,
-            input_ratio,
-            api_calls,
-            model_size,
-            storage_gb,
-            reserved_instances,
-            spot_instances,
-            multi_year_commitment
-        ],
-        outputs=[results_html, plot_output]
-    )
-
-    gr.HTML("""
-    <div style="margin-top: 30px; border-top: 1px solid #e5e7eb; padding-top: 20px;">
-        <h3>Help & Resources</h3>
-        <p><strong>Cloud Provider Documentation:</strong>
-        <a href="https://aws.amazon.com/ec2/pricing/" target="_blank">AWS EC2 Pricing</a> |
-        <a href="https://cloud.google.com/compute/pricing" target="_blank">GCP Compute Engine Pricing</a>
-        </p>
-        <p><strong>API Provider Documentation:</strong>
-        <a href="https://openai.com/pricing" target="_blank">OpenAI API Pricing</a> |
-        <a href="https://www.anthropic.com/api" target="_blank">Anthropic Claude API Pricing</a> |
-        <a href="https://www.together.ai/pricing" target="_blank">TogetherAI API Pricing</a>
-        </p>
-        <p>Made with ❤️ by Cloud Cost Estimator | Data last updated: May 2025</p>
-    </div>
-    """)
+# Gradio interface
+def main():
+    with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
+        gr.HTML("""
+        <div style="text-align:center; margin-bottom:20px;">
+            <h1>Cloud Cost Estimator</h1>
+            <p>Compare costs between cloud hardware and API endpoints</p>
+        </div>
+        """)
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.HTML("<h3>Usage Parameters</h3>")
+                compute_hours = gr.Slider(label="Compute Hours per Month", minimum=1, maximum=730, value=100)
+                tokens_per_month = gr.Slider(label="Tokens Processed per Month (millions)", minimum=1, maximum=1000, value=10)
+                input_ratio = gr.Slider(label="Input Token Ratio (%)", minimum=10, maximum=90, value=30)
+                api_calls = gr.Slider(label="API Calls per Month", minimum=100, maximum=1000000, value=10000, step=100)
+                model_size = gr.Dropdown(label="Model Size", choices=list(model_sizes.keys()), value="Medium (13B parameters)")
+                storage_gb = gr.Slider(label="Storage Required (GB)", minimum=10, maximum=1000, value=100)
+
+                gr.HTML("<h3>Advanced Options</h3>")
+                reserved_instances = gr.Checkbox(label="Use Reserved Instances", value=False)
+                spot_instances = gr.Checkbox(label="Use Spot/Preemptible Instances", value=False)
+                multi_year_commitment = gr.Radio(label="Commitment Period (years)", choices=["1","3"], value="1")
+                submit_button = gr.Button("Calculate Costs", variant="primary")
+
+            with gr.Column(scale=2):
+                results_html = gr.HTML(label="Results")
+                plot_output = gr.Plot(label="Cost Comparison")
+
+        submit_button.click(
+            app_function,
+            inputs=[compute_hours, tokens_per_month, input_ratio, api_calls, model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment],
+            outputs=[results_html, plot_output]
+        )
+
+        gr.HTML("""
+        <div style="margin-top:30px; border-top:1px solid #e5e7eb; padding-top:20px;">
+            <h3>Help & Resources</h3>
+            <p><a href="https://aws.amazon.com/ec2/pricing/">AWS EC2 Pricing</a> | <a href="https://cloud.google.com/compute/pricing">GCP Pricing</a></p>
+            <p><a href="https://openai.com/pricing">OpenAI API Pricing</a> | <a href="https://www.anthropic.com/api">Anthropic Claude API Pricing</a> | <a href="https://www.together.ai/pricing">TogetherAI Pricing</a></p>
+        </div>
+        """)
+
+    demo.launch()
 
-demo.launch()
-            value=False,
-            info="Spot instances can be 70-90% cheaper but may be terminated with little notice"
-        )
-
-        multi_year_commitment = gr.Radio(
-            label="Commitment Period (if using Reserved Instances)",
-            choices=[1, 3],
-            value=1,
-            info="Length of reserved instance commitment in years"
-        )
-
-        submit_button = gr.Button("Calculate Costs", variant="primary")
-
-    with gr.Column(scale=2):
-        results_html = gr.HTML(label="Results")
-        plot_output = gr.Plot(label="Cost Comparison")
-
-submit_button.click(
-    app_function,
-    inputs=[
-        compute_hours,
-        tokens_per_month,
-        input_ratio,
-        api_calls,
-        model_size,
-        storage_gb,
-        batch_size,
-        reserved_instances,
-        spot_instances,
-        multi_year_commitment
-    ],
-    outputs=[results_html, plot_output]
-)
-
-gr.HTML("""
-<div style="margin-top: 30px; border-top: 1px solid #e5e7eb; padding-top: 20px;">
-    <h3>Help & Resources</h3>
-    <p><strong>Cloud Provider Documentation:</strong>
-    <a href="https://aws.amazon.com/ec2/pricing/" target="_blank">AWS EC2 Pricing</a> |
-    <a href="https://cloud.google.com/compute/pricing" target="_blank">GCP Compute Engine Pricing</a>
-    </p>
-    <p><strong>API Provider Documentation:</strong>
-    <a href="https://openai.com/pricing" target="_blank">OpenAI API Pricing</a> |
-    <a href="https://www.anthropic.com/api" target="_blank">Anthropic Claude API Pricing</a> |
-    <a href="https://www.together.ai/pricing" target="_blank">TogetherAI API Pricing</a>
-    </p>
-    <p>Made with ❤️ by Cloud Cost Estimator | Data last updated: May 2025</p>
-</div>
-""")
-
-demo.launch()
+if __name__ == "__main__":
+    main()
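
For reference, a minimal sketch of how the updated helpers in this revision might be exercised from another script, assuming the new version above is saved as app.py and importable (the __main__ guard keeps the Gradio UI from launching on import). The function names and signatures are taken from the diff; the instance choice, token counts, and call volume below are illustrative values only, not part of the commit.

# Minimal sanity check of the updated cost helpers (illustrative values only).
from app import aws_instances, calculate_aws_cost, calculate_api_cost, filter_compatible_instances

# 100 spot hours on a g4dn.xlarge plus 100 GB of storage
aws = calculate_aws_cost("g4dn.xlarge", hours=100, storage=100, spot=True)
print(f"AWS total: ${aws['total_cost']:.2f}")

# 3M input and 7M output tokens on GPT-3.5-Turbo (token arguments are in millions)
api = calculate_api_cost("OpenAI", "GPT-3.5-Turbo", input_tokens=3, output_tokens=7, api_calls=10000)
print(f"API total: ${api['total_cost']:.2f}")

# Instances with at least 26 GB of GPU memory, the "Medium (13B parameters)" requirement
print(list(filter_compatible_instances(aws_instances, 26)))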