delightfulrachel committed
Commit 29a68b1 · verified · 1 Parent(s): e26b1a8

Update app.py

Files changed (1)
app.py +540 -123
app.py CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
import pandas as pd
import numpy as np
import plotly.express as px

- # Pricing data
aws_instances = {
    "g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB"},
    "g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB"},
@@ -13,6 +15,7 @@ aws_instances = {
    "p4d.24xlarge": {"vcpus": 96, "memory": 1152, "gpu": "8x NVIDIA A100", "hourly_rate": 32.77, "gpu_memory": "8x40GB"}
}

gcp_instances = {
    "a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB"},
    "a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB"},
@@ -22,6 +25,7 @@ gcp_instances = {
    "g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB"}
}

api_pricing = {
    "OpenAI": {
        "GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},
@@ -42,151 +46,564 @@ api_pricing = {
    }
}

model_sizes = {
-     "Small (7B parameters)": {"memory_required": 14},
-     "Medium (13B parameters)": {"memory_required": 26},
-     "Large (70B parameters)": {"memory_required": 140},
-     "XL (180B parameters)": {"memory_required": 360},
}

def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
-     data = aws_instances[instance]
-     rate = data['hourly_rate']
    if spot:
-         rate *= 0.3
    elif reserved:
-         factors = {1: 0.6, 3: 0.4}
-         rate *= factors.get(years, 0.6)
-     compute = rate * hours
-     storage_cost = storage * 0.10
-     return {'total_cost': compute + storage_cost}

def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
-     data = gcp_instances[instance]
-     rate = data['hourly_rate']
    if spot:
-         rate *= 0.2
    elif reserved:
-         factors = {1: 0.7, 3: 0.5}
-         rate *= factors.get(years, 0.7)
-     compute = rate * hours
-     storage_cost = storage * 0.04
-     return {'total_cost': compute + storage_cost}

def calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls):
-     m = api_pricing[provider][model]
-     input_cost = input_tokens * m['input_per_1M']
-     output_cost = output_tokens * m['output_per_1M']
-     call_cost = api_calls * 0.0001 if provider == 'TogetherAI' else 0
-     return {'total_cost': input_cost + output_cost + call_cost}

- def filter_compatible(instances, min_mem):
-     res = {}
-     for name, data in instances.items():
-         mem_str = data['gpu_memory']
-         if 'x' in mem_str and not mem_str.startswith(('1x','2x','4x','8x')):
-             val = int(mem_str.replace('GB',''))
-         elif 'x' in mem_str:
-             parts = mem_str.split('x')
-             val = int(parts[0]) * int(parts[1].replace('GB',''))
        else:
-             val = int(mem_str.replace('GB',''))
-         if val >= min_mem:
-             res[name] = data
-     return res

def generate_cost_comparison(
-     compute_hours, tokens_per_month, input_ratio, api_calls,
-     model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
):
-     years = int(multi_year_commitment)
-     in_tokens = tokens_per_month * (input_ratio / 100)
-     out_tokens = tokens_per_month - in_tokens
-     min_mem = model_sizes[model_size]['memory_required']
-
-     aws_comp = filter_compatible(aws_instances, min_mem)
-     gcp_comp = filter_compatible(gcp_instances, min_mem)
-
    results = []
-     # AWS
-     if aws_comp:
-         best_aws = min(aws_comp.keys(), key=lambda x: calculate_aws_cost(x, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost'])
-         best_aws_cost = calculate_aws_cost(best_aws, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost']
-         results.append({'provider': f'AWS ({best_aws})', 'cost': best_aws_cost, 'type': 'Cloud'})
-     # GCP
-     if gcp_comp:
-         best_gcp = min(gcp_comp.keys(), key=lambda x: calculate_gcp_cost(x, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost'])
-         best_gcp_cost = calculate_gcp_cost(best_gcp, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost']
-         results.append({'provider': f'GCP ({best_gcp})', 'cost': best_gcp_cost, 'type': 'Cloud'})
-     # API (TogetherAI only)
-     api_opts = {(prov, m): calculate_api_cost(prov, m, in_tokens, out_tokens, api_calls)['total_cost']
-                 for prov in api_pricing for m in api_pricing[prov]}
-     best_api = min(api_opts, key=api_opts.get)
-     results.append({'provider': f'{best_api[0]} ({best_api[1]})', 'cost': api_opts[best_api], 'type': 'API'})
-
-     # Build bar chart
-     df_res = pd.DataFrame(results)
-     aws_name = df_res[df_res['type']=='Cloud']['provider'].iloc[0]
-     gcp_name = df_res[df_res['type']=='Cloud']['provider'].iloc[1]
-     api_name = df_res[df_res['type']=='API']['provider'].iloc[0]
-
    fig = px.bar(
-         df_res, x='provider', y='cost', color='provider',
-         color_discrete_map={
-             aws_name: '#FF9900',  # AWS orange
-             gcp_name: '#4285F4',  # GCP blue
-             api_name: '#D62828'   # TogetherAI red
-         },
-         title='Monthly Cost Comparison',
-         labels={'provider': 'Provider', 'cost': 'Monthly Cost'}
    )
-     fig.update_yaxes(tickprefix='$')
-     fig.update_layout(showlegend=False, height=500)
-
-     # HTML summary tables omitted for brevity
-     html_tables = '<div>'
-     # ... you can reinsert your HTML tables here if needed
-     html_tables += '</div>'
-     return html_tables, fig

def app_function(
-     compute_hours, tokens_per_month, input_ratio, api_calls,
-     model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
):
-     return generate_cost_comparison(
-         compute_hours, tokens_per_month, input_ratio, api_calls,
-         model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
    )

- # Gradio UI
- def main():
-     with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
-         gr.HTML("""
-             <div style="text-align:center; margin-bottom:20px;">
-                 <h1>Cloud Cost Estimator</h1>
-                 <p>Compare cloud vs API costs</p>
-             </div>
-         """)
-         with gr.Row():
-             with gr.Column(scale=1):
-                 compute_hours = gr.Slider("Compute Hours per Month", 1, 730, 100)
-                 tokens_per_month = gr.Slider("Tokens per Month (M)", 1, 1000, 10)
-                 input_ratio = gr.Slider("Input Ratio (%)", 10, 90, 30)
-                 api_calls = gr.Slider("API Calls per Month", 100, 1_000_000, 10000, step=100)
-                 model_size = gr.Dropdown(list(model_sizes.keys()), value="Medium (13B parameters)")
-                 storage_gb = gr.Slider("Storage (GB)", 10, 1000, 100)
-                 reserved_instances = gr.Checkbox("Reserved Instances", value=False)
-                 spot_instances = gr.Checkbox("Spot Instances", value=False)
-                 multi_year_commitment = gr.Radio(["1","3"], value="1")
-                 submit = gr.Button("Calculate Costs")
-             with gr.Column(scale=2):
-                 out_html = gr.HTML()
-                 out_plot = gr.Plot()
-         submit.click(app_function,
-             inputs=[compute_hours, tokens_per_month, input_ratio, api_calls,
-                     model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment],
-             outputs=[out_html, out_plot])
-     demo.launch()

- if __name__ == "__main__":
-     main()
 
import pandas as pd
import numpy as np
import plotly.express as px
+ import plotly.graph_objects as go

+ # Initialize pricing data
+ # AWS pricing - Instance types and their properties
aws_instances = {
    "g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB"},
    "g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB"},

    "p4d.24xlarge": {"vcpus": 96, "memory": 1152, "gpu": "8x NVIDIA A100", "hourly_rate": 32.77, "gpu_memory": "8x40GB"}
}

+ # GCP pricing - Instance types and their properties
gcp_instances = {
    "a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB"},
    "a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB"},

    "g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB"}
}

+ # API pricing - Models and their prices
api_pricing = {
    "OpenAI": {
        "GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},

    }
}

+ # Model sizes and memory requirements
model_sizes = {
+     "Small (7B parameters)": {"memory_required": 14, "throughput_factor": 1.0},
+     "Medium (13B parameters)": {"memory_required": 26, "throughput_factor": 0.7},
+     "Large (70B parameters)": {"memory_required": 140, "throughput_factor": 0.3},
+     "XL (180B parameters)": {"memory_required": 360, "throughput_factor": 0.15},
}

+ # Calculate costs
  def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
+     instance_data = aws_instances[instance]
+     base_hourly = instance_data["hourly_rate"]
+
+     # Apply discounts for reservation or spot
    if spot:
+         hourly_rate = base_hourly * 0.3  # 70% discount for spot
    elif reserved:
+         discount_factors = {1: 0.6, 3: 0.4}  # 40% for 1 year, 60% for 3 years
+         hourly_rate = base_hourly * discount_factors.get(years, 0.6)
+     else:
+         hourly_rate = base_hourly
+
+     compute_cost = hourly_rate * hours
+     storage_cost = storage * 0.10  # $0.10 per GB for EBS
+
+     return {
+         "compute_cost": compute_cost,
+         "storage_cost": storage_cost,
+         "total_cost": compute_cost + storage_cost,
+         "instance_details": instance_data
+     }

def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
+     instance_data = gcp_instances[instance]
+     base_hourly = instance_data["hourly_rate"]
+
+     # Apply discounts
    if spot:
+         hourly_rate = base_hourly * 0.2  # 80% discount for preemptible
    elif reserved:
+         discount_factors = {1: 0.7, 3: 0.5}  # 30% for 1 year, 50% for 3 years
+         hourly_rate = base_hourly * discount_factors.get(years, 0.7)
+     else:
+         hourly_rate = base_hourly
+
+     compute_cost = hourly_rate * hours
+     storage_cost = storage * 0.04  # $0.04 per GB for Standard SSD
+
+     return {
+         "compute_cost": compute_cost,
+         "storage_cost": storage_cost,
+         "total_cost": compute_cost + storage_cost,
+         "instance_details": instance_data
+     }

  def calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls):
+     model_data = api_pricing[provider][model]
+
+     input_cost = (input_tokens * model_data["input_per_1M"]) / 1
+     output_cost = (output_tokens * model_data["output_per_1M"]) / 1
+
+     # Add a small cost for API calls for some providers
+     api_call_costs = 0
+     if provider == "TogetherAI":
+         api_call_costs = api_calls * 0.0001  # $0.0001 per request
+
+     total_cost = input_cost + output_cost + api_call_costs
+
+     return {
+         "input_cost": input_cost,
+         "output_cost": output_cost,
+         "api_call_cost": api_call_costs,
+         "total_cost": total_cost,
+         "model_details": model_data
+     }

+ # Filter instances based on model size requirements
+ def filter_compatible_instances(instances_dict, min_memory_required):
+     compatible = {}
+     for name, data in instances_dict.items():
+         # Parse GPU memory
+         memory_str = data["gpu_memory"]
+
+         # Handle multiple GPUs
+         if "x" in memory_str and not memory_str.startswith(("1x", "2x", "4x", "8x")):
+             # Format: "16GB"
+             memory_val = int(memory_str.split("GB")[0])
+         elif "x" in memory_str:
+             # Format: "8x40GB"
+             parts = memory_str.split("x")
+             num_gpus = int(parts[0])
+             memory_per_gpu = int(parts[1].split("GB")[0])
+             memory_val = num_gpus * memory_per_gpu
        else:
+             # Format: "40GB"
+             memory_val = int(memory_str.split("GB")[0])
+
+         if memory_val >= min_memory_required:
+             compatible[name] = data
+
+     return compatible

  def generate_cost_comparison(
+     compute_hours,
+     tokens_per_month,
+     input_ratio,
+     api_calls,
+     model_size,
+     storage_gb,
+     reserved_instances,
+     spot_instances,
+     multi_year_commitment
):
+     # Calculate input and output tokens
+     input_tokens = tokens_per_month * (input_ratio / 100)
+     output_tokens = tokens_per_month * (1 - (input_ratio / 100))
+
+     # Check model memory requirements
+     min_memory_required = model_sizes[model_size]["memory_required"]
+
+     # Filter compatible instances
+     compatible_aws = filter_compatible_instances(aws_instances, min_memory_required)
+     compatible_gcp = filter_compatible_instances(gcp_instances, min_memory_required)
+
    results = []
+
+     # Generate HTML for AWS options
+     if compatible_aws:
+         aws_results = "<h3>AWS Compatible Instances</h3>"
+         aws_results += "<table width='100%'><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Hourly Rate</th><th>Monthly Cost</th></tr>"
+
+         best_aws = None
+         best_aws_cost = float('inf')
+
+         for instance in compatible_aws:
+             cost_result = calculate_aws_cost(instance, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
+             total_cost = cost_result["total_cost"]
+
+             if total_cost < best_aws_cost:
+                 best_aws = instance
+                 best_aws_cost = total_cost
+
+             aws_results += f"<tr><td>{instance}</td><td>{compatible_aws[instance]['vcpus']}</td><td>{compatible_aws[instance]['memory']}GB</td><td>{compatible_aws[instance]['gpu']}</td><td>${compatible_aws[instance]['hourly_rate']:.3f}</td><td>${total_cost:.2f}</td></tr>"
+
+         aws_results += "</table>"
+
+         if best_aws:
+             best_aws_data = calculate_aws_cost(best_aws, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
+             results.append({
+                 "provider": f"AWS ({best_aws})",
+                 "cost": best_aws_data["total_cost"],
+                 "type": "Cloud"
+             })
+     else:
+         aws_results = "<h3>AWS Compatible Instances</h3><p>No compatible AWS instances found for this model size.</p>"
+         best_aws = None
+         best_aws_cost = float('inf')
+
+     # Generate HTML for GCP options
+     if compatible_gcp:
+         gcp_results = "<h3>Google Cloud Compatible Instances</h3>"
+         gcp_results += "<table width='100%'><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Hourly Rate</th><th>Monthly Cost</th></tr>"
+
+         best_gcp = None
+         best_gcp_cost = float('inf')
+
+         for instance in compatible_gcp:
+             cost_result = calculate_gcp_cost(instance, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
+             total_cost = cost_result["total_cost"]
+
+             if total_cost < best_gcp_cost:
+                 best_gcp = instance
+                 best_gcp_cost = total_cost
+
+             gcp_results += f"<tr><td>{instance}</td><td>{compatible_gcp[instance]['vcpus']}</td><td>{compatible_gcp[instance]['memory']}GB</td><td>{compatible_gcp[instance]['gpu']}</td><td>${compatible_gcp[instance]['hourly_rate']:.3f}</td><td>${total_cost:.2f}</td></tr>"
+
+         gcp_results += "</table>"
+
+         if best_gcp:
+             best_gcp_data = calculate_gcp_cost(best_gcp, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
+             results.append({
+                 "provider": f"GCP ({best_gcp})",
+                 "cost": best_gcp_data["total_cost"],
+                 "type": "Cloud"
+             })
+     else:
+         gcp_results = "<h3>Google Cloud Compatible Instances</h3><p>No compatible Google Cloud instances found for this model size.</p>"
+         best_gcp = None
+         best_gcp_cost = float('inf')
+
+     # Generate HTML for API options
+     api_results = "<h3>API Options</h3>"
+     api_results += "<table width='100%'><tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Total Cost</th><th>Context Length</th></tr>"
+
+     api_costs = {}
+
+     for provider in api_pricing:
+         for model in api_pricing[provider]:
+             cost_data = calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls)
+             api_costs[(provider, model)] = cost_data
+
+             api_results += f"<tr><td>{provider}</td><td>{model}</td><td>${cost_data['input_cost']:.2f}</td><td>${cost_data['output_cost']:.2f}</td><td>${cost_data['total_cost']:.2f}</td><td>{api_pricing[provider][model]['token_context']:,}</td></tr>"
+
+     api_results += "</table>"
+
+     # Find best API option
+     best_api = min(api_costs.keys(), key=lambda x: api_costs[x]["total_cost"])
+     best_api_cost = api_costs[best_api]
+
+     results.append({
+         "provider": f"{best_api[0]} ({best_api[1]})",
+         "cost": best_api_cost["total_cost"],
+         "type": "API"
+     })
+
+     # Create recommendation HTML
+     recommendation = "<h3>Recommendation</h3>"
+
+     # Find the cheapest option
+     cheapest = min(results, key=lambda x: x["cost"])
+
+     if cheapest["type"] == "API":
+         recommendation += f"<p>Based on your usage parameters, the <strong>{cheapest['provider']}</strong> API endpoint is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"
+
+         # Calculate API vs cloud cost ratio
+         cheapest_cloud = None
+         for result in results:
+             if result["type"] == "Cloud":
+                 if cheapest_cloud is None or result["cost"] < cheapest_cloud["cost"]:
+                     cheapest_cloud = result
+
+         if cheapest_cloud:
+             ratio = cheapest_cloud["cost"] / cheapest["cost"]
+             recommendation += f"<p>This is <strong>{ratio:.1f}x cheaper</strong> than the most affordable cloud option ({cheapest_cloud['provider']}).</p>"
+     else:
+         recommendation += f"<p>Based on your usage parameters, <strong>{cheapest['provider']}</strong> is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"
+
+         # Find cheapest API
+         cheapest_api = None
+         for result in results:
+             if result["type"] == "API":
+                 if cheapest_api is None or result["cost"] < cheapest_api["cost"]:
+                     cheapest_api = result
+
+         if cheapest_api:
+             ratio = cheapest_api["cost"] / cheapest["cost"]
+             if ratio > 1:
+                 recommendation += f"<p>This is <strong>{1/ratio:.1f}x cheaper</strong> than the most affordable API option ({cheapest_api['provider']}).</p>"
+             else:
+                 recommendation += f"<p>However, the API option ({cheapest_api['provider']}) is <strong>{ratio:.1f}x cheaper</strong>.</p>"
+
+     # Additional recommendation text
+     if tokens_per_month > 100 and cheapest["type"] == "Cloud":
+         recommendation += "<p>With your high token volume, cloud hardware becomes more cost-effective despite the higher upfront costs.</p>"
+     elif compute_hours < 50 and cheapest["type"] == "API":
+         recommendation += "<p>With your low usage hours, API endpoints are more cost-effective as you only pay for what you use.</p>"
+
+     # Create breakeven analysis HTML
+     breakeven = "<h3>Breakeven Analysis</h3>"
+
+     if best_aws is not None and best_api_cost["total_cost"] > 0:
+         aws_hourly = aws_instances[best_aws]["hourly_rate"]
+         breakeven_hours = best_api_cost["total_cost"] / aws_hourly
+
+         breakeven += f"<p>API vs AWS: <strong>{breakeven_hours:.1f} hours</strong> is the breakeven point.</p>"
+
+         if compute_hours > breakeven_hours:
+             breakeven += "<p>You're past the breakeven point - AWS hardware is more cost-effective than API usage.</p>"
+         else:
+             breakeven += "<p>You're below the breakeven point - API usage is more cost-effective than AWS hardware.</p>"
+
+     if best_gcp is not None and best_api_cost["total_cost"] > 0:
+         gcp_hourly = gcp_instances[best_gcp]["hourly_rate"]
+         breakeven_hours = best_api_cost["total_cost"] / gcp_hourly
+
+         breakeven += f"<p>API vs GCP: <strong>{breakeven_hours:.1f} hours</strong> is the breakeven point.</p>"
+
+         if compute_hours > breakeven_hours:
+             breakeven += "<p>You're past the breakeven point - GCP hardware is more cost-effective than API usage.</p>"
+         else:
+             breakeven += "<p>You're below the breakeven point - API usage is more cost-effective than GCP hardware.</p>"
+
+     # Generate cost comparison chart
    fig = px.bar(
+         pd.DataFrame(results),
+         x="provider",
+         y="cost",
+         color="type",
+         color_discrete_map={"Cloud": "#3B82F6", "API": "#8B5CF6"},
+         title="Monthly Cost Comparison",
+         labels={"provider": "Provider & Instance", "cost": "Monthly Cost ($)"}
    )
+
+     fig.update_layout(height=500)
+
+     # Create HTML structure for the results
+     html_output = f"""
+     <div style="padding: 20px; font-family: Arial, sans-serif;">
+         <h2>Cost Comparison Results</h2>
+
+         <div style="margin-bottom: 20px;">
+             {aws_results}
+         </div>
+
+         <div style="margin-bottom: 20px;">
+             {gcp_results}
+         </div>
+
+         <div style="margin-bottom: 20px;">
+             {api_results}
+         </div>
+
+         <div style="margin-bottom: 20px;">
+             {recommendation}
+         </div>
+
+         <div style="margin-bottom: 20px;">
+             {breakeven}
+         </div>
+
+         <div style="margin-bottom: 20px;">
+             <h3>Additional Considerations</h3>
+             <div style="display: flex; gap: 20px;">
+                 <div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
+                     <h4>Cloud Hardware Pros</h4>
+                     <ul>
+                         <li>Full control over infrastructure and customization</li>
+                         <li>Predictable costs for steady, high-volume workloads</li>
+                         <li>Can run multiple models simultaneously</li>
+                         <li>No token context limitations</li>
+                         <li>Data stays on your infrastructure</li>
+                     </ul>
+                 </div>
+                 <div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
+                     <h4>API Endpoints Pros</h4>
+                     <ul>
+                         <li>No infrastructure management overhead</li>
+                         <li>Pay-per-use model (ideal for sporadic usage)</li>
+                         <li>Instant scalability</li>
+                         <li>No upfront costs or commitment</li>
+                         <li>Automatic updates to newer model versions</li>
+                     </ul>
+                 </div>
+             </div>
+         </div>
+
+         <div style="background-color: #FEF3C7; padding: 15px; border-radius: 8px; margin-bottom: 20px;">
+             <p><strong>Note:</strong> These estimates are based on current pricing as of May 2025 and may vary based on regional pricing differences, discounts, and usage patterns.</p>
+         </div>
+     </div>
+     """
+
+     return html_output, fig

+ # Main app function
def app_function(
+     compute_hours,
+     tokens_per_month,
+     input_ratio,
+     api_calls,
+     model_size,
+     storage_gb,
+     batch_size,
+     reserved_instances,
+     spot_instances,
+     multi_year_commitment
):
+     html_output, fig = generate_cost_comparison(
+         compute_hours,
+         tokens_per_month,
+         input_ratio,
+         api_calls,
+         model_size,
+         storage_gb,
+         reserved_instances,
+         spot_instances,
+         multi_year_commitment
    )
+
+     return html_output, fig

+ # Define the Gradio interface
+ with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
+     gr.HTML("""
+     <div style="text-align: center; margin-bottom: 20px;">
+         <h1 style="color: #4F46E5; font-size: 2.5rem;">Cloud Cost Estimator</h1>
+         <p style="font-size: 1.2rem;">Compare costs between cloud hardware configurations and inference API endpoints</p>
+     </div>
+     """)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.HTML("<h3>Usage Parameters</h3>")
+
+             compute_hours = gr.Slider(
+                 label="Compute Hours per Month",
+                 minimum=1,
+                 maximum=730,
+                 value=100,
+                 info="Number of hours you'll run the model per month"
+             )
+
+             tokens_per_month = gr.Slider(
+                 label="Tokens Processed per Month (millions)",
+                 minimum=1,
+                 maximum=1000,
+                 value=10,
+                 info="Total number of tokens processed per month in millions"
+             )
+
+             input_ratio = gr.Slider(
+                 label="Input Token Ratio (%)",
+                 minimum=10,
+                 maximum=90,
+                 value=30,
+                 info="Percentage of total tokens that are input tokens"
+             )
+
+             api_calls = gr.Slider(
+                 label="API Calls per Month",
+                 minimum=100,
+                 maximum=1000000,
+                 value=10000,
+                 step=100,
+                 info="Number of API calls made per month"
+             )
+
+             model_size = gr.Dropdown(
+                 label="Model Size",
+                 choices=list(model_sizes.keys()),
+                 value="Medium (13B parameters)",
+                 info="Size of the language model you want to run"
+             )
+
+             storage_gb = gr.Slider(
+                 label="Storage Required (GB)",
+                 minimum=10,
+                 maximum=1000,
+                 value=100,
+                 info="Amount of storage required for models and data"
+             )
+
+             batch_size = gr.Slider(
+                 label="Batch Size",
+                 minimum=1,
+                 maximum=64,
+                 value=4,
+                 info="Batch size for inference (affects throughput)"
+             )
+
+             gr.HTML("<h3>Advanced Options</h3>")
+
+             reserved_instances = gr.Checkbox(
+                 label="Use Reserved Instances",
+                 value=False,
+                 info="Reserved instances offer significant discounts with 1-3 year commitments"
+             )
+
+             spot_instances = gr.Checkbox(
+                 label="Use Spot/Preemptible Instances",
+                 value=False,
+                 info="Spot instances can be 70-90% cheaper but may be terminated with little notice"
+             )
+
+             multi_year_commitment = gr.Radio(
+                 label="Commitment Period (if using Reserved Instances)",
+                 choices=["1", "3"],
+                 value="1",
+                 info="Length of reserved instance commitment in years"
+             )
+
+             submit_button = gr.Button("Calculate Costs", variant="primary")
+
+         with gr.Column(scale=2):
+             results_html = gr.HTML(label="Results")
+             plot_output = gr.Plot(label="Cost Comparison")
+
+     submit_button.click(
+         app_function,
+         inputs=[
+             compute_hours,
+             tokens_per_month,
+             input_ratio,
+             api_calls,
+             model_size,
+             storage_gb,
+             reserved_instances,
+             spot_instances,
+             multi_year_commitment
+         ],
+         outputs=[results_html, plot_output]
+     )
+
+     gr.HTML("""
+     <div style="margin-top: 30px; border-top: 1px solid #e5e7eb; padding-top: 20px;">
+         <h3>Help & Resources</h3>
+         <p><strong>Cloud Provider Documentation:</strong>
+         <a href="https://aws.amazon.com/ec2/pricing/" target="_blank">AWS EC2 Pricing</a> |
+         <a href="https://cloud.google.com/compute/pricing" target="_blank">GCP Compute Engine Pricing</a>
+         </p>
+         <p><strong>API Provider Documentation:</strong>
+         <a href="https://openai.com/pricing" target="_blank">OpenAI API Pricing</a> |
+         <a href="https://www.anthropic.com/api" target="_blank">Anthropic Claude API Pricing</a> |
+         <a href="https://www.together.ai/pricing" target="_blank">TogetherAI API Pricing</a>
+         </p>
+         <p>Made with ❤️ by Cloud Cost Estimator | Data last updated: May 2025</p>
+     </div>
+     """)
+
+ demo.launch()
+                 value=False,
+                 info="Spot instances can be 70-90% cheaper but may be terminated with little notice"
+             )
+
+             multi_year_commitment = gr.Radio(
+                 label="Commitment Period (if using Reserved Instances)",
+                 choices=[1, 3],
+                 value=1,
+                 info="Length of reserved instance commitment in years"
+             )
+
+             submit_button = gr.Button("Calculate Costs", variant="primary")
+
+         with gr.Column(scale=2):
+             results_html = gr.HTML(label="Results")
+             plot_output = gr.Plot(label="Cost Comparison")
+
+     submit_button.click(
+         app_function,
+         inputs=[
+             compute_hours,
+             tokens_per_month,
+             input_ratio,
+             api_calls,
+             model_size,
+             storage_gb,
+             batch_size,
+             reserved_instances,
+             spot_instances,
+             multi_year_commitment
+         ],
+         outputs=[results_html, plot_output]
+     )
+
+     gr.HTML("""
+     <div style="margin-top: 30px; border-top: 1px solid #e5e7eb; padding-top: 20px;">
+         <h3>Help & Resources</h3>
+         <p><strong>Cloud Provider Documentation:</strong>
+         <a href="https://aws.amazon.com/ec2/pricing/" target="_blank">AWS EC2 Pricing</a> |
+         <a href="https://cloud.google.com/compute/pricing" target="_blank">GCP Compute Engine Pricing</a>
+         </p>
+         <p><strong>API Provider Documentation:</strong>
+         <a href="https://openai.com/pricing" target="_blank">OpenAI API Pricing</a> |
+         <a href="https://www.anthropic.com/api" target="_blank">Anthropic Claude API Pricing</a> |
+         <a href="https://www.together.ai/pricing" target="_blank">TogetherAI API Pricing</a>
+         </p>
+         <p>Made with ❤️ by Cloud Cost Estimator | Data last updated: May 2025</p>
+     </div>
+     """)

+ demo.launch()