import gradio as gr
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
# Updated pricing data - restructured for better comparison
# AWS on-demand GPU instance catalog consumed by calculate_aws_cost and
# filter_compatible.
#   vcpus / memory: instance vCPU count and system RAM in GB
#   hourly_rate:    on-demand USD per hour
#   gpu_memory:     "NGB" for one GPU, or "CxNGB" for C GPUs of N GB each
#                   (filter_compatible parses this to a total GB figure)
#   tier:           coarse bucket used by the UI tier filter
# NOTE(review): rates are a point-in-time snapshot — confirm against the
# current AWS pricing pages before trusting absolute numbers.
aws_instances = {
    # T4 GPU Instances (entry level)
    "g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB", "tier": "Entry"},
    "g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB", "tier": "Entry"},
    # A10G GPU Instances (mid-tier)
    "g5.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA A10G", "hourly_rate": 0.65, "gpu_memory": "24GB", "tier": "Mid"},
    "g5.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA A10G", "hourly_rate": 1.006, "gpu_memory": "24GB", "tier": "Mid"},
    # V100 GPU Instances (high-tier)
    "p3.2xlarge": {"vcpus": 8, "memory": 61, "gpu": "1x NVIDIA V100", "hourly_rate": 3.06, "gpu_memory": "16GB", "tier": "High"},
    # Added comparable instances to match GCP
    "p4d.xlarge": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 4.10, "gpu_memory": "40GB", "tier": "Premium"},
    "p4d.2xlarge": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 8.20, "gpu_memory": "2x40GB", "tier": "Premium"},
    "p4d.4xlarge": {"vcpus": 48, "memory": 340, "gpu": "4x NVIDIA A100", "hourly_rate": 16.40, "gpu_memory": "4x40GB", "tier": "Premium"},
}
# GCP GPU instance catalog, mirroring the structure of aws_instances so the
# two providers can be compared tier-by-tier. Same field semantics:
# hourly_rate is on-demand USD/hour; gpu_memory is "NGB" or "CxNGB".
# NOTE(review): snapshot prices — verify against current GCP pricing pages.
gcp_instances = {
    # T4 GPU Instances (entry level)
    "n1-standard-4-t4": {"vcpus": 4, "memory": 15, "gpu": "1x NVIDIA T4", "hourly_rate": 0.49, "gpu_memory": "16GB", "tier": "Entry"},
    "n1-standard-8-t4": {"vcpus": 8, "memory": 30, "gpu": "1x NVIDIA T4", "hourly_rate": 0.69, "gpu_memory": "16GB", "tier": "Entry"},
    # L4 GPU Instances (mid-tier)
    "g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB", "tier": "Mid"},
    "g2-standard-8": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA L4", "hourly_rate": 0.89, "gpu_memory": "24GB", "tier": "Mid"},
    # Added comparable V100 instance
    "n1-standard-8-v100": {"vcpus": 8, "memory": 60, "gpu": "1x NVIDIA V100", "hourly_rate": 2.95, "gpu_memory": "16GB", "tier": "High"},
    # A100 GPU Instances (premium)
    "a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB", "tier": "Premium"},
    "a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB", "tier": "Premium"},
    "a2-highgpu-4g": {"vcpus": 48, "memory": 340, "gpu": "4x NVIDIA A100", "hourly_rate": 5.86, "gpu_memory": "4x40GB", "tier": "Premium"},
}
# Hosted LLM API price list consumed by calculate_api_cost.
#   input_per_1M / output_per_1M: USD per million input / output tokens
#   token_context: model context window size in tokens (display only)
# NOTE(review): provider prices and context sizes drift over time — confirm
# against each provider's current pricing page before relying on them.
api_pricing = {
    "OpenAI": {
        "GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},
        "GPT-4o": {"input_per_1M": 5.0, "output_per_1M": 15.0, "token_context": 32768},
        "GPT-4o-mini": {"input_per_1M": 2.5, "output_per_1M": 7.5, "token_context": 32768},
    },
    "TogetherAI": {
        "Llama-3-8B": {"input_per_1M": 0.15, "output_per_1M": 0.15, "token_context": 8192},
        "Llama-3-70B": {"input_per_1M": 0.9, "output_per_1M": 0.9, "token_context": 8192},
        "Llama-2-13B": {"input_per_1M": 0.6, "output_per_1M": 0.6, "token_context": 4096},
        "Llama-2-70B": {"input_per_1M": 2.5, "output_per_1M": 2.5, "token_context": 4096},
        "DeepSeek-Coder-33B": {"input_per_1M": 2.0, "output_per_1M": 2.0, "token_context": 16384},
    },
    "Anthropic": {
        "Claude-3-Opus": {"input_per_1M": 15.0, "output_per_1M": 75.0, "token_context": 200000},
        "Claude-3-Sonnet": {"input_per_1M": 3.0, "output_per_1M": 15.0, "token_context": 200000},
        "Claude-3-Haiku": {"input_per_1M": 0.25, "output_per_1M": 1.25, "token_context": 200000},
    }
}
# GPU memory (GB) required to host each model size; compared against the
# parsed gpu_memory totals in filter_compatible. Values are exactly 2 GB per
# billion parameters (consistent with fp16 weights — presumably; confirm).
model_sizes = {
    "Small (7B parameters)": {"memory_required": 14},
    "Medium (13B parameters)": {"memory_required": 26},
    "Large (70B parameters)": {"memory_required": 140},
    "XL (180B parameters)": {"memory_required": 360},
}
def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
    """Estimate the monthly AWS cost for an instance from ``aws_instances``.

    Args:
        instance: key into ``aws_instances``.
        hours: compute hours for the month.
        storage: storage size in GB, billed at a flat $0.10/GB-month.
        reserved: apply reserved-instance discounting (ignored when spot).
        spot: apply spot pricing (takes precedence over reserved).
        years: commitment length (1 or 3) used for the reserved discount.

    Returns:
        dict with ``total_cost`` (compute + storage) and ``details``
        (the raw instance record).
    """
    details = aws_instances[instance]
    hourly = details['hourly_rate']
    if spot:
        # Spot capacity modeled at 30% of the on-demand rate.
        hourly = hourly * 0.3
    elif reserved:
        # 1-year vs 3-year reserved factors; unknown terms fall back to 1-year.
        hourly = hourly * {1: 0.6, 3: 0.4}.get(years, 0.6)
    compute_cost = hourly * hours
    storage_cost = 0.10 * storage
    return {'total_cost': compute_cost + storage_cost, 'details': details}
def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
    """Estimate the monthly GCP cost for an instance from ``gcp_instances``.

    Args:
        instance: key into ``gcp_instances``.
        hours: compute hours for the month.
        storage: storage size in GB, billed at a flat $0.04/GB-month.
        reserved: apply committed-use discounting (ignored when spot).
        spot: apply spot pricing (takes precedence over reserved).
        years: commitment length (1 or 3) used for the reserved discount.

    Returns:
        dict with ``total_cost`` (compute + storage) and ``details``
        (the raw instance record).
    """
    details = gcp_instances[instance]
    hourly = details['hourly_rate']
    if spot:
        # Spot capacity modeled at 20% of the on-demand rate.
        hourly = hourly * 0.2
    elif reserved:
        # 1-year vs 3-year committed-use factors; default to the 1-year rate.
        hourly = hourly * {1: 0.7, 3: 0.5}.get(years, 0.7)
    compute_cost = hourly * hours
    storage_cost = 0.04 * storage
    return {'total_cost': compute_cost + storage_cost, 'details': details}
def calculate_api_cost(provider, model, in_tokens, out_tokens, calls):
    """Estimate the monthly cost of serving via a hosted LLM API.

    Args:
        provider: top-level key into ``api_pricing``.
        model: model key under that provider.
        in_tokens: absolute number of input tokens for the month.
        out_tokens: absolute number of output tokens for the month.
        calls: number of API requests (only TogetherAI is modeled with a
            per-request surcharge; other providers are token-only).

    Returns:
        dict with ``total_cost`` and ``details`` (the raw pricing record).
    """
    pricing = api_pricing[provider][model]
    cost_in = pricing['input_per_1M'] * in_tokens / 1_000_000
    cost_out = pricing['output_per_1M'] * out_tokens / 1_000_000
    per_call = 0.0001 * calls if provider == 'TogetherAI' else 0
    return {'total_cost': cost_in + cost_out + per_call, 'details': pricing}
def filter_compatible(instances, min_mem):
    """Return the subset of *instances* whose total GPU memory is >= min_mem GB.

    ``gpu_memory`` strings are either "NGB" (single GPU with N GB) or
    "CxNGB" (C GPUs of N GB each); the total is C * N.

    Bug fix: the original code had a branch for 'x'-containing strings not
    prefixed 1x/2x/4x/8x that called int() on a string still containing 'x',
    which would always raise ValueError. Any "CxNGB" form is now parsed by
    splitting on the first 'x' instead.

    Args:
        instances: mapping of instance name -> record with a 'gpu_memory' key.
        min_mem: minimum total GPU memory in GB.

    Returns:
        dict of the compatible name -> record pairs (insertion order kept).
    """
    compatible = {}
    for name, data in instances.items():
        mem_str = data['gpu_memory'].replace('GB', '')
        if 'x' in mem_str:
            count, per_gpu = mem_str.split('x', 1)
            total = int(count) * int(per_gpu)
        else:
            total = int(mem_str)
        if total >= min_mem:
            compatible[name] = data
    return compatible
def generate_cost_comparison(
    compute_hours, tokens_per_month, input_ratio, api_calls,
    model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment,
    comparison_tier
):
    """Build the cost-comparison HTML report and plotly summary chart.

    Args mirror the Gradio inputs: compute hours/month, token volume in
    millions, input-token percentage, monthly API calls, model-size key,
    storage GB, pricing flags, commitment years (string "1"/"3"), and the
    tier filter ("All", "Entry", "Mid", "High", "Premium", "API").

    Returns:
        (html, fig): HTML string with per-provider tables, and a plotly bar
        chart of the cheapest option per provider, colored by tier.

    NOTE(review): the HTML markup here was reconstructed — the original
    string literals arrived garbled (tags stripped, quotes unterminated).
    The table structure matches the visible cell contents; confirm styling
    against the intended design.
    """
    years = int(multi_year_commitment)  # radio delivers "1"/"3" as strings
    total_tokens = tokens_per_month * 1_000_000  # slider value is in millions
    in_tokens = total_tokens * (input_ratio / 100)
    out_tokens = total_tokens - in_tokens
    min_mem = model_sizes[model_size]['memory_required']

    # Filter by both memory requirements and tier if a tier is selected.
    aws_comp = filter_compatible(aws_instances, min_mem)
    gcp_comp = filter_compatible(gcp_instances, min_mem)
    if comparison_tier != "All":
        aws_comp = {k: v for k, v in aws_comp.items() if v.get('tier', '') == comparison_tier}
        gcp_comp = {k: v for k, v in gcp_comp.items() if v.get('tier', '') == comparison_tier}

    results = []  # one "best option" row per provider for the chart
    header = ('<tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th>'
              '<th>Tier</th><th>Monthly Cost ($)</th></tr>')

    # --- AWS table ---
    aws_html = '<h3>AWS Instances</h3><table>' + header
    if aws_comp:
        aws_costs = {}  # cache so the "best" pick doesn't recompute costs
        for inst in aws_comp:
            res = calculate_aws_cost(inst, compute_hours, storage_gb,
                                     reserved_instances, spot_instances, years)
            aws_costs[inst] = res['total_cost']
            d = res['details']
            aws_html += (f'<tr><td>{inst}</td><td>{d["vcpus"]}</td>'
                         f'<td>{d["memory"]}GB</td><td>{d["gpu"]}</td>'
                         f'<td>{d.get("tier", "")}</td>'
                         f'<td>${res["total_cost"]:.2f}</td></tr>')
        best_aws = min(aws_costs, key=aws_costs.get)
        results.append({'provider': f'AWS ({best_aws})', 'cost': aws_costs[best_aws],
                        'type': 'Cloud', 'tier': aws_instances[best_aws].get('tier', '')})
    else:
        aws_html += '<tr><td colspan="6">No compatible instances</td></tr>'
    aws_html += '</table>'

    # --- GCP table ---
    gcp_html = '<h3>GCP Instances</h3><table>' + header
    if gcp_comp:
        gcp_costs = {}
        for inst in gcp_comp:
            res = calculate_gcp_cost(inst, compute_hours, storage_gb,
                                     reserved_instances, spot_instances, years)
            gcp_costs[inst] = res['total_cost']
            d = res['details']
            gcp_html += (f'<tr><td>{inst}</td><td>{d["vcpus"]}</td>'
                         f'<td>{d["memory"]}GB</td><td>{d["gpu"]}</td>'
                         f'<td>{d.get("tier", "")}</td>'
                         f'<td>${res["total_cost"]:.2f}</td></tr>')
        best_gcp = min(gcp_costs, key=gcp_costs.get)
        results.append({'provider': f'GCP ({best_gcp})', 'cost': gcp_costs[best_gcp],
                        'type': 'Cloud', 'tier': gcp_instances[best_gcp].get('tier', '')})
    else:
        gcp_html += '<tr><td colspan="6">No compatible instances</td></tr>'
    gcp_html += '</table>'

    # --- API table (all providers/models, no filtering) ---
    api_html = ('<h3>API Options</h3><table>'
                '<tr><th>Provider</th><th>Model</th><th>Input Cost</th>'
                '<th>Output Cost</th><th>Total Cost ($)</th><th>Context</th></tr>')
    api_costs = {}
    for prov, models in api_pricing.items():
        for mdl, details in models.items():
            res = calculate_api_cost(prov, mdl, in_tokens, out_tokens, api_calls)
            api_costs[(prov, mdl)] = res['total_cost']
            api_html += (f'<tr><td>{prov}</td><td>{mdl}</td>'
                         f'<td>${in_tokens * details["input_per_1M"] / 1_000_000:.2f}</td>'
                         f'<td>${out_tokens * details["output_per_1M"] / 1_000_000:.2f}</td>'
                         f'<td>${res["total_cost"]:.2f}</td>'
                         f'<td>{details["token_context"]:,}</td></tr>')
    api_html += '</table>'
    if api_costs:
        best_api = min(api_costs, key=api_costs.get)
        results.append({'provider': f'{best_api[0]} ({best_api[1]})',
                        'cost': api_costs[best_api], 'type': 'API', 'tier': 'API'})

    # --- Direct head-to-head table for a specific hardware tier ---
    direct_comparison_html = ""
    if comparison_tier != "All" and comparison_tier != "API":
        direct_comparison_html = (
            f'<h3>Direct {comparison_tier} Tier Comparison</h3><table>'
            '<tr><th>Provider</th><th>Instance</th><th>vCPUs</th><th>Memory</th>'
            '<th>GPU</th><th>Monthly Cost ($)</th></tr>')
        aws_filtered = {k: v for k, v in aws_instances.items() if v.get('tier', '') == comparison_tier}
        gcp_filtered = {k: v for k, v in gcp_instances.items() if v.get('tier', '') == comparison_tier}
        # Group by vCPU count so like-sized machines appear side by side.
        vcpu_groups = {}
        for inst, data in aws_filtered.items():
            vcpu_groups.setdefault(data['vcpus'], {'aws': [], 'gcp': []})['aws'].append(inst)
        for inst, data in gcp_filtered.items():
            vcpu_groups.setdefault(data['vcpus'], {'aws': [], 'gcp': []})['gcp'].append(inst)
        ordered_vcpus = sorted(vcpu_groups)  # sort once, not per iteration
        for vcpu in ordered_vcpus:
            group = vcpu_groups[vcpu]
            for aws_inst in group['aws']:
                cost = calculate_aws_cost(aws_inst, compute_hours, storage_gb,
                                          reserved_instances, spot_instances, years)
                d = cost['details']
                direct_comparison_html += (
                    f'<tr><td>AWS</td><td>{aws_inst}</td><td>{d["vcpus"]}</td>'
                    f'<td>{d["memory"]}GB</td><td>{d["gpu"]}</td>'
                    f'<td>${cost["total_cost"]:.2f}</td></tr>')
            for gcp_inst in group['gcp']:
                cost = calculate_gcp_cost(gcp_inst, compute_hours, storage_gb,
                                          reserved_instances, spot_instances, years)
                d = cost['details']
                direct_comparison_html += (
                    f'<tr><td>GCP</td><td>{gcp_inst}</td><td>{d["vcpus"]}</td>'
                    f'<td>{d["memory"]}GB</td><td>{d["gpu"]}</td>'
                    f'<td>${cost["total_cost"]:.2f}</td></tr>')
            # Blank separator row between different vCPU groups.
            if vcpu != ordered_vcpus[-1]:
                direct_comparison_html += '<tr><td colspan="6"></td></tr>'
        direct_comparison_html += '</table>'

    # --- Summary chart: one bar per provider's cheapest option, labeled ---
    df = pd.DataFrame(results)
    colors = {'Entry': '#66BB6A', 'Mid': '#42A5F5', 'High': '#FFA726',
              'Premium': '#EF5350', 'API': '#AB47BC'}
    fig = go.Figure()
    for _, row in df.iterrows():
        fig.add_trace(go.Bar(
            x=[row['provider']],
            y=[row['cost']],
            name=row['provider'],
            marker_color=colors.get(row.get('tier', 'API'), '#9E9E9E'),
        ))
        fig.add_annotation(
            x=row['provider'],
            y=row['cost'],
            text=f"${row['cost']:.2f}",
            showarrow=False,
            yshift=10,  # float the dollar label just above the bar
            font=dict(size=14),
        )
    fig.update_layout(
        showlegend=False,
        height=500,
        yaxis=dict(title='Monthly Cost ($)', tickprefix='$'),
        xaxis=dict(title=''),
        title='Cost Comparison',
    )

    html = f"""
    {direct_comparison_html}
    {aws_html}
    {gcp_html}
    {api_html}
    """
    return html, fig
# UI setup: every control feeds generate_cost_comparison; the report and
# chart refresh on load and on any input change.
# NOTE(review): the page title markup was reconstructed — the original
# gr.HTML string literal arrived garbled (tags stripped).
with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
    gr.HTML('<h1>Cloud Cost Estimator</h1>')
    with gr.Row():
        with gr.Column(scale=1):
            compute_hours = gr.Slider(label="Compute Hours per Month", minimum=1, maximum=300, value=50)
            tokens_per_month = gr.Slider(label="Tokens per Month (M)", minimum=1, maximum=200, value=5)
            input_ratio = gr.Slider(label="Input Ratio (%)", minimum=10, maximum=70, value=25)
            api_calls = gr.Slider(label="API Calls per Month", minimum=100, maximum=100000, value=5000, step=100)
            model_size = gr.Dropdown(label="Model Size", choices=list(model_sizes.keys()), value="Medium (13B parameters)")
            storage_gb = gr.Slider(label="Storage (GB)", minimum=10, maximum=1000, value=100)
            comparison_tier = gr.Radio(label="Comparison Tier", choices=["All", "Entry", "Mid", "High", "Premium", "API"], value="All")
            reserved_instances = gr.Checkbox(label="Reserved Instances", value=False)
            spot_instances = gr.Checkbox(label="Spot Instances", value=False)
            multi_year_commitment = gr.Radio(label="Commitment Period (years)", choices=["1", "3"], value="1")
        with gr.Column(scale=2):
            out_html = gr.HTML()
            out_plot = gr.Plot()
    # Input order must match generate_cost_comparison's signature.
    inputs = [compute_hours, tokens_per_month, input_ratio, api_calls,
              model_size, storage_gb, reserved_instances, spot_instances,
              multi_year_commitment, comparison_tier]
    outputs = [out_html, out_plot]
    # Initial calculation on page load.
    demo.load(generate_cost_comparison, inputs, outputs)
    # Recalculate whenever any single input changes.
    for input_component in inputs:
        input_component.change(generate_cost_comparison, inputs, outputs)
demo.launch()