import gradio as gr
import pandas as pd
import plotly.express as px
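# Gradio app: compare the monthly cost of self-hosting an LLM on AWS/GCP GPU
# instances against pay-per-token APIs (OpenAI, TogetherAI, Anthropic).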
# Pricing data (illustrative snapshots; verify against current provider rate cards)
aws_instances = {
"g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB"},
"g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB"},
"g5.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA A10G", "hourly_rate": 0.65, "gpu_memory": "24GB"},
"g5.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA A10G", "hourly_rate": 1.006, "gpu_memory": "24GB"},
"p3.2xlarge": {"vcpus": 8, "memory": 61, "gpu": "1x NVIDIA V100", "hourly_rate": 3.06, "gpu_memory": "16GB"},
"p4d.24xlarge": {"vcpus": 96, "memory": 1152, "gpu": "8x NVIDIA A100", "hourly_rate": 32.77, "gpu_memory": "8x40GB"}
}
gcp_instances = {
"a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB"},
"a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB"},
"a2-highgpu-4g": {"vcpus": 48, "memory": 340, "gpu": "4x NVIDIA A100", "hourly_rate": 5.86, "gpu_memory": "4x40GB"},
"n1-standard-4-t4": {"vcpus": 4, "memory": 15, "gpu": "1x NVIDIA T4", "hourly_rate": 0.49, "gpu_memory": "16GB"},
"n1-standard-8-t4": {"vcpus": 8, "memory": 30, "gpu": "1x NVIDIA T4", "hourly_rate": 0.69, "gpu_memory": "16GB"},
"g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB"}
}
api_pricing = {
"OpenAI": {
"GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},
"GPT-4o": {"input_per_1M": 5.0, "output_per_1M": 15.0, "token_context": 32768},
"GPT-4o-mini": {"input_per_1M": 2.5, "output_per_1M": 7.5, "token_context": 32768},
},
"TogetherAI": {
"Llama-3-8B": {"input_per_1M": 0.15, "output_per_1M": 0.15, "token_context": 8192},
"Llama-3-70B": {"input_per_1M": 0.9, "output_per_1M": 0.9, "token_context": 8192},
"Llama-2-13B": {"input_per_1M": 0.6, "output_per_1M": 0.6, "token_context": 4096},
"Llama-2-70B": {"input_per_1M": 2.5, "output_per_1M": 2.5, "token_context": 4096},
"DeepSeek-Coder-33B": {"input_per_1M": 2.0, "output_per_1M": 2.0, "token_context": 16384},
},
"Anthropic": {
"Claude-3-Opus": {"input_per_1M": 15.0, "output_per_1M": 75.0, "token_context": 200000},
"Claude-3-Sonnet": {"input_per_1M": 3.0, "output_per_1M": 15.0, "token_context": 200000},
"Claude-3-Haiku": {"input_per_1M": 0.25, "output_per_1M": 1.25, "token_context": 200000},
}
}
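# Approximate GPU memory (GB) needed to serve each model size;
# rule of thumb: ~2 GB per billion parameters at fp16 precision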
model_sizes = {
"Small (7B parameters)": {"memory_required": 14},
"Medium (13B parameters)": {"memory_required": 26},
"Large (70B parameters)": {"memory_required": 140},
"XL (180B parameters)": {"memory_required": 360},
}
def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
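    """Estimate monthly AWS cost: discounted compute hours plus block storage."""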
data = aws_instances[instance]
rate = data['hourly_rate']
    if spot:
        rate *= 0.3  # approximate spot discount (~70% off on-demand)
    elif reserved:
        factors = {1: 0.6, 3: 0.4}  # 1-year and 3-year reserved-instance discounts
        rate *= factors.get(years, 0.6)
    compute = rate * hours
    storage_cost = storage * 0.10  # EBS-style block storage at ~$0.10/GB-month
    return {'total_cost': compute + storage_cost}
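# Example: calculate_aws_cost("g5.xlarge", 100, 100, spot=True)
#   -> 0.65 * 0.3 * 100 + 100 * 0.10 = $29.50/month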
def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
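    """Estimate monthly GCP cost: discounted compute hours plus persistent-disk storage."""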
data = gcp_instances[instance]
rate = data['hourly_rate']
    if spot:
        rate *= 0.2  # approximate spot/preemptible discount (~80% off on-demand)
    elif reserved:
        factors = {1: 0.7, 3: 0.5}  # 1-year and 3-year committed-use discounts
        rate *= factors.get(years, 0.7)
    compute = rate * hours
    storage_cost = storage * 0.04  # standard persistent disk at ~$0.04/GB-month
    return {'total_cost': compute + storage_cost}
def calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls):
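    """Monthly API cost; token counts are in millions, matching the per-1M rates."""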
m = api_pricing[provider][model]
input_cost = input_tokens * m['input_per_1M']
output_cost = output_tokens * m['output_per_1M']
    call_cost = api_calls * 0.0001 if provider == 'TogetherAI' else 0  # nominal per-call surcharge
return {'total_cost': input_cost + output_cost + call_cost}
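# Example: 10M tokens/month at a 30% input ratio on Claude-3-Haiku
#   -> 3 * 0.25 + 7 * 1.25 = $9.50/month (no per-call fee)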
def filter_compatible(instances, min_mem):
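    """Keep only instances whose total GPU memory meets the model's requirement."""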
res = {}
for name, data in instances.items():
mem_str = data['gpu_memory']
        if 'x' in mem_str:
            # Multi-GPU format like "8x40GB": total = GPU count * per-GPU memory
            count, per_gpu = mem_str.split('x')
            val = int(count) * int(per_gpu.replace('GB', ''))
        else:
            # Single-GPU format like "16GB"
            val = int(mem_str.replace('GB', ''))
if val >= min_mem:
res[name] = data
return res
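# Example: filter_compatible(aws_instances, 26) keeps only p4d.24xlarge
# (8 x 40 GB = 320 GB); g5.xlarge's single 24 GB GPU falls short.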
def generate_cost_comparison(
compute_hours, tokens_per_month, input_ratio, api_calls,
model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
):
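    """Pick the cheapest compatible AWS and GCP instance plus the cheapest API
    model, and return (summary HTML, Plotly cost-comparison figure)."""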
years = int(multi_year_commitment)
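    # Token volumes stay in millions so they line up with the per-1M API rates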
in_tokens = tokens_per_month * (input_ratio / 100)
out_tokens = tokens_per_month - in_tokens
min_mem = model_sizes[model_size]['memory_required']
aws_comp = filter_compatible(aws_instances, min_mem)
gcp_comp = filter_compatible(gcp_instances, min_mem)
results = []
    # AWS: cheapest compatible instance (cost each instance once, then take the minimum)
    if aws_comp:
        aws_costs = {name: calculate_aws_cost(name, compute_hours, storage_gb,
                                              reserved_instances, spot_instances, years)['total_cost']
                     for name in aws_comp}
        best_aws = min(aws_costs, key=aws_costs.get)
        results.append({'provider': f'AWS ({best_aws})', 'cost': aws_costs[best_aws], 'type': 'Cloud'})
    # GCP: cheapest compatible instance
    if gcp_comp:
        gcp_costs = {name: calculate_gcp_cost(name, compute_hours, storage_gb,
                                              reserved_instances, spot_instances, years)['total_cost']
                     for name in gcp_comp}
        best_gcp = min(gcp_costs, key=gcp_costs.get)
        results.append({'provider': f'GCP ({best_gcp})', 'cost': gcp_costs[best_gcp], 'type': 'Cloud'})
    # API: cheapest model across all providers
api_opts = { (prov, m): calculate_api_cost(prov, m, in_tokens, out_tokens, api_calls)['total_cost']
for prov in api_pricing for m in api_pricing[prov] }
best_api = min(api_opts, key=api_opts.get)
results.append({'provider': f'{best_api[0]} ({best_api[1]})', 'cost': api_opts[best_api], 'type': 'API'})
    # Build bar chart; assign brand colors by provider prefix so a missing cloud
    # option (e.g., no instance fits an XL model) can't cause an index error
    df_res = pd.DataFrame(results)
    color_map = {}
    for r in results:
        if r['provider'].startswith('AWS'):
            color_map[r['provider']] = '#FF9900'  # AWS orange
        elif r['provider'].startswith('GCP'):
            color_map[r['provider']] = '#4285F4'  # GCP blue
        else:
            color_map[r['provider']] = '#D62828'  # API red
    fig = px.bar(
        df_res, x='provider', y='cost', color='provider',
        color_discrete_map=color_map,
title='Monthly Cost Comparison',
labels={'provider': 'Provider', 'cost': 'Monthly Cost'}
)
fig.update_yaxes(tickprefix='$')
fig.update_layout(showlegend=False, height=500)
# HTML summary tables omitted for brevity
html_tables = '<div>'
# ... you can reinsert your HTML tables here if needed
html_tables += '</div>'
return html_tables, fig
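# Thin wrapper so the Gradio click handler has a single, stable entry point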
def app_function(
compute_hours, tokens_per_month, input_ratio, api_calls,
model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
):
return generate_cost_comparison(
compute_hours, tokens_per_month, input_ratio, api_calls,
model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
)
# Gradio UI
def main():
with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
gr.HTML("""
<div style="text-align:center; margin-bottom:20px;">
<h1>Cloud Cost Estimator</h1>
<p>Compare cloud vs API costs</p>
</div>
""")
with gr.Row():
with gr.Column(scale=1):
                compute_hours = gr.Slider(1, 730, value=100, label="Compute Hours per Month")
                tokens_per_month = gr.Slider(1, 1000, value=10, label="Tokens per Month (M)")
                input_ratio = gr.Slider(10, 90, value=30, label="Input Ratio (%)")
                api_calls = gr.Slider(100, 1_000_000, value=10000, step=100, label="API Calls per Month")
                model_size = gr.Dropdown(list(model_sizes.keys()), value="Medium (13B parameters)", label="Model Size")
                storage_gb = gr.Slider(10, 1000, value=100, label="Storage (GB)")
                reserved_instances = gr.Checkbox(label="Reserved Instances", value=False)
                spot_instances = gr.Checkbox(label="Spot Instances", value=False)
                multi_year_commitment = gr.Radio(["1", "3"], value="1", label="Commitment (Years)")
submit = gr.Button("Calculate Costs")
with gr.Column(scale=2):
out_html = gr.HTML()
out_plot = gr.Plot()
submit.click(app_function,
inputs=[compute_hours, tokens_per_month, input_ratio, api_calls,
model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment],
outputs=[out_html, out_plot])
demo.launch()
if __name__ == "__main__":
main()