# app.py - Cloud Cost Estimator (Gradio app).
# Scraped file-viewer header: uploaded by delightfulrachel; commit 41a2f73
# "Update app.py" (verified); file size 11.3 kB.
import gradio as gr
import pandas as pd
import numpy as np
import plotly.express as px
# Initialize pricing data
# AWS GPU instance catalog, keyed by instance type.
# Each entry holds the vCPU count, system memory, GPU configuration, the base
# hourly rate (calculate_aws_cost applies spot/reserved multipliers to it) and
# the GPU memory as "<size>GB" or "<count>x<size>GB".
# NOTE(review): memory is presumably in GB and hourly_rate in USD; the numbers
# are a hard-coded snapshot - verify against the current AWS price list.
aws_instances = {
    "g4dn.xlarge": dict(
        vcpus=4, memory=16, gpu="1x NVIDIA T4",
        hourly_rate=0.526, gpu_memory="16GB",
    ),
    "g4dn.2xlarge": dict(
        vcpus=8, memory=32, gpu="1x NVIDIA T4",
        hourly_rate=0.752, gpu_memory="16GB",
    ),
    "g5.xlarge": dict(
        vcpus=4, memory=16, gpu="1x NVIDIA A10G",
        hourly_rate=0.65, gpu_memory="24GB",
    ),
    "g5.2xlarge": dict(
        vcpus=8, memory=32, gpu="1x NVIDIA A10G",
        hourly_rate=1.006, gpu_memory="24GB",
    ),
    "p3.2xlarge": dict(
        vcpus=8, memory=61, gpu="1x NVIDIA V100",
        hourly_rate=3.06, gpu_memory="16GB",
    ),
    "p4d.24xlarge": dict(
        vcpus=96, memory=1152, gpu="8x NVIDIA A100",
        hourly_rate=32.77, gpu_memory="8x40GB",
    ),
}
# GCP GPU instance catalog, keyed by machine type, with the same fields as
# the AWS catalog: vCPUs, system memory, GPU configuration, base hourly rate
# (calculate_gcp_cost applies spot/reserved multipliers to it) and GPU memory
# as "<size>GB" or "<count>x<size>GB".
# NOTE(review): memory is presumably in GB and hourly_rate in USD; the numbers
# are a hard-coded snapshot - verify against the current GCP price list.
gcp_instances = {
    "a2-highgpu-1g": dict(
        vcpus=12, memory=85, gpu="1x NVIDIA A100",
        hourly_rate=1.46, gpu_memory="40GB",
    ),
    "a2-highgpu-2g": dict(
        vcpus=24, memory=170, gpu="2x NVIDIA A100",
        hourly_rate=2.93, gpu_memory="2x40GB",
    ),
    "a2-highgpu-4g": dict(
        vcpus=48, memory=340, gpu="4x NVIDIA A100",
        hourly_rate=5.86, gpu_memory="4x40GB",
    ),
    "n1-standard-4-t4": dict(
        vcpus=4, memory=15, gpu="1x NVIDIA T4",
        hourly_rate=0.49, gpu_memory="16GB",
    ),
    "n1-standard-8-t4": dict(
        vcpus=8, memory=30, gpu="1x NVIDIA T4",
        hourly_rate=0.69, gpu_memory="16GB",
    ),
    "g2-standard-4": dict(
        vcpus=4, memory=16, gpu="1x NVIDIA L4",
        hourly_rate=0.59, gpu_memory="24GB",
    ),
}
# Hosted-API price table: provider -> model -> pricing record.
# input_per_1M / output_per_1M are the prices per one million input / output
# tokens; token_context is stored per model but is not read by any function
# visible in this file.
# NOTE(review): prices are presumably USD and are a hard-coded snapshot.
# GPT-4o-mini is listed at exactly half the GPT-4o rates and both GPT-4o
# entries use a 32768-token context, which looks out of step with OpenAI's
# published figures - confirm against the providers' pricing pages.
api_pricing = {
    "OpenAI": {
        "GPT-3.5-Turbo": dict(input_per_1M=0.5, output_per_1M=1.5, token_context=16385),
        "GPT-4o": dict(input_per_1M=5.0, output_per_1M=15.0, token_context=32768),
        "GPT-4o-mini": dict(input_per_1M=2.5, output_per_1M=7.5, token_context=32768),
    },
    "TogetherAI": {
        "Llama-3-8B": dict(input_per_1M=0.15, output_per_1M=0.15, token_context=8192),
        "Llama-3-70B": dict(input_per_1M=0.9, output_per_1M=0.9, token_context=8192),
        "Llama-2-13B": dict(input_per_1M=0.6, output_per_1M=0.6, token_context=4096),
        "Llama-2-70B": dict(input_per_1M=2.5, output_per_1M=2.5, token_context=4096),
        "DeepSeek-Coder-33B": dict(input_per_1M=2.0, output_per_1M=2.0, token_context=16384),
    },
    "Anthropic": {
        "Claude-3-Opus": dict(input_per_1M=15.0, output_per_1M=75.0, token_context=200000),
        "Claude-3-Sonnet": dict(input_per_1M=3.0, output_per_1M=15.0, token_context=200000),
        "Claude-3-Haiku": dict(input_per_1M=0.25, output_per_1M=1.25, token_context=200000),
    },
}
# Model-size presets offered in the UI dropdown.
# memory_required is the minimum total GPU memory an instance must offer; it
# is compared against the GB figure parsed from each instance's gpu_memory.
# throughput_factor is stored per preset but is not read by any function
# visible in this file.
# NOTE(review): the memory figures are roughly 2 GB per billion parameters,
# presumably assuming 16-bit weights - confirm.
model_sizes = {
    "Small (7B parameters)": dict(memory_required=14, throughput_factor=1.0),
    "Medium (13B parameters)": dict(memory_required=26, throughput_factor=0.7),
    "Large (70B parameters)": dict(memory_required=140, throughput_factor=0.3),
    "XL (180B parameters)": dict(memory_required=360, throughput_factor=0.15),
}
def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
    """Estimate the cost of running one AWS instance.

    Args:
        instance: Key into ``aws_instances``.
        hours: Number of compute hours to bill.
        storage: Amount of storage, charged at a flat 0.10 per unit
            (the caller passes gigabytes).
        reserved: Apply the reserved-instance multiplier. Ignored when
            ``spot`` is set.
        spot: Apply the spot multiplier (0.3 of the base rate).
        years: Reserved commitment length; 1 maps to 0.6, 3 maps to 0.4 and
            any other value falls back to 0.6.

    Returns:
        Dict with ``compute_cost``, ``storage_cost`` and ``total_cost``.

    Raises:
        KeyError: If ``instance`` is not present in ``aws_instances``.
    """
    hourly = aws_instances[instance]['hourly_rate']

    if spot:
        # Spot pricing takes precedence over a reserved commitment.
        hourly = hourly * 0.3
    elif reserved:
        reserved_multipliers = {1: 0.6, 3: 0.4}
        hourly = hourly * reserved_multipliers.get(years, 0.6)

    compute_cost = hourly * hours
    storage_cost = storage * 0.10
    return {
        'compute_cost': compute_cost,
        'storage_cost': storage_cost,
        'total_cost': compute_cost + storage_cost,
    }
def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
    """Estimate the cost of running one GCP instance.

    Args:
        instance: Key into ``gcp_instances``.
        hours: Number of compute hours to bill.
        storage: Amount of storage, charged at a flat 0.04 per unit
            (the caller passes gigabytes).
        reserved: Apply the committed-use multiplier. Ignored when ``spot``
            is set.
        spot: Apply the spot/preemptible multiplier (0.2 of the base rate).
        years: Commitment length; 1 maps to 0.7, 3 maps to 0.5 and any
            other value falls back to 0.7.

    Returns:
        Dict with ``compute_cost``, ``storage_cost`` and ``total_cost``.

    Raises:
        KeyError: If ``instance`` is not present in ``gcp_instances``.
    """
    hourly = gcp_instances[instance]['hourly_rate']

    if spot:
        # Spot pricing takes precedence over a commitment discount.
        hourly = hourly * 0.2
    elif reserved:
        commitment_multipliers = {1: 0.7, 3: 0.5}
        hourly = hourly * commitment_multipliers.get(years, 0.7)

    compute_cost = hourly * hours
    storage_cost = storage * 0.04
    return {
        'compute_cost': compute_cost,
        'storage_cost': storage_cost,
        'total_cost': compute_cost + storage_cost,
    }
def calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls):
    """Estimate the cost of serving a workload through a hosted API.

    Args:
        provider: Top-level key of ``api_pricing``.
        model: Model key under that provider.
        input_tokens: Input token volume, expressed in millions (the UI
            slider is labelled in millions and rates are per 1M tokens).
        output_tokens: Output token volume, in the same unit.
        api_calls: Number of API calls; only TogetherAI is charged a
            per-call fee (0.0001 each).

    Returns:
        Dict with ``input_cost``, ``output_cost``, ``api_call_cost`` and
        ``total_cost``.

    Raises:
        KeyError: If the provider or model is not in ``api_pricing``.
    """
    pricing = api_pricing[provider][model]

    # Volumes are already in millions of tokens, hence the divisor of 1.
    input_cost = (input_tokens * pricing['input_per_1M']) / 1
    output_cost = (output_tokens * pricing['output_per_1M']) / 1

    if provider == 'TogetherAI':
        call_fee = api_calls * 0.0001
    else:
        call_fee = 0

    return {
        'input_cost': input_cost,
        'output_cost': output_cost,
        'api_call_cost': call_fee,
        'total_cost': input_cost + output_cost + call_fee,
    }
def _total_gpu_memory_gb(mem_str):
    """Return the total GPU memory, in GB, described by ``mem_str``.

    Accepts a single size such as ``"24GB"`` or a ``"<count>x<size>GB"``
    form such as ``"8x40GB"``, which yields count times the per-GPU size.
    Any integer count is supported, not only 1, 2, 4 and 8.

    Raises:
        ValueError: If the count or the size is not an integer.
    """
    count_text, separator, size_text = mem_str.partition('x')
    if not separator:
        # No multiplier present: the whole string is one GPU's size.
        count_text, size_text = '1', mem_str
    return int(count_text) * int(size_text.replace('GB', ''))


def filter_compatible_instances(instances, min_mem):
    """Select the instances whose total GPU memory is at least ``min_mem``.

    Args:
        instances: Mapping of instance name to a spec dict that contains a
            ``gpu_memory`` string (``"16GB"``, ``"8x40GB"``, ...).
        min_mem: Minimum total GPU memory required, in GB.

    Returns:
        A new dict holding the qualifying entries in their original order;
        the spec dicts themselves are shared with ``instances``, not copied.

    Raises:
        KeyError: If a spec has no ``gpu_memory`` entry.
        ValueError: If a ``gpu_memory`` string cannot be parsed.
    """
    return {
        name: spec
        for name, spec in instances.items()
        if _total_gpu_memory_gb(spec['gpu_memory']) >= min_mem
    }
def generate_cost_comparison(
    compute_hours, tokens_per_month, input_ratio, api_calls,
    model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
):
    """Build the HTML report and bar chart comparing cloud and API costs.

    Args:
        compute_hours: Compute hours per month for the cloud instances.
        tokens_per_month: Monthly token volume (the UI labels it in millions).
        input_ratio: Percentage of the tokens that are input tokens.
        api_calls: Number of API calls per month.
        model_size: Key into ``model_sizes``; sets the GPU memory needed.
        storage_gb: Storage passed to the cloud cost calculators.
        reserved_instances: Whether to apply reserved pricing.
        spot_instances: Whether to apply spot pricing.
        multi_year_commitment: Commitment length; converted with ``int``.

    Returns:
        Tuple ``(html, fig)``: the HTML report string and the figure
        returned by ``px.bar``.
    """
    # NOTE(review): the source had lost its indentation; each "No compatible
    # ... instances" branch is paired with the emptiness check on the
    # candidate dict - confirm against the original file.
    commitment_years = int(multi_year_commitment)
    input_share = input_ratio / 100
    prompt_tokens = tokens_per_month * input_share
    completion_tokens = tokens_per_month - prompt_tokens

    required_memory = model_sizes[model_size]['memory_required']
    aws_options = filter_compatible_instances(aws_instances, required_memory)
    gcp_options = filter_compatible_instances(gcp_instances, required_memory)

    def _price_rows(candidates, price_fn):
        """Return (rows_html, cheapest_name, cheapest_cost) for candidates."""
        row_parts = []
        cheapest_name, cheapest_cost = None, float('inf')
        for name in candidates:
            quote = price_fn(
                name, compute_hours, storage_gb,
                reserved_instances, spot_instances, commitment_years,
            )
            monthly = quote['total_cost']
            row_parts.append(f'<tr><td>{name}</td><td>${monthly:.2f}</td></tr>')
            # Strict comparison keeps the first instance on a tie.
            if monthly < cheapest_cost:
                cheapest_name, cheapest_cost = name, monthly
        return ''.join(row_parts), cheapest_name, cheapest_cost

    summary = []

    # AWS section
    aws_html = '<h3>AWS Compatible Instances</h3>'
    if not aws_options:
        aws_html += '<p>No compatible AWS instances.</p>'
    else:
        aws_rows, best_aws, best_aws_cost = _price_rows(aws_options, calculate_aws_cost)
        aws_html += '<table width="100%"><tr><th>Instance</th><th>Monthly Cost</th></tr>'
        aws_html += aws_rows
        aws_html += '</table>'
        if best_aws:
            summary.append({'provider': f'AWS ({best_aws})', 'cost': best_aws_cost, 'type':'Cloud'})

    # GCP section
    gcp_html = '<h3>GCP Compatible Instances</h3>'
    if not gcp_options:
        gcp_html += '<p>No compatible GCP instances.</p>'
    else:
        gcp_rows, best_gcp, best_gcp_cost = _price_rows(gcp_options, calculate_gcp_cost)
        gcp_html += '<table width="100%"><tr><th>Instance</th><th>Monthly Cost</th></tr>'
        gcp_html += gcp_rows
        gcp_html += '</table>'
        if best_gcp:
            summary.append({'provider': f'GCP ({best_gcp})', 'cost': best_gcp_cost, 'type':'Cloud'})

    # API section: every provider/model pair is priced and listed.
    api_totals = {}
    api_rows = []
    for vendor, models in api_pricing.items():
        for model_name in models:
            total = calculate_api_cost(
                vendor, model_name, prompt_tokens, completion_tokens, api_calls
            )['total_cost']
            api_totals[(vendor, model_name)] = total
            api_rows.append(f'<tr><td>{vendor}</td><td>{model_name}</td><td>${total:.2f}</td></tr>')
    api_html = (
        '<h3>API Options</h3>'
        + '<table width="100%"><tr><th>Provider</th><th>Model</th><th>Total Cost</th></tr>'
        + ''.join(api_rows)
        + '</table>'
    )
    top_vendor, top_model = min(api_totals, key=api_totals.get)
    summary.append({
        'provider': f'{top_vendor} ({top_model})',
        'cost': api_totals[(top_vendor, top_model)],
        'type':'API',
    })

    # Recommendation: the overall cheapest of the per-provider winners.
    winner = min(summary, key=lambda entry: entry['cost'])
    rec = '<h3>Recommendation</h3>' + (
        f"<p>The API {winner['provider']} is cheapest at ${winner['cost']:.2f}.</p>"
        if winner['type']=='API'
        else f"<p>The Cloud {winner['provider']} is cheapest at ${winner['cost']:.2f}.</p>"
    )

    # Chart of the per-provider winners.
    fig = px.bar(
        pd.DataFrame(summary),
        x='provider', y='cost', color='type', title='Monthly Cost Comparison',
    )

    html = (
        "\n"
        f"<div>{aws_html}</div>\n"
        f"<div>{gcp_html}</div>\n"
        f"<div>{api_html}</div>\n"
        f"<div>{rec}</div>\n"
    )
    return html, fig
def app_function(
    compute_hours, tokens_per_month, input_ratio, api_calls,
    model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
):
    """Forward the UI inputs to ``generate_cost_comparison``.

    This is the callback wired to the "Calculate Costs" button; it passes
    every argument through unchanged and returns whatever
    ``generate_cost_comparison`` returns.
    """
    return generate_cost_comparison(
        compute_hours=compute_hours,
        tokens_per_month=tokens_per_month,
        input_ratio=input_ratio,
        api_calls=api_calls,
        model_size=model_size,
        storage_gb=storage_gb,
        reserved_instances=reserved_instances,
        spot_instances=spot_instances,
        multi_year_commitment=multi_year_commitment,
    )
# Gradio interface
def main():
    """Build the Gradio interface for the cost estimator and launch it.

    The page has a header, a row with an input column (usage sliders,
    model-size dropdown, pricing options, submit button) and an output
    column (HTML report and plot), and a footer with pricing links.
    Clicking the button runs ``app_function`` on the nine inputs.
    """
    # NOTE(review): the source had lost its indentation; the nesting below
    # (button inside the input column, click wiring and footer inside the
    # Blocks context, launch after it) is reconstructed - confirm.
    header_markup = (
        '\n'
        '<div style="text-align:center; margin-bottom:20px;">\n'
        '<h1>Cloud Cost Estimator</h1>\n'
        '<p>Compare costs between cloud hardware and API endpoints</p>\n'
        '</div>\n'
    )
    footer_markup = (
        '\n'
        '<div style="margin-top:30px; border-top:1px solid #e5e7eb; padding-top:20px;">\n'
        '<h3>Help & Resources</h3>\n'
        '<p><a href="https://aws.amazon.com/ec2/pricing/">AWS EC2 Pricing</a>'
        ' | <a href="https://cloud.google.com/compute/pricing">GCP Pricing</a></p>\n'
        '<p><a href="https://openai.com/pricing">OpenAI API Pricing</a>'
        ' | <a href="https://www.anthropic.com/api">Anthropic Claude API Pricing</a>'
        ' | <a href="https://www.together.ai/pricing">TogetherAI Pricing</a></p>\n'
        '</div>\n'
    )

    with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
        gr.HTML(header_markup)

        with gr.Row():
            # Left column: every input control plus the submit button.
            with gr.Column(scale=1):
                gr.HTML("<h3>Usage Parameters</h3>")
                hours_input = gr.Slider(
                    label="Compute Hours per Month", minimum=1, maximum=730, value=100
                )
                tokens_input = gr.Slider(
                    label="Tokens Processed per Month (millions)", minimum=1, maximum=1000, value=10
                )
                ratio_input = gr.Slider(
                    label="Input Token Ratio (%)", minimum=10, maximum=90, value=30
                )
                calls_input = gr.Slider(
                    label="API Calls per Month", minimum=100, maximum=1000000, value=10000, step=100
                )
                size_input = gr.Dropdown(
                    label="Model Size", choices=list(model_sizes.keys()), value="Medium (13B parameters)"
                )
                storage_input = gr.Slider(
                    label="Storage Required (GB)", minimum=10, maximum=1000, value=100
                )

                gr.HTML("<h3>Advanced Options</h3>")
                reserved_input = gr.Checkbox(label="Use Reserved Instances", value=False)
                spot_input = gr.Checkbox(label="Use Spot/Preemptible Instances", value=False)
                commitment_input = gr.Radio(
                    label="Commitment Period (years)", choices=["1","3"], value="1"
                )
                calculate_button = gr.Button("Calculate Costs", variant="primary")

            # Right column: the rendered report and the chart.
            with gr.Column(scale=2):
                report_view = gr.HTML(label="Results")
                chart_view = gr.Plot(label="Cost Comparison")

        # Input order must match the parameter order of app_function.
        form_inputs = [
            hours_input, tokens_input, ratio_input, calls_input, size_input,
            storage_input, reserved_input, spot_input, commitment_input,
        ]
        calculate_button.click(
            app_function,
            inputs=form_inputs,
            outputs=[report_view, chart_view],
        )

        gr.HTML(footer_markup)

    demo.launch()


# Start the app only when the file is run as a script, not when imported.
if __name__ == "__main__":
    main()