|
"""Gradio app comparing LLM serving costs on cloud GPU instances vs. hosted API endpoints."""

import gradio as gr
import pandas as pd
import plotly.express as px
|
|
|
|
|
|
|
# Representative on-demand GPU instance pricing (USD/hour); actual rates vary by region.
aws_instances = {
    "g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB"},
    "g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB"},
    "g5.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA A10G", "hourly_rate": 0.65, "gpu_memory": "24GB"},
    "g5.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA A10G", "hourly_rate": 1.006, "gpu_memory": "24GB"},
    "p3.2xlarge": {"vcpus": 8, "memory": 61, "gpu": "1x NVIDIA V100", "hourly_rate": 3.06, "gpu_memory": "16GB"},
    "p4d.24xlarge": {"vcpus": 96, "memory": 1152, "gpu": "8x NVIDIA A100", "hourly_rate": 32.77, "gpu_memory": "8x40GB"}
}
|
|
|
|
|
gcp_instances = {
    "a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB"},
    "a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB"},
    "a2-highgpu-4g": {"vcpus": 48, "memory": 340, "gpu": "4x NVIDIA A100", "hourly_rate": 5.86, "gpu_memory": "4x40GB"},
    "n1-standard-4-t4": {"vcpus": 4, "memory": 15, "gpu": "1x NVIDIA T4", "hourly_rate": 0.49, "gpu_memory": "16GB"},
    "n1-standard-8-t4": {"vcpus": 8, "memory": 30, "gpu": "1x NVIDIA T4", "hourly_rate": 0.69, "gpu_memory": "16GB"},
    "g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB"}
}
|
|
|
|
|
# Hosted API pricing (USD per 1M tokens) and the maximum context window per model.
api_pricing = {
    "OpenAI": {
        "GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},
        "GPT-4o": {"input_per_1M": 5.0, "output_per_1M": 15.0, "token_context": 128000},
        "GPT-4o-mini": {"input_per_1M": 0.15, "output_per_1M": 0.6, "token_context": 128000},
    },
    "TogetherAI": {
        "Llama-3-8B": {"input_per_1M": 0.15, "output_per_1M": 0.15, "token_context": 8192},
        "Llama-3-70B": {"input_per_1M": 0.9, "output_per_1M": 0.9, "token_context": 8192},
        "Llama-2-13B": {"input_per_1M": 0.6, "output_per_1M": 0.6, "token_context": 4096},
        "Llama-2-70B": {"input_per_1M": 2.5, "output_per_1M": 2.5, "token_context": 4096},
        "DeepSeek-Coder-33B": {"input_per_1M": 2.0, "output_per_1M": 2.0, "token_context": 16384},
    },
    "Anthropic": {
        "Claude-3-Opus": {"input_per_1M": 15.0, "output_per_1M": 75.0, "token_context": 200000},
        "Claude-3-Sonnet": {"input_per_1M": 3.0, "output_per_1M": 15.0, "token_context": 200000},
        "Claude-3-Haiku": {"input_per_1M": 0.25, "output_per_1M": 1.25, "token_context": 200000},
    }
}
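# Note: output/input rates differ widely; e.g. Claude-3-Opus output tokens ($75/1M)
# cost 300x as much as Claude-3-Haiku input tokens ($0.25/1M).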
|
|
|
|
|
# Approximate GPU memory needed to serve each model in FP16 (~2 bytes per parameter),
# plus a relative throughput factor (larger models generate tokens more slowly).
model_sizes = {
    "Small (7B parameters)": {"memory_required": 14, "throughput_factor": 1.0},
    "Medium (13B parameters)": {"memory_required": 26, "throughput_factor": 0.7},
    "Large (70B parameters)": {"memory_required": 140, "throughput_factor": 0.3},
    "XL (180B parameters)": {"memory_required": 360, "throughput_factor": 0.15},
}
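# Example: a 13B model in FP16 needs roughly 13e9 params * 2 bytes ≈ 26 GB of GPU memory.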
|
|
|
|
|
def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
    """Estimate monthly AWS cost: discounted compute hours plus storage at ~$0.10/GB-month."""
    instance_data = aws_instances[instance]
    base_hourly = instance_data["hourly_rate"]

    if spot:
        # Spot capacity is modeled as a flat 70% discount off on-demand.
        hourly_rate = base_hourly * 0.3
    elif reserved:
        # Reserved pricing: ~40% off for a 1-year term, ~60% off for 3 years.
        discount_factors = {1: 0.6, 3: 0.4}
        hourly_rate = base_hourly * discount_factors.get(int(years), 0.6)
    else:
        hourly_rate = base_hourly

    compute_cost = hourly_rate * hours
    storage_cost = storage * 0.10

    return {
        "compute_cost": compute_cost,
        "storage_cost": storage_cost,
        "total_cost": compute_cost + storage_cost,
        "instance_details": instance_data
    }
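# Example: a g5.xlarge for 100 on-demand hours with 100 GB of storage
# costs 0.65 * 100 + 100 * 0.10 = $75.00/month.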
|
|
|
def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
    """Estimate monthly GCP cost: discounted compute hours plus storage at ~$0.04/GB-month."""
    instance_data = gcp_instances[instance]
    base_hourly = instance_data["hourly_rate"]

    if spot:
        # Preemptible/spot capacity is modeled as a flat 80% discount off on-demand.
        hourly_rate = base_hourly * 0.2
    elif reserved:
        # Committed-use pricing: ~30% off for a 1-year term, ~50% off for 3 years.
        discount_factors = {1: 0.7, 3: 0.5}
        hourly_rate = base_hourly * discount_factors.get(int(years), 0.7)
    else:
        hourly_rate = base_hourly

    compute_cost = hourly_rate * hours
    storage_cost = storage * 0.04

    return {
        "compute_cost": compute_cost,
        "storage_cost": storage_cost,
        "total_cost": compute_cost + storage_cost,
        "instance_details": instance_data
    }
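# Example: an n1-standard-4-t4 at spot pricing for 100 hours with 100 GB of storage
# costs 0.49 * 0.2 * 100 + 100 * 0.04 = $13.80/month.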
|
|
|
def calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls):
    """Estimate monthly API cost; token counts are given in millions, prices are per 1M tokens."""
    model_data = api_pricing[provider][model]

    # Token counts are already in millions, so multiply directly by the per-1M rates.
    input_cost = input_tokens * model_data["input_per_1M"]
    output_cost = output_tokens * model_data["output_per_1M"]

    # Illustrative per-request surcharge, applied only to TogetherAI in this model.
    api_call_costs = 0
    if provider == "TogetherAI":
        api_call_costs = api_calls * 0.0001

    total_cost = input_cost + output_cost + api_call_costs

    return {
        "input_cost": input_cost,
        "output_cost": output_cost,
        "api_call_cost": api_call_costs,
        "total_cost": total_cost,
        "model_details": model_data
    }
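# Example: 10M tokens/month at a 30% input ratio against GPT-3.5-Turbo
# costs 3 * 0.5 + 7 * 1.5 = $12.00/month before any per-call fees.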
|
|
|
|
|
def filter_compatible_instances(instances_dict, min_memory_required):
    """Return the instances whose total GPU memory can hold the model weights."""
    compatible = {}
    for name, data in instances_dict.items():
        memory_str = data["gpu_memory"]

        if "x" in memory_str:
            # Multi-GPU format such as "8x40GB": total memory = GPU count * per-GPU memory.
            parts = memory_str.split("x")
            num_gpus = int(parts[0])
            memory_per_gpu = int(parts[1].split("GB")[0])
            memory_val = num_gpus * memory_per_gpu
        else:
            # Single-GPU format such as "16GB".
            memory_val = int(memory_str.split("GB")[0])

        if memory_val >= min_memory_required:
            compatible[name] = data

    return compatible
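# Example: filter_compatible_instances(aws_instances, 140) keeps only p4d.24xlarge,
# since "8x40GB" parses to 320 GB while every other AWS entry tops out at 24 GB.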
|
|
|
def generate_cost_comparison(
    compute_hours,
    tokens_per_month,
    input_ratio,
    api_calls,
    model_size,
    storage_gb,
    reserved_instances,
    spot_instances,
    multi_year_commitment
):
    """Build the HTML cost report and comparison chart for cloud vs. API options."""
|
|
|
    # Split the monthly token volume (in millions) into input and output tokens.
    input_tokens = tokens_per_month * (input_ratio / 100)
    output_tokens = tokens_per_month * (1 - (input_ratio / 100))

    # Keep only the instances with enough GPU memory for the chosen model size.
    min_memory_required = model_sizes[model_size]["memory_required"]

    compatible_aws = filter_compatible_instances(aws_instances, min_memory_required)
    compatible_gcp = filter_compatible_instances(gcp_instances, min_memory_required)

    results = []
|
|
|
|
|
    if compatible_aws:
        aws_results = "<h3>AWS Compatible Instances</h3>"
        aws_results += "<table width='100%'><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Hourly Rate</th><th>Monthly Cost</th></tr>"

        best_aws = None
        best_aws_cost = float('inf')

        for instance in compatible_aws:
            cost_result = calculate_aws_cost(instance, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
            total_cost = cost_result["total_cost"]

            if total_cost < best_aws_cost:
                best_aws = instance
                best_aws_cost = total_cost

            aws_results += f"<tr><td>{instance}</td><td>{compatible_aws[instance]['vcpus']}</td><td>{compatible_aws[instance]['memory']}GB</td><td>{compatible_aws[instance]['gpu']}</td><td>${compatible_aws[instance]['hourly_rate']:.3f}</td><td>${total_cost:.2f}</td></tr>"

        aws_results += "</table>"

        if best_aws:
            results.append({
                "provider": f"AWS ({best_aws})",
                "cost": best_aws_cost,
                "type": "Cloud"
            })
    else:
        aws_results = "<h3>AWS Compatible Instances</h3><p>No compatible AWS instances found for this model size.</p>"
        best_aws = None
        best_aws_cost = float('inf')
|
|
|
|
|
    if compatible_gcp:
        gcp_results = "<h3>Google Cloud Compatible Instances</h3>"
        gcp_results += "<table width='100%'><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Hourly Rate</th><th>Monthly Cost</th></tr>"

        best_gcp = None
        best_gcp_cost = float('inf')

        for instance in compatible_gcp:
            cost_result = calculate_gcp_cost(instance, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
            total_cost = cost_result["total_cost"]

            if total_cost < best_gcp_cost:
                best_gcp = instance
                best_gcp_cost = total_cost

            gcp_results += f"<tr><td>{instance}</td><td>{compatible_gcp[instance]['vcpus']}</td><td>{compatible_gcp[instance]['memory']}GB</td><td>{compatible_gcp[instance]['gpu']}</td><td>${compatible_gcp[instance]['hourly_rate']:.3f}</td><td>${total_cost:.2f}</td></tr>"

        gcp_results += "</table>"

        if best_gcp:
            results.append({
                "provider": f"GCP ({best_gcp})",
                "cost": best_gcp_cost,
                "type": "Cloud"
            })
    else:
        gcp_results = "<h3>Google Cloud Compatible Instances</h3><p>No compatible Google Cloud instances found for this model size.</p>"
        best_gcp = None
        best_gcp_cost = float('inf')
|
|
|
|
|
    api_results = "<h3>API Options</h3>"
    api_results += "<table width='100%'><tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Total Cost</th><th>Context Length</th></tr>"

    api_costs = {}

    for provider in api_pricing:
        for model in api_pricing[provider]:
            cost_data = calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls)
            api_costs[(provider, model)] = cost_data

            api_results += f"<tr><td>{provider}</td><td>{model}</td><td>${cost_data['input_cost']:.2f}</td><td>${cost_data['output_cost']:.2f}</td><td>${cost_data['total_cost']:.2f}</td><td>{api_pricing[provider][model]['token_context']:,}</td></tr>"

    api_results += "</table>"

    best_api = min(api_costs.keys(), key=lambda x: api_costs[x]["total_cost"])
    best_api_cost = api_costs[best_api]

    results.append({
        "provider": f"{best_api[0]} ({best_api[1]})",
        "cost": best_api_cost["total_cost"],
        "type": "API"
    })
|
|
|
|
|
    recommendation = "<h3>Recommendation</h3>"

    cheapest = min(results, key=lambda x: x["cost"])

    if cheapest["type"] == "API":
        recommendation += f"<p>Based on your usage parameters, the <strong>{cheapest['provider']}</strong> API endpoint is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"

        cheapest_cloud = None
        for result in results:
            if result["type"] == "Cloud":
                if cheapest_cloud is None or result["cost"] < cheapest_cloud["cost"]:
                    cheapest_cloud = result

        if cheapest_cloud:
            ratio = cheapest_cloud["cost"] / cheapest["cost"]
            recommendation += f"<p>This is <strong>{ratio:.1f}x cheaper</strong> than the most affordable cloud option ({cheapest_cloud['provider']}).</p>"
    else:
        recommendation += f"<p>Based on your usage parameters, <strong>{cheapest['provider']}</strong> is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"

        cheapest_api = None
        for result in results:
            if result["type"] == "API":
                if cheapest_api is None or result["cost"] < cheapest_api["cost"]:
                    cheapest_api = result

        if cheapest_api:
            ratio = cheapest_api["cost"] / cheapest["cost"]
            if ratio > 1:
                recommendation += f"<p>This is <strong>{ratio:.1f}x cheaper</strong> than the most affordable API option ({cheapest_api['provider']}).</p>"
            else:
                recommendation += f"<p>However, the API option ({cheapest_api['provider']}) is <strong>{1/ratio:.1f}x cheaper</strong>.</p>"

    # tokens_per_month is measured in millions, so this threshold is 100M tokens.
    if tokens_per_month > 100 and cheapest["type"] == "Cloud":
        recommendation += "<p>With your high token volume, cloud hardware becomes more cost-effective despite the higher upfront costs.</p>"
    elif compute_hours < 50 and cheapest["type"] == "API":
        recommendation += "<p>With your low usage hours, API endpoints are more cost-effective as you only pay for what you use.</p>"
|
|
|
|
|
    # Breakeven: roughly how many compute hours the best API budget would buy.
    # This uses undiscounted on-demand rates and ignores storage, so treat it as a rough guide.
    breakeven = "<h3>Breakeven Analysis</h3>"

    if best_aws is not None and best_api_cost["total_cost"] > 0:
        aws_hourly = aws_instances[best_aws]["hourly_rate"]
        breakeven_hours = best_api_cost["total_cost"] / aws_hourly

        breakeven += f"<p>API vs AWS: <strong>{breakeven_hours:.1f} hours</strong> is the breakeven point.</p>"

        if compute_hours > breakeven_hours:
            breakeven += "<p>You're past the breakeven point - AWS hardware is more cost-effective than API usage.</p>"
        else:
            breakeven += "<p>You're below the breakeven point - API usage is more cost-effective than AWS hardware.</p>"

    if best_gcp is not None and best_api_cost["total_cost"] > 0:
        gcp_hourly = gcp_instances[best_gcp]["hourly_rate"]
        breakeven_hours = best_api_cost["total_cost"] / gcp_hourly

        breakeven += f"<p>API vs GCP: <strong>{breakeven_hours:.1f} hours</strong> is the breakeven point.</p>"

        if compute_hours > breakeven_hours:
            breakeven += "<p>You're past the breakeven point - GCP hardware is more cost-effective than API usage.</p>"
        else:
            breakeven += "<p>You're below the breakeven point - API usage is more cost-effective than GCP hardware.</p>"
|
|
|
|
|
    fig = px.bar(
        pd.DataFrame(results),
        x="provider",
        y="cost",
        color="type",
        color_discrete_map={"Cloud": "#3B82F6", "API": "#8B5CF6"},
        title="Monthly Cost Comparison",
        labels={"provider": "Provider & Instance", "cost": "Monthly Cost ($)"}
    )
    fig.update_layout(height=500)
|
|
|
|
|
    html_output = f"""
    <div style="padding: 20px; font-family: Arial, sans-serif;">
        <h2>Cost Comparison Results</h2>

        <div style="margin-bottom: 20px;">
            {aws_results}
        </div>

        <div style="margin-bottom: 20px;">
            {gcp_results}
        </div>

        <div style="margin-bottom: 20px;">
            {api_results}
        </div>

        <div style="margin-bottom: 20px;">
            {recommendation}
        </div>

        <div style="margin-bottom: 20px;">
            {breakeven}
        </div>

        <div style="margin-bottom: 20px;">
            <h3>Additional Considerations</h3>
            <div style="display: flex; gap: 20px;">
                <div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
                    <h4>Cloud Hardware Pros</h4>
                    <ul>
                        <li>Full control over infrastructure and customization</li>
                        <li>Predictable costs for steady, high-volume workloads</li>
                        <li>Can run multiple models simultaneously</li>
                        <li>No token context limitations</li>
                        <li>Data stays on your infrastructure</li>
                    </ul>
                </div>
                <div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
                    <h4>API Endpoints Pros</h4>
                    <ul>
                        <li>No infrastructure management overhead</li>
                        <li>Pay-per-use model (ideal for sporadic usage)</li>
                        <li>Instant scalability</li>
                        <li>No upfront costs or commitment</li>
                        <li>Automatic updates to newer model versions</li>
                    </ul>
                </div>
            </div>
        </div>

        <div style="background-color: #FEF3C7; padding: 15px; border-radius: 8px; margin-bottom: 20px;">
            <p><strong>Note:</strong> These estimates are based on current pricing as of May 2025 and may vary based on regional pricing differences, discounts, and usage patterns.</p>
        </div>
    </div>
    """

    return html_output, fig
|
|
|
|
|
def app_function(
    compute_hours,
    tokens_per_month,
    input_ratio,
    api_calls,
    model_size,
    storage_gb,
    batch_size,
    reserved_instances,
    spot_instances,
    multi_year_commitment
):
    # batch_size is collected in the UI but not yet used by the cost model.
    html_output, fig = generate_cost_comparison(
        compute_hours,
        tokens_per_month,
        input_ratio,
        api_calls,
        model_size,
        storage_gb,
        reserved_instances,
        spot_instances,
        multi_year_commitment
    )

    return html_output, fig
|
|
|
|
|
with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 20px;">
        <h1 style="color: #4F46E5; font-size: 2.5rem;">Cloud Cost Estimator</h1>
        <p style="font-size: 1.2rem;">Compare costs between cloud hardware configurations and inference API endpoints</p>
    </div>
    """)
|
|
|
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML("<h3>Usage Parameters</h3>")

            compute_hours = gr.Slider(
                label="Compute Hours per Month",
                minimum=1,
                maximum=730,
                value=100,
                info="Number of hours you'll run the model per month"
            )

            tokens_per_month = gr.Slider(
                label="Tokens Processed per Month (millions)",
                minimum=1,
                maximum=1000,
                value=10,
                info="Total number of tokens processed per month in millions"
            )

            input_ratio = gr.Slider(
                label="Input Token Ratio (%)",
                minimum=10,
                maximum=90,
                value=30,
                info="Percentage of total tokens that are input tokens"
            )

            api_calls = gr.Slider(
                label="API Calls per Month",
                minimum=100,
                maximum=1000000,
                value=10000,
                step=100,
                info="Number of API calls made per month"
            )

            model_size = gr.Dropdown(
                label="Model Size",
                choices=list(model_sizes.keys()),
                value="Medium (13B parameters)",
                info="Size of the language model you want to run"
            )

            storage_gb = gr.Slider(
                label="Storage Required (GB)",
                minimum=10,
                maximum=1000,
                value=100,
                info="Amount of storage required for models and data"
            )

            batch_size = gr.Slider(
                label="Batch Size",
                minimum=1,
                maximum=64,
                value=4,
                info="Batch size for inference (affects throughput)"
            )

            gr.HTML("<h3>Advanced Options</h3>")
|
|
|
            reserved_instances = gr.Checkbox(
                label="Use Reserved Instances",
                value=False,
                info="Reserved instances offer significant discounts with 1-3 year commitments"
            )

            spot_instances = gr.Checkbox(
                label="Use Spot/Preemptible Instances",
                value=False,
                info="Spot instances can be 70-90% cheaper but may be terminated with little notice"
            )

            # Integer choices so the selected value matches the int keys in discount_factors.
            multi_year_commitment = gr.Radio(
                label="Commitment Period (if using Reserved Instances)",
                choices=[1, 3],
                value=1,
                info="Length of reserved instance commitment in years"
            )

            submit_button = gr.Button("Calculate Costs", variant="primary")
|
|
|
        with gr.Column(scale=2):
            results_html = gr.HTML(label="Results")
            plot_output = gr.Plot(label="Cost Comparison")

    # app_function takes batch_size as well, so it must be wired into the inputs.
    submit_button.click(
        app_function,
        inputs=[
            compute_hours,
            tokens_per_month,
            input_ratio,
            api_calls,
            model_size,
            storage_gb,
            batch_size,
            reserved_instances,
            spot_instances,
            multi_year_commitment
        ],
        outputs=[results_html, plot_output]
    )
|
|
|
    gr.HTML("""
    <div style="margin-top: 30px; border-top: 1px solid #e5e7eb; padding-top: 20px;">
        <h3>Help & Resources</h3>
        <p><strong>Cloud Provider Documentation:</strong>
            <a href="https://aws.amazon.com/ec2/pricing/" target="_blank">AWS EC2 Pricing</a> |
            <a href="https://cloud.google.com/compute/pricing" target="_blank">GCP Compute Engine Pricing</a>
        </p>
        <p><strong>API Provider Documentation:</strong>
            <a href="https://openai.com/pricing" target="_blank">OpenAI API Pricing</a> |
            <a href="https://www.anthropic.com/api" target="_blank">Anthropic Claude API Pricing</a> |
            <a href="https://www.together.ai/pricing" target="_blank">TogetherAI API Pricing</a>
        </p>
        <p>Made with ❤️ by Cloud Cost Estimator | Data last updated: May 2025</p>
    </div>
    """)

demo.launch()
|