delightfulrachel committed
Commit 29a68b1 · verified · 1 Parent(s): e26b1a8

Update app.py

Files changed (1)
app.py +540 -123
app.py CHANGED
@@ -2,8 +2,10 @@ import gradio as gr
import pandas as pd
import numpy as np
import plotly.express as px

- # Pricing data
aws_instances = {
    "g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB"},
    "g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB"},
@@ -13,6 +15,7 @@ aws_instances = {
    "p4d.24xlarge": {"vcpus": 96, "memory": 1152, "gpu": "8x NVIDIA A100", "hourly_rate": 32.77, "gpu_memory": "8x40GB"}
}

gcp_instances = {
    "a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB"},
    "a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB"},
@@ -22,6 +25,7 @@ gcp_instances = {
    "g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB"}
}

api_pricing = {
    "OpenAI": {
        "GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},
@@ -42,151 +46,564 @@ api_pricing = {
    }
}

model_sizes = {
-     "Small (7B parameters)": {"memory_required": 14},
-     "Medium (13B parameters)": {"memory_required": 26},
-     "Large (70B parameters)": {"memory_required": 140},
-     "XL (180B parameters)": {"memory_required": 360},
}

def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
-     data = aws_instances[instance]
-     rate = data['hourly_rate']
    if spot:
-         rate *= 0.3
    elif reserved:
-         factors = {1: 0.6, 3: 0.4}
-         rate *= factors.get(years, 0.6)
-     compute = rate * hours
-     storage_cost = storage * 0.10
-     return {'total_cost': compute + storage_cost}

def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
-     data = gcp_instances[instance]
-     rate = data['hourly_rate']
    if spot:
-         rate *= 0.2
    elif reserved:
-         factors = {1: 0.7, 3: 0.5}
-         rate *= factors.get(years, 0.7)
-     compute = rate * hours
-     storage_cost = storage * 0.04
-     return {'total_cost': compute + storage_cost}

def calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls):
-     m = api_pricing[provider][model]
-     input_cost = input_tokens * m['input_per_1M']
-     output_cost = output_tokens * m['output_per_1M']
-     call_cost = api_calls * 0.0001 if provider == 'TogetherAI' else 0
-     return {'total_cost': input_cost + output_cost + call_cost}

- def filter_compatible(instances, min_mem):
-     res = {}
-     for name, data in instances.items():
-         mem_str = data['gpu_memory']
-         if 'x' in mem_str and not mem_str.startswith(('1x','2x','4x','8x')):
-             val = int(mem_str.replace('GB',''))
-         elif 'x' in mem_str:
-             parts = mem_str.split('x')
-             val = int(parts[0]) * int(parts[1].replace('GB',''))
        else:
-             val = int(mem_str.replace('GB',''))
-         if val >= min_mem:
-             res[name] = data
-     return res

def generate_cost_comparison(
-     compute_hours, tokens_per_month, input_ratio, api_calls,
-     model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
):
-     years = int(multi_year_commitment)
-     in_tokens = tokens_per_month * (input_ratio / 100)
-     out_tokens = tokens_per_month - in_tokens
-     min_mem = model_sizes[model_size]['memory_required']
-
-     aws_comp = filter_compatible(aws_instances, min_mem)
-     gcp_comp = filter_compatible(gcp_instances, min_mem)
-
    results = []
-     # AWS
-     if aws_comp:
-         best_aws = min(aws_comp.keys(), key=lambda x: calculate_aws_cost(x, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost'])
-         best_aws_cost = calculate_aws_cost(best_aws, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost']
-         results.append({'provider': f'AWS ({best_aws})', 'cost': best_aws_cost, 'type': 'Cloud'})
-     # GCP
-     if gcp_comp:
-         best_gcp = min(gcp_comp.keys(), key=lambda x: calculate_gcp_cost(x, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost'])
-         best_gcp_cost = calculate_gcp_cost(best_gcp, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost']
-         results.append({'provider': f'GCP ({best_gcp})', 'cost': best_gcp_cost, 'type': 'Cloud'})
-     # API (TogetherAI only)
-     api_opts = {(prov, m): calculate_api_cost(prov, m, in_tokens, out_tokens, api_calls)['total_cost']
-                 for prov in api_pricing for m in api_pricing[prov]}
-     best_api = min(api_opts, key=api_opts.get)
-     results.append({'provider': f'{best_api[0]} ({best_api[1]})', 'cost': api_opts[best_api], 'type': 'API'})
-
-     # Build bar chart
-     df_res = pd.DataFrame(results)
-     aws_name = df_res[df_res['type']=='Cloud']['provider'].iloc[0]
-     gcp_name = df_res[df_res['type']=='Cloud']['provider'].iloc[1]
-     api_name = df_res[df_res['type']=='API']['provider'].iloc[0]
-
    fig = px.bar(
-         df_res, x='provider', y='cost', color='provider',
-         color_discrete_map={
-             aws_name: '#FF9900',  # AWS orange
-             gcp_name: '#4285F4',  # GCP blue
-             api_name: '#D62828'   # TogetherAI red
-         },
-         title='Monthly Cost Comparison',
-         labels={'provider': 'Provider', 'cost': 'Monthly Cost'}
    )
-     fig.update_yaxes(tickprefix='$')
-     fig.update_layout(showlegend=False, height=500)
-
-     # HTML summary tables omitted for brevity
-     html_tables = '<div>'
-     # ... you can reinsert your HTML tables here if needed
-     html_tables += '</div>'
-     return html_tables, fig

def app_function(
-     compute_hours, tokens_per_month, input_ratio, api_calls,
-     model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
):
-     return generate_cost_comparison(
-         compute_hours, tokens_per_month, input_ratio, api_calls,
-         model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
    )

- # Gradio UI
- def main():
-     with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
-         gr.HTML("""
-             <div style="text-align:center; margin-bottom:20px;">
-                 <h1>Cloud Cost Estimator</h1>
-                 <p>Compare cloud vs API costs</p>
-             </div>
-         """)
-         with gr.Row():
-             with gr.Column(scale=1):
-                 compute_hours = gr.Slider("Compute Hours per Month", 1, 730, 100)
-                 tokens_per_month = gr.Slider("Tokens per Month (M)", 1, 1000, 10)
-                 input_ratio = gr.Slider("Input Ratio (%)", 10, 90, 30)
-                 api_calls = gr.Slider("API Calls per Month", 100, 1_000_000, 10000, step=100)
-                 model_size = gr.Dropdown(list(model_sizes.keys()), value="Medium (13B parameters)")
-                 storage_gb = gr.Slider("Storage (GB)", 10, 1000, 100)
-                 reserved_instances = gr.Checkbox("Reserved Instances", value=False)
-                 spot_instances = gr.Checkbox("Spot Instances", value=False)
-                 multi_year_commitment = gr.Radio(["1","3"], value="1")
-                 submit = gr.Button("Calculate Costs")
-             with gr.Column(scale=2):
-                 out_html = gr.HTML()
-                 out_plot = gr.Plot()
-         submit.click(app_function,
-             inputs=[compute_hours, tokens_per_month, input_ratio, api_calls,
-                     model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment],
-             outputs=[out_html, out_plot])
-     demo.launch()

- if __name__ == "__main__":
-     main()
 
import pandas as pd
import numpy as np
import plotly.express as px
+ import plotly.graph_objects as go

+ # Initialize pricing data
+ # AWS pricing - Instance types and their properties
aws_instances = {
    "g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB"},
    "g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB"},

    "p4d.24xlarge": {"vcpus": 96, "memory": 1152, "gpu": "8x NVIDIA A100", "hourly_rate": 32.77, "gpu_memory": "8x40GB"}
}

+ # GCP pricing - Instance types and their properties
gcp_instances = {
    "a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB"},
    "a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB"},

    "g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB"}
}

+ # API pricing - Models and their prices
api_pricing = {
    "OpenAI": {
        "GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},

    }
}

+ # Model sizes and memory requirements
model_sizes = {
+     "Small (7B parameters)": {"memory_required": 14, "throughput_factor": 1.0},
+     "Medium (13B parameters)": {"memory_required": 26, "throughput_factor": 0.7},
+     "Large (70B parameters)": {"memory_required": 140, "throughput_factor": 0.3},
+     "XL (180B parameters)": {"memory_required": 360, "throughput_factor": 0.15},
}

+ # Calculate costs
  def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
+     instance_data = aws_instances[instance]
+     base_hourly = instance_data["hourly_rate"]
+
+     # Apply discounts for reservation or spot
    if spot:
+         hourly_rate = base_hourly * 0.3  # 70% discount for spot
    elif reserved:
+         discount_factors = {1: 0.6, 3: 0.4}  # 40% for 1 year, 60% for 3 years
+         hourly_rate = base_hourly * discount_factors.get(years, 0.6)
+     else:
+         hourly_rate = base_hourly
+
+     compute_cost = hourly_rate * hours
+     storage_cost = storage * 0.10  # $0.10 per GB for EBS
+
+     return {
+         "compute_cost": compute_cost,
+         "storage_cost": storage_cost,
+         "total_cost": compute_cost + storage_cost,
+         "instance_details": instance_data
+     }

def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
+     instance_data = gcp_instances[instance]
+     base_hourly = instance_data["hourly_rate"]
+
+     # Apply discounts
    if spot:
+         hourly_rate = base_hourly * 0.2  # 80% discount for preemptible
    elif reserved:
+         discount_factors = {1: 0.7, 3: 0.5}  # 30% for 1 year, 50% for 3 years
+         hourly_rate = base_hourly * discount_factors.get(years, 0.7)
+     else:
+         hourly_rate = base_hourly
+
+     compute_cost = hourly_rate * hours
+     storage_cost = storage * 0.04  # $0.04 per GB for Standard SSD
+
+     return {
+         "compute_cost": compute_cost,
+         "storage_cost": storage_cost,
+         "total_cost": compute_cost + storage_cost,
+         "instance_details": instance_data
+     }

  def calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls):
+     model_data = api_pricing[provider][model]
+
+     input_cost = (input_tokens * model_data["input_per_1M"]) / 1
+     output_cost = (output_tokens * model_data["output_per_1M"]) / 1
+
+     # Add a small cost for API calls for some providers
+     api_call_costs = 0
+     if provider == "TogetherAI":
+         api_call_costs = api_calls * 0.0001  # $0.0001 per request
+
+     total_cost = input_cost + output_cost + api_call_costs
+
+     return {
+         "input_cost": input_cost,
+         "output_cost": output_cost,
+         "api_call_cost": api_call_costs,
+         "total_cost": total_cost,
+         "model_details": model_data
+     }

+ # Filter instances based on model size requirements
+ def filter_compatible_instances(instances_dict, min_memory_required):
+     compatible = {}
+     for name, data in instances_dict.items():
+         # Parse GPU memory
+         memory_str = data["gpu_memory"]
+
+         # Handle multiple GPUs
+         if "x" in memory_str and not memory_str.startswith(("1x", "2x", "4x", "8x")):
+             # Format: "16GB"
+             memory_val = int(memory_str.split("GB")[0])
+         elif "x" in memory_str:
+             # Format: "8x40GB"
+             parts = memory_str.split("x")
+             num_gpus = int(parts[0])
+             memory_per_gpu = int(parts[1].split("GB")[0])
+             memory_val = num_gpus * memory_per_gpu
        else:
+             # Format: "40GB"
+             memory_val = int(memory_str.split("GB")[0])
+
+         if memory_val >= min_memory_required:
+             compatible[name] = data
+
+     return compatible

  def generate_cost_comparison(
+     compute_hours,
+     tokens_per_month,
+     input_ratio,
+     api_calls,
+     model_size,
+     storage_gb,
+     reserved_instances,
+     spot_instances,
+     multi_year_commitment
):
+     # Calculate input and output tokens
+     input_tokens = tokens_per_month * (input_ratio / 100)
+     output_tokens = tokens_per_month * (1 - (input_ratio / 100))
+
+     # Check model memory requirements
+     min_memory_required = model_sizes[model_size]["memory_required"]
+
+     # Filter compatible instances
+     compatible_aws = filter_compatible_instances(aws_instances, min_memory_required)
+     compatible_gcp = filter_compatible_instances(gcp_instances, min_memory_required)
+
    results = []
+
+     # Generate HTML for AWS options
+     if compatible_aws:
+         aws_results = "<h3>AWS Compatible Instances</h3>"
+         aws_results += "<table width='100%'><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Hourly Rate</th><th>Monthly Cost</th></tr>"
+
+         best_aws = None
+         best_aws_cost = float('inf')
+
+         for instance in compatible_aws:
+             cost_result = calculate_aws_cost(instance, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
+             total_cost = cost_result["total_cost"]
+
+             if total_cost < best_aws_cost:
+                 best_aws = instance
+                 best_aws_cost = total_cost
+
+             aws_results += f"<tr><td>{instance}</td><td>{compatible_aws[instance]['vcpus']}</td><td>{compatible_aws[instance]['memory']}GB</td><td>{compatible_aws[instance]['gpu']}</td><td>${compatible_aws[instance]['hourly_rate']:.3f}</td><td>${total_cost:.2f}</td></tr>"
+
+         aws_results += "</table>"
+
+         if best_aws:
+             best_aws_data = calculate_aws_cost(best_aws, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
+             results.append({
+                 "provider": f"AWS ({best_aws})",
+                 "cost": best_aws_data["total_cost"],
+                 "type": "Cloud"
+             })
+     else:
+         aws_results = "<h3>AWS Compatible Instances</h3><p>No compatible AWS instances found for this model size.</p>"
+         best_aws = None
+         best_aws_cost = float('inf')
+
+     # Generate HTML for GCP options
+     if compatible_gcp:
+         gcp_results = "<h3>Google Cloud Compatible Instances</h3>"
+         gcp_results += "<table width='100%'><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Hourly Rate</th><th>Monthly Cost</th></tr>"
+
+         best_gcp = None
+         best_gcp_cost = float('inf')
+
+         for instance in compatible_gcp:
+             cost_result = calculate_gcp_cost(instance, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
+             total_cost = cost_result["total_cost"]
+
+             if total_cost < best_gcp_cost:
+                 best_gcp = instance
+                 best_gcp_cost = total_cost
+
+             gcp_results += f"<tr><td>{instance}</td><td>{compatible_gcp[instance]['vcpus']}</td><td>{compatible_gcp[instance]['memory']}GB</td><td>{compatible_gcp[instance]['gpu']}</td><td>${compatible_gcp[instance]['hourly_rate']:.3f}</td><td>${total_cost:.2f}</td></tr>"
+
+         gcp_results += "</table>"
+
+         if best_gcp:
+             best_gcp_data = calculate_gcp_cost(best_gcp, compute_hours, storage_gb, reserved_instances, spot_instances, multi_year_commitment)
+             results.append({
+                 "provider": f"GCP ({best_gcp})",
+                 "cost": best_gcp_data["total_cost"],
+                 "type": "Cloud"
+             })
+     else:
+         gcp_results = "<h3>Google Cloud Compatible Instances</h3><p>No compatible Google Cloud instances found for this model size.</p>"
+         best_gcp = None
+         best_gcp_cost = float('inf')
+
+     # Generate HTML for API options
+     api_results = "<h3>API Options</h3>"
+     api_results += "<table width='100%'><tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Total Cost</th><th>Context Length</th></tr>"
+
+     api_costs = {}
+
+     for provider in api_pricing:
+         for model in api_pricing[provider]:
+             cost_data = calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls)
+             api_costs[(provider, model)] = cost_data
+
+             api_results += f"<tr><td>{provider}</td><td>{model}</td><td>${cost_data['input_cost']:.2f}</td><td>${cost_data['output_cost']:.2f}</td><td>${cost_data['total_cost']:.2f}</td><td>{api_pricing[provider][model]['token_context']:,}</td></tr>"
+
+     api_results += "</table>"
+
+     # Find best API option
+     best_api = min(api_costs.keys(), key=lambda x: api_costs[x]["total_cost"])
+     best_api_cost = api_costs[best_api]
+
+     results.append({
+         "provider": f"{best_api[0]} ({best_api[1]})",
+         "cost": best_api_cost["total_cost"],
+         "type": "API"
+     })
+
+     # Create recommendation HTML
+     recommendation = "<h3>Recommendation</h3>"
+
+     # Find the cheapest option
+     cheapest = min(results, key=lambda x: x["cost"])
+
+     if cheapest["type"] == "API":
+         recommendation += f"<p>Based on your usage parameters, the <strong>{cheapest['provider']}</strong> API endpoint is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"
+
+         # Calculate API vs cloud cost ratio
+         cheapest_cloud = None
+         for result in results:
+             if result["type"] == "Cloud":
+                 if cheapest_cloud is None or result["cost"] < cheapest_cloud["cost"]:
+                     cheapest_cloud = result
+
+         if cheapest_cloud:
+             ratio = cheapest_cloud["cost"] / cheapest["cost"]
+             recommendation += f"<p>This is <strong>{ratio:.1f}x cheaper</strong> than the most affordable cloud option ({cheapest_cloud['provider']}).</p>"
+     else:
+         recommendation += f"<p>Based on your usage parameters, <strong>{cheapest['provider']}</strong> is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"
+
+         # Find cheapest API
+         cheapest_api = None
+         for result in results:
+             if result["type"] == "API":
+                 if cheapest_api is None or result["cost"] < cheapest_api["cost"]:
+                     cheapest_api = result
+
+         if cheapest_api:
+             ratio = cheapest_api["cost"] / cheapest["cost"]
+             if ratio > 1:
+                 recommendation += f"<p>This is <strong>{1/ratio:.1f}x cheaper</strong> than the most affordable API option ({cheapest_api['provider']}).</p>"
+             else:
+                 recommendation += f"<p>However, the API option ({cheapest_api['provider']}) is <strong>{ratio:.1f}x cheaper</strong>.</p>"
+
+     # Additional recommendation text
+     if tokens_per_month > 100 and cheapest["type"] == "Cloud":
+         recommendation += "<p>With your high token volume, cloud hardware becomes more cost-effective despite the higher upfront costs.</p>"
+     elif compute_hours < 50 and cheapest["type"] == "API":
+         recommendation += "<p>With your low usage hours, API endpoints are more cost-effective as you only pay for what you use.</p>"
+
+     # Create breakeven analysis HTML
+     breakeven = "<h3>Breakeven Analysis</h3>"
+
+     if best_aws is not None and best_api_cost["total_cost"] > 0:
+         aws_hourly = aws_instances[best_aws]["hourly_rate"]
+         breakeven_hours = best_api_cost["total_cost"] / aws_hourly
+
+         breakeven += f"<p>API vs AWS: <strong>{breakeven_hours:.1f} hours</strong> is the breakeven point.</p>"
+
+         if compute_hours > breakeven_hours:
+             breakeven += "<p>You're past the breakeven point - AWS hardware is more cost-effective than API usage.</p>"
+         else:
+             breakeven += "<p>You're below the breakeven point - API usage is more cost-effective than AWS hardware.</p>"
+
+     if best_gcp is not None and best_api_cost["total_cost"] > 0:
+         gcp_hourly = gcp_instances[best_gcp]["hourly_rate"]
+         breakeven_hours = best_api_cost["total_cost"] / gcp_hourly
+
+         breakeven += f"<p>API vs GCP: <strong>{breakeven_hours:.1f} hours</strong> is the breakeven point.</p>"
+
+         if compute_hours > breakeven_hours:
+             breakeven += "<p>You're past the breakeven point - GCP hardware is more cost-effective than API usage.</p>"
+         else:
+             breakeven += "<p>You're below the breakeven point - API usage is more cost-effective than GCP hardware.</p>"
+
+     # Generate cost comparison chart
    fig = px.bar(
+         pd.DataFrame(results),
+         x="provider",
+         y="cost",
+         color="type",
+         color_discrete_map={"Cloud": "#3B82F6", "API": "#8B5CF6"},
+         title="Monthly Cost Comparison",
+         labels={"provider": "Provider & Instance", "cost": "Monthly Cost ($)"}
    )
+
+     fig.update_layout(height=500)
+
+     # Create HTML structure for the results
+     html_output = f"""
+     <div style="padding: 20px; font-family: Arial, sans-serif;">
+         <h2>Cost Comparison Results</h2>
+
+         <div style="margin-bottom: 20px;">
+             {aws_results}
+         </div>
+
+         <div style="margin-bottom: 20px;">
+             {gcp_results}
+         </div>
+
+         <div style="margin-bottom: 20px;">
+             {api_results}
+         </div>
+
+         <div style="margin-bottom: 20px;">
+             {recommendation}
+         </div>
+
+         <div style="margin-bottom: 20px;">
+             {breakeven}
+         </div>
+
+         <div style="margin-bottom: 20px;">
+             <h3>Additional Considerations</h3>
+             <div style="display: flex; gap: 20px;">
+                 <div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
+                     <h4>Cloud Hardware Pros</h4>
+                     <ul>
+                         <li>Full control over infrastructure and customization</li>
+                         <li>Predictable costs for steady, high-volume workloads</li>
+                         <li>Can run multiple models simultaneously</li>
+                         <li>No token context limitations</li>
+                         <li>Data stays on your infrastructure</li>
+                     </ul>
+                 </div>
+                 <div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
+                     <h4>API Endpoints Pros</h4>
+                     <ul>
+                         <li>No infrastructure management overhead</li>
+                         <li>Pay-per-use model (ideal for sporadic usage)</li>
+                         <li>Instant scalability</li>
+                         <li>No upfront costs or commitment</li>
+                         <li>Automatic updates to newer model versions</li>
+                     </ul>
+                 </div>
+             </div>
+         </div>
+
+         <div style="background-color: #FEF3C7; padding: 15px; border-radius: 8px; margin-bottom: 20px;">
+             <p><strong>Note:</strong> These estimates are based on current pricing as of May 2025 and may vary based on regional pricing differences, discounts, and usage patterns.</p>
+         </div>
+     </div>
+     """
+
+     return html_output, fig

+ # Main app function
def app_function(
+     compute_hours,
+     tokens_per_month,
+     input_ratio,
+     api_calls,
+     model_size,
+     storage_gb,
+     batch_size,
+     reserved_instances,
+     spot_instances,
+     multi_year_commitment
):
+     html_output, fig = generate_cost_comparison(
+         compute_hours,
+         tokens_per_month,
+         input_ratio,
+         api_calls,
+         model_size,
+         storage_gb,
+         reserved_instances,
+         spot_instances,
+         multi_year_commitment
    )
+
+     return html_output, fig

+ # Define the Gradio interface
+ with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
+     gr.HTML("""
+     <div style="text-align: center; margin-bottom: 20px;">
+         <h1 style="color: #4F46E5; font-size: 2.5rem;">Cloud Cost Estimator</h1>
+         <p style="font-size: 1.2rem;">Compare costs between cloud hardware configurations and inference API endpoints</p>
+     </div>
+     """)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.HTML("<h3>Usage Parameters</h3>")
+
+             compute_hours = gr.Slider(
+                 label="Compute Hours per Month",
+                 minimum=1,
+                 maximum=730,
+                 value=100,
+                 info="Number of hours you'll run the model per month"
+             )
+
+             tokens_per_month = gr.Slider(
+                 label="Tokens Processed per Month (millions)",
+                 minimum=1,
+                 maximum=1000,
+                 value=10,
+                 info="Total number of tokens processed per month in millions"
+             )
+
+             input_ratio = gr.Slider(
+                 label="Input Token Ratio (%)",
+                 minimum=10,
+                 maximum=90,
+                 value=30,
+                 info="Percentage of total tokens that are input tokens"
+             )
+
+             api_calls = gr.Slider(
+                 label="API Calls per Month",
+                 minimum=100,
+                 maximum=1000000,
+                 value=10000,
+                 step=100,
+                 info="Number of API calls made per month"
+             )
+
+             model_size = gr.Dropdown(
+                 label="Model Size",
+                 choices=list(model_sizes.keys()),
+                 value="Medium (13B parameters)",
+                 info="Size of the language model you want to run"
+             )
+
+             storage_gb = gr.Slider(
+                 label="Storage Required (GB)",
+                 minimum=10,
+                 maximum=1000,
+                 value=100,
+                 info="Amount of storage required for models and data"
+             )
+
+             batch_size = gr.Slider(
+                 label="Batch Size",
+                 minimum=1,
+                 maximum=64,
+                 value=4,
+                 info="Batch size for inference (affects throughput)"
+             )
+
+             gr.HTML("<h3>Advanced Options</h3>")
+
+             reserved_instances = gr.Checkbox(
+                 label="Use Reserved Instances",
+                 value=False,
+                 info="Reserved instances offer significant discounts with 1-3 year commitments"
+             )
+
+             spot_instances = gr.Checkbox(
+                 label="Use Spot/Preemptible Instances",
+                 value=False,
+                 info="Spot instances can be 70-90% cheaper but may be terminated with little notice"
+             )
+
+             multi_year_commitment = gr.Radio(
+                 label="Commitment Period (if using Reserved Instances)",
+                 choices=["1", "3"],
+                 value="1",
+                 info="Length of reserved instance commitment in years"
+             )
+
+             submit_button = gr.Button("Calculate Costs", variant="primary")
+
+         with gr.Column(scale=2):
+             results_html = gr.HTML(label="Results")
+             plot_output = gr.Plot(label="Cost Comparison")
+
+     submit_button.click(
+         app_function,
+         inputs=[
+             compute_hours,
+             tokens_per_month,
+             input_ratio,
+             api_calls,
+             model_size,
+             storage_gb,
+             reserved_instances,
+             spot_instances,
+             multi_year_commitment
+         ],
+         outputs=[results_html, plot_output]
+     )
+
+     gr.HTML("""
+     <div style="margin-top: 30px; border-top: 1px solid #e5e7eb; padding-top: 20px;">
+         <h3>Help & Resources</h3>
+         <p><strong>Cloud Provider Documentation:</strong>
+         <a href="https://aws.amazon.com/ec2/pricing/" target="_blank">AWS EC2 Pricing</a> |
+         <a href="https://cloud.google.com/compute/pricing" target="_blank">GCP Compute Engine Pricing</a>
+         </p>
+         <p><strong>API Provider Documentation:</strong>
+         <a href="https://openai.com/pricing" target="_blank">OpenAI API Pricing</a> |
+         <a href="https://www.anthropic.com/api" target="_blank">Anthropic Claude API Pricing</a> |
+         <a href="https://www.together.ai/pricing" target="_blank">TogetherAI API Pricing</a>
+         </p>
+         <p>Made with ❤️ by Cloud Cost Estimator | Data last updated: May 2025</p>
+     </div>
+     """)
+
+ demo.launch()
+                 value=False,
+                 info="Spot instances can be 70-90% cheaper but may be terminated with little notice"
+             )
+
+             multi_year_commitment = gr.Radio(
+                 label="Commitment Period (if using Reserved Instances)",
+                 choices=[1, 3],
+                 value=1,
+                 info="Length of reserved instance commitment in years"
+             )
+
+             submit_button = gr.Button("Calculate Costs", variant="primary")
+
+         with gr.Column(scale=2):
+             results_html = gr.HTML(label="Results")
+             plot_output = gr.Plot(label="Cost Comparison")
+
+     submit_button.click(
+         app_function,
+         inputs=[
+             compute_hours,
+             tokens_per_month,
+             input_ratio,
+             api_calls,
+             model_size,
+             storage_gb,
+             batch_size,
+             reserved_instances,
+             spot_instances,
+             multi_year_commitment
+         ],
+         outputs=[results_html, plot_output]
+     )
+
+     gr.HTML("""
+     <div style="margin-top: 30px; border-top: 1px solid #e5e7eb; padding-top: 20px;">
+         <h3>Help & Resources</h3>
+         <p><strong>Cloud Provider Documentation:</strong>
+         <a href="https://aws.amazon.com/ec2/pricing/" target="_blank">AWS EC2 Pricing</a> |
+         <a href="https://cloud.google.com/compute/pricing" target="_blank">GCP Compute Engine Pricing</a>
+         </p>
+         <p><strong>API Provider Documentation:</strong>
+         <a href="https://openai.com/pricing" target="_blank">OpenAI API Pricing</a> |
+         <a href="https://www.anthropic.com/api" target="_blank">Anthropic Claude API Pricing</a> |
+         <a href="https://www.together.ai/pricing" target="_blank">TogetherAI API Pricing</a>
+         </p>
+         <p>Made with ❤️ by Cloud Cost Estimator | Data last updated: May 2025</p>
+     </div>
+     """)

+ demo.launch()