"""Cloud cost estimator (Gradio app).

Compares the monthly cost of self-hosting an LLM on AWS/GCP GPU instances
against hosted API pricing (OpenAI / TogetherAI / Anthropic), given usage
assumptions the user sets in the UI.  Outputs HTML cost tables and a Plotly
bar chart of the cheapest option per provider.
"""

import gradio as gr
import pandas as pd
import plotly.graph_objects as go

# ---------------------------------------------------------------------------
# Static pricing data.  Rates are approximate on-demand USD/hour snapshots;
# refresh them periodically from the providers' published price lists.
# ---------------------------------------------------------------------------

aws_instances = {
    # T4 GPU instances (entry level)
    "g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB", "tier": "Entry"},
    "g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB", "tier": "Entry"},
    # A10G GPU instances (mid-tier)
    "g5.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA A10G", "hourly_rate": 0.65, "gpu_memory": "24GB", "tier": "Mid"},
    "g5.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA A10G", "hourly_rate": 1.006, "gpu_memory": "24GB", "tier": "Mid"},
    # V100 GPU instances (high-tier)
    "p3.2xlarge": {"vcpus": 8, "memory": 61, "gpu": "1x NVIDIA V100", "hourly_rate": 3.06, "gpu_memory": "16GB", "tier": "High"},
    # A100 GPU instances (premium) — sized to mirror the GCP a2-highgpu line
    "p4d.xlarge": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 4.10, "gpu_memory": "40GB", "tier": "Premium"},
    "p4d.2xlarge": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 8.20, "gpu_memory": "2x40GB", "tier": "Premium"},
    "p4d.4xlarge": {"vcpus": 48, "memory": 340, "gpu": "4x NVIDIA A100", "hourly_rate": 16.40, "gpu_memory": "4x40GB", "tier": "Premium"},
}

gcp_instances = {
    # T4 GPU instances (entry level)
    "n1-standard-4-t4": {"vcpus": 4, "memory": 15, "gpu": "1x NVIDIA T4", "hourly_rate": 0.49, "gpu_memory": "16GB", "tier": "Entry"},
    "n1-standard-8-t4": {"vcpus": 8, "memory": 30, "gpu": "1x NVIDIA T4", "hourly_rate": 0.69, "gpu_memory": "16GB", "tier": "Entry"},
    # L4 GPU instances (mid-tier)
    "g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB", "tier": "Mid"},
    "g2-standard-8": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA L4", "hourly_rate": 0.89, "gpu_memory": "24GB", "tier": "Mid"},
    # V100 instance comparable to AWS p3.2xlarge (high-tier)
    "n1-standard-8-v100": {"vcpus": 8, "memory": 60, "gpu": "1x NVIDIA V100", "hourly_rate": 2.95, "gpu_memory": "16GB", "tier": "High"},
    # A100 GPU instances (premium)
    "a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB", "tier": "Premium"},
    "a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB", "tier": "Premium"},
    "a2-highgpu-4g": {"vcpus": 48, "memory": 340, "gpu": "4x NVIDIA A100", "hourly_rate": 5.86, "gpu_memory": "4x40GB", "tier": "Premium"},
}

# Hosted-API prices in USD per 1M tokens, plus context-window size.
api_pricing = {
    "OpenAI": {
        "GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},
        "GPT-4o": {"input_per_1M": 5.0, "output_per_1M": 15.0, "token_context": 32768},
        "GPT-4o-mini": {"input_per_1M": 2.5, "output_per_1M": 7.5, "token_context": 32768},
    },
    "TogetherAI": {
        "Llama-3-8B": {"input_per_1M": 0.15, "output_per_1M": 0.15, "token_context": 8192},
        "Llama-3-70B": {"input_per_1M": 0.9, "output_per_1M": 0.9, "token_context": 8192},
        "Llama-2-13B": {"input_per_1M": 0.6, "output_per_1M": 0.6, "token_context": 4096},
        "Llama-2-70B": {"input_per_1M": 2.5, "output_per_1M": 2.5, "token_context": 4096},
        "DeepSeek-Coder-33B": {"input_per_1M": 2.0, "output_per_1M": 2.0, "token_context": 16384},
    },
    "Anthropic": {
        "Claude-3-Opus": {"input_per_1M": 15.0, "output_per_1M": 75.0, "token_context": 200000},
        "Claude-3-Sonnet": {"input_per_1M": 3.0, "output_per_1M": 15.0, "token_context": 200000},
        "Claude-3-Haiku": {"input_per_1M": 0.25, "output_per_1M": 1.25, "token_context": 200000},
    },
}

# Approximate GPU memory (GB) needed to serve each model size
# (roughly 2 bytes per parameter, i.e. fp16 weights).
model_sizes = {
    "Small (7B parameters)": {"memory_required": 14},
    "Medium (13B parameters)": {"memory_required": 26},
    "Large (70B parameters)": {"memory_required": 140},
    "XL (180B parameters)": {"memory_required": 360},
}

# Per-provider pricing knobs: monthly $/GB storage, spot discount multiplier,
# and reserved-instance multipliers keyed by commitment years.
AWS_STORAGE_RATE = 0.10
GCP_STORAGE_RATE = 0.04
AWS_SPOT_FACTOR = 0.3
GCP_SPOT_FACTOR = 0.2
AWS_RESERVED_FACTORS = {1: 0.6, 3: 0.4}
GCP_RESERVED_FACTORS = {1: 0.7, 3: 0.5}


def _instance_cost(data, hours, storage, storage_rate, reserved, spot, years,
                   spot_factor, reserved_factors):
    """Monthly cost dict for one instance: discounted compute + storage.

    Spot pricing takes precedence over a reserved-instance discount; an
    unknown commitment length falls back to the 1-year factor.
    """
    rate = data["hourly_rate"]
    if spot:
        rate *= spot_factor
    elif reserved:
        rate *= reserved_factors.get(years, reserved_factors[1])
    return {"total_cost": rate * hours + storage * storage_rate, "details": data}


def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
    """Monthly AWS cost (compute + EBS storage) for *instance* (key of aws_instances)."""
    return _instance_cost(aws_instances[instance], hours, storage, AWS_STORAGE_RATE,
                          reserved, spot, years, AWS_SPOT_FACTOR, AWS_RESERVED_FACTORS)


def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
    """Monthly GCP cost (compute + persistent-disk storage) for *instance*."""
    return _instance_cost(gcp_instances[instance], hours, storage, GCP_STORAGE_RATE,
                          reserved, spot, years, GCP_SPOT_FACTOR, GCP_RESERVED_FACTORS)


def calculate_api_cost(provider, model, in_tokens, out_tokens, calls):
    """Monthly hosted-API cost: token charges plus TogetherAI's per-call fee.

    *in_tokens*/*out_tokens* are absolute token counts; pricing is per 1M.
    """
    m = api_pricing[provider][model]
    input_cost = in_tokens * m["input_per_1M"] / 1_000_000
    output_cost = out_tokens * m["output_per_1M"] / 1_000_000
    call_cost = calls * 0.0001 if provider == "TogetherAI" else 0
    return {"total_cost": input_cost + output_cost + call_cost, "details": m}


def _gpu_memory_gb(mem_str):
    """Total GPU memory in GB from strings like '16GB' or '2x40GB'."""
    s = mem_str.replace("GB", "")
    if "x" in s:
        count, per_gpu = s.split("x", 1)
        return int(count) * int(per_gpu)
    return int(s)


def filter_compatible(instances, min_mem):
    """Return the instances whose total GPU memory can hold the model (>= min_mem GB)."""
    return {name: data for name, data in instances.items()
            if _gpu_memory_gb(data["gpu_memory"]) >= min_mem}


def _html_table(title, headers, rows, empty_msg="No compatible instances"):
    """Render a titled HTML table; *rows* is a list of cell-value lists."""
    head = "".join(f"<th>{h}</th>" for h in headers)
    if rows:
        body = "".join(
            "<tr>" + "".join(f"<td>{cell}</td>" for cell in row) + "</tr>"
            for row in rows
        )
    else:
        body = f'<tr><td colspan="{len(headers)}">{empty_msg}</td></tr>'
    return (f"<div><h3>{title}</h3><table>"
            f"<thead><tr>{head}</tr></thead><tbody>{body}</tbody></table></div>")


def generate_cost_comparison(compute_hours, tokens_per_month, input_ratio,
                             api_calls, model_size, storage_gb,
                             reserved_instances, spot_instances,
                             multi_year_commitment, comparison_tier):
    """Build the cost-comparison HTML and Plotly chart from the UI inputs.

    Returns ``(html, fig)`` matching the gr.HTML / gr.Plot outputs.
    """
    years = int(multi_year_commitment)  # the radio delivers "1"/"3" strings
    in_tokens = tokens_per_month * 1_000_000 * (input_ratio / 100)
    out_tokens = tokens_per_month * 1_000_000 - in_tokens
    min_mem = model_sizes[model_size]["memory_required"]

    # Filter by model-memory fit, then (optionally) by hardware tier.
    aws_comp = filter_compatible(aws_instances, min_mem)
    gcp_comp = filter_compatible(gcp_instances, min_mem)
    if comparison_tier != "All":
        aws_comp = {k: v for k, v in aws_comp.items() if v.get("tier", "") == comparison_tier}
        gcp_comp = {k: v for k, v in gcp_comp.items() if v.get("tier", "") == comparison_tier}

    results = []  # one "best option" row per provider, feeds the chart
    cloud_headers = ["Instance", "vCPUs", "Memory", "GPU", "Tier", "Monthly Cost ($)"]

    def cloud_section(provider, compatible, cost_fn):
        """Table HTML for one cloud provider; records its cheapest instance."""
        # Compute each instance's cost exactly once; reuse for rows + best pick.
        costs = {name: cost_fn(name, compute_hours, storage_gb,
                               reserved_instances, spot_instances, years)
                 for name in compatible}
        rows = [[name, r["details"]["vcpus"], f'{r["details"]["memory"]}GB',
                 r["details"]["gpu"], r["details"].get("tier", ""),
                 f'${r["total_cost"]:.2f}']
                for name, r in costs.items()]
        if costs:
            best = min(costs, key=lambda n: costs[n]["total_cost"])
            results.append({"provider": f"{provider} ({best})",
                            "cost": costs[best]["total_cost"],
                            "type": "Cloud",
                            "tier": compatible[best].get("tier", "")})
        return _html_table(f"{provider} Instances", cloud_headers, rows)

    aws_html = cloud_section("AWS", aws_comp, calculate_aws_cost)
    gcp_html = cloud_section("GCP", gcp_comp, calculate_gcp_cost)

    # Hosted-API table (always shown regardless of tier filter).
    api_rows = []
    api_costs = {}
    for prov, models in api_pricing.items():
        for mdl in models:
            res = calculate_api_cost(prov, mdl, in_tokens, out_tokens, api_calls)
            d = res["details"]
            api_rows.append([
                prov, mdl,
                f'${in_tokens * d["input_per_1M"] / 1_000_000:.2f}',
                f'${out_tokens * d["output_per_1M"] / 1_000_000:.2f}',
                f'${res["total_cost"]:.2f}',
                f'{d["token_context"]:,}',
            ])
            api_costs[(prov, mdl)] = res["total_cost"]
    api_html = _html_table("API Options",
                           ["Provider", "Model", "Input Cost", "Output Cost",
                            "Total Cost ($)", "Context"],
                           api_rows)
    if api_costs:
        best_api = min(api_costs, key=api_costs.get)
        results.append({"provider": f"{best_api[0]} ({best_api[1]})",
                        "cost": api_costs[best_api], "type": "API", "tier": "API"})

    # Side-by-side AWS/GCP rows, grouped by vCPU count, for a specific tier.
    direct_comparison_html = ""
    if comparison_tier not in ("All", "API"):
        aws_filtered = {k: v for k, v in aws_instances.items()
                        if v.get("tier", "") == comparison_tier}
        gcp_filtered = {k: v for k, v in gcp_instances.items()
                        if v.get("tier", "") == comparison_tier}
        vcpu_groups = {}
        for name, data in aws_filtered.items():
            vcpu_groups.setdefault(data["vcpus"], {"aws": [], "gcp": []})["aws"].append(name)
        for name, data in gcp_filtered.items():
            vcpu_groups.setdefault(data["vcpus"], {"aws": [], "gcp": []})["gcp"].append(name)

        direct_rows = []
        for vcpu in sorted(vcpu_groups):
            group = vcpu_groups[vcpu]
            for inst, fn in ([(i, calculate_aws_cost) for i in group["aws"]]
                             + [(i, calculate_gcp_cost) for i in group["gcp"]]):
                r = fn(inst, compute_hours, storage_gb,
                       reserved_instances, spot_instances, years)
                d = r["details"]
                provider = "AWS" if fn is calculate_aws_cost else "GCP"
                direct_rows.append([provider, inst, d["vcpus"],
                                    f'{d["memory"]}GB', d["gpu"],
                                    f'${r["total_cost"]:.2f}'])
        direct_comparison_html = _html_table(
            f"Direct {comparison_tier} Tier Comparison",
            ["Provider", "Instance", "vCPUs", "Memory", "GPU", "Monthly Cost ($)"],
            direct_rows)

    # Bar chart: one bar per "best option", colored by tier, labeled with cost.
    df = pd.DataFrame(results)
    colors = {"Entry": "#66BB6A", "Mid": "#42A5F5", "High": "#FFA726",
              "Premium": "#EF5350", "API": "#AB47BC"}
    fig = go.Figure()
    for _, row in df.iterrows():
        fig.add_trace(go.Bar(
            x=[row["provider"]],
            y=[row["cost"]],
            name=row["provider"],
            marker_color=colors.get(row.get("tier", "API"), "#9E9E9E"),
        ))
        fig.add_annotation(
            x=row["provider"], y=row["cost"],
            text=f'${row["cost"]:.2f}',
            showarrow=False,
            yshift=10,  # position the label just above the bar
            font=dict(size=14),
        )
    fig.update_layout(
        showlegend=False,
        height=500,
        yaxis=dict(title="Monthly Cost ($)", tickprefix="$"),
        xaxis=dict(title=""),
        title="Cost Comparison",
    )

    html = f"<div>{direct_comparison_html}{aws_html}{gcp_html}{api_html}</div>"
    return html, fig


# ---------------------------------------------------------------------------
# UI setup
# ---------------------------------------------------------------------------
with gr.Blocks(title="Cloud Cost Estimator",
               theme=gr.themes.Soft(primary_hue="indigo")) as demo:
    gr.HTML("<div><h1>Cloud Cost Estimator</h1></div>")
    with gr.Row():
        with gr.Column(scale=1):
            compute_hours = gr.Slider(label="Compute Hours per Month", minimum=1, maximum=300, value=50)
            tokens_per_month = gr.Slider(label="Tokens per Month (M)", minimum=1, maximum=200, value=5)
            input_ratio = gr.Slider(label="Input Ratio (%)", minimum=10, maximum=70, value=25)
            api_calls = gr.Slider(label="API Calls per Month", minimum=100, maximum=100000, value=5000, step=100)
            model_size = gr.Dropdown(label="Model Size", choices=list(model_sizes.keys()),
                                     value="Medium (13B parameters)")
            storage_gb = gr.Slider(label="Storage (GB)", minimum=10, maximum=1000, value=100)
            comparison_tier = gr.Radio(label="Comparison Tier",
                                       choices=["All", "Entry", "Mid", "High", "Premium", "API"],
                                       value="All")
            reserved_instances = gr.Checkbox(label="Reserved Instances", value=False)
            spot_instances = gr.Checkbox(label="Spot Instances", value=False)
            multi_year_commitment = gr.Radio(label="Commitment Period (years)",
                                             choices=["1", "3"], value="1")
        with gr.Column(scale=2):
            out_html = gr.HTML()
            out_plot = gr.Plot()

    inputs = [compute_hours, tokens_per_month, input_ratio, api_calls, model_size,
              storage_gb, reserved_instances, spot_instances,
              multi_year_commitment, comparison_tier]
    outputs = [out_html, out_plot]

    # Initial calculation on page load, then recompute on every input change.
    demo.load(generate_cost_comparison, inputs, outputs)
    for input_component in inputs:
        input_component.change(generate_cost_comparison, inputs, outputs)

if __name__ == "__main__":
    # Guarded so importing this module (e.g. in tests) does not start a server.
    demo.launch()