Spaces:

delightfulrachel
/

GPUandAPIcostestimator

Sleeping

App Files Files Community

GPUandAPIcostestimator / app.py

delightfulrachel

Update app.py

2997c9b verified 4 months ago

raw

history blame

25.3 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import plotly.express as px
	import plotly.graph_objects as go

	# Pricing tables
	# AWS GPU instance catalogue. Each row is expanded into a spec dict holding the
	# vCPU count, system memory (rendered with a "GB" suffix in the results table),
	# GPU description, undiscounted hourly rate and a GPU-memory string such as
	# "16GB" or "8x40GB".
	aws_instances = {
	    name: {
	        "vcpus": vcpus,
	        "memory": memory,
	        "gpu": gpu,
	        "hourly_rate": hourly_rate,
	        "gpu_memory": gpu_memory,
	    }
	    for name, vcpus, memory, gpu, hourly_rate, gpu_memory in (
	        ("g4dn.xlarge", 4, 16, "1x NVIDIA T4", 0.526, "16GB"),
	        ("g4dn.2xlarge", 8, 32, "1x NVIDIA T4", 0.752, "16GB"),
	        ("g5.xlarge", 4, 16, "1x NVIDIA A10G", 0.65, "24GB"),
	        ("g5.2xlarge", 8, 32, "1x NVIDIA A10G", 1.006, "24GB"),
	        ("p3.2xlarge", 8, 61, "1x NVIDIA V100", 3.06, "16GB"),
	        ("p4d.24xlarge", 96, 1152, "8x NVIDIA A100", 32.77, "8x40GB"),
	    )
	}

	# Google Cloud GPU instance catalogue, in the same spec-dict shape as the AWS
	# table: vCPUs, system memory, GPU description, undiscounted hourly rate and a
	# GPU-memory string ("40GB" for one GPU, "2x40GB" for several).
	gcp_instances = {
	    name: {
	        "vcpus": vcpus,
	        "memory": memory,
	        "gpu": gpu,
	        "hourly_rate": hourly_rate,
	        "gpu_memory": gpu_memory,
	    }
	    for name, vcpus, memory, gpu, hourly_rate, gpu_memory in (
	        ("a2-highgpu-1g", 12, 85, "1x NVIDIA A100", 1.46, "40GB"),
	        ("a2-highgpu-2g", 24, 170, "2x NVIDIA A100", 2.93, "2x40GB"),
	        ("a2-highgpu-4g", 48, 340, "4x NVIDIA A100", 5.86, "4x40GB"),
	        ("n1-standard-4-t4", 4, 15, "1x NVIDIA T4", 0.49, "16GB"),
	        ("n1-standard-8-t4", 8, 30, "1x NVIDIA T4", 0.69, "16GB"),
	        ("g2-standard-4", 4, 16, "1x NVIDIA L4", 0.59, "24GB"),
	    )
	}

	# Hosted-API price list, keyed by provider and then by model. Each entry holds
	# the input and output price per one million tokens plus the context size that
	# is shown in the "Context Length" column of the results table.
	# NOTE(review): prices and context sizes are a hand-maintained snapshot -
	# verify them against the providers' pricing pages before relying on them.
	api_pricing = {
	    provider: {
	        model: {
	            "input_per_1M": input_price,
	            "output_per_1M": output_price,
	            "token_context": context_length,
	        }
	        for model, input_price, output_price, context_length in models
	    }
	    for provider, models in (
	        ("OpenAI", (
	            ("GPT-3.5-Turbo", 0.5, 1.5, 16385),
	            ("GPT-4o", 5.0, 15.0, 32768),
	            ("GPT-4o-mini", 2.5, 7.5, 32768),
	        )),
	        ("TogetherAI", (
	            ("Llama-3-8B", 0.15, 0.15, 8192),
	            ("Llama-3-70B", 0.9, 0.9, 8192),
	            ("Llama-2-13B", 0.6, 0.6, 4096),
	            ("Llama-2-70B", 2.5, 2.5, 4096),
	            ("DeepSeek-Coder-33B", 2.0, 2.0, 16384),
	        )),
	        ("Anthropic", (
	            ("Claude-3-Opus", 15.0, 75.0, 200000),
	            ("Claude-3-Sonnet", 3.0, 15.0, 200000),
	            ("Claude-3-Haiku", 0.25, 1.25, 200000),
	        )),
	    )
	}

	# Model size presets offered in the UI dropdown. "memory_required" is the
	# threshold compared against an instance's total GPU memory when filtering
	# compatible hardware; "throughput_factor" is stored alongside it but is not
	# read by any code visible in this file.
	model_sizes = {
	    "Small (7B parameters)": dict(memory_required=14, throughput_factor=1.0),
	    "Medium (13B parameters)": dict(memory_required=26, throughput_factor=0.7),
	    "Large (70B parameters)": dict(memory_required=140, throughput_factor=0.3),
	    "XL (180B parameters)": dict(memory_required=360, throughput_factor=0.15),
	}

	# Cost calculators
	def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
	    """Estimate the cost of running one AWS instance type.

	    Args:
	        instance: Key into ``aws_instances``.
	        hours: Number of compute hours to bill.
	        storage: Storage in GB, billed at a flat $0.10 per GB.
	        reserved: Apply the reserved-instance discount. Ignored when
	            ``spot`` is set.
	        spot: Apply the spot discount; takes precedence over ``reserved``.
	        years: Reserved commitment length. Ints and numeric strings are both
	            accepted (a UI radio may deliver "1" or "3"); any other value
	            falls back to the 1-year discount.

	    Returns:
	        Dict with ``compute_cost``, ``storage_cost``, ``total_cost`` and
	        ``instance_details`` (the instance's spec dict).

	    Raises:
	        KeyError: If ``instance`` is not in ``aws_instances``.
	    """
	    instance_data = aws_instances[instance]
	    base_hourly = instance_data["hourly_rate"]

	    if spot:
	        hourly_rate = base_hourly * 0.3  # 70% discount for spot
	    elif reserved:
	        # The discount table is keyed by int. Normalise the term first so a
	        # string such as "3" selects the 3-year discount instead of silently
	        # missing the lookup and getting the 1-year default.
	        try:
	            term = int(years)
	        except (TypeError, ValueError):
	            term = None
	        discount_factors = {1: 0.6, 3: 0.4}  # 40% off for 1 year, 60% off for 3 years
	        hourly_rate = base_hourly * discount_factors.get(term, 0.6)
	    else:
	        hourly_rate = base_hourly

	    compute_cost = hourly_rate * hours
	    storage_cost = storage * 0.10  # $0.10 per GB for EBS

	    return {
	        "compute_cost": compute_cost,
	        "storage_cost": storage_cost,
	        "total_cost": compute_cost + storage_cost,
	        "instance_details": instance_data,
	    }

	def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
	    """Estimate the cost of running one Google Cloud instance type.

	    Args:
	        instance: Key into ``gcp_instances``.
	        hours: Number of compute hours to bill.
	        storage: Storage in GB, billed at a flat $0.04 per GB.
	        reserved: Apply the committed-use discount. Ignored when ``spot``
	            is set.
	        spot: Apply the preemptible discount; takes precedence over
	            ``reserved``.
	        years: Commitment length. Ints and numeric strings are both accepted
	            (a UI radio may deliver "1" or "3"); any other value falls back
	            to the 1-year discount.

	    Returns:
	        Dict with ``compute_cost``, ``storage_cost``, ``total_cost`` and
	        ``instance_details`` (the instance's spec dict).

	    Raises:
	        KeyError: If ``instance`` is not in ``gcp_instances``.
	    """
	    instance_data = gcp_instances[instance]
	    base_hourly = instance_data["hourly_rate"]

	    if spot:
	        hourly_rate = base_hourly * 0.2  # 80% discount for preemptible
	    elif reserved:
	        # The discount table is keyed by int. Normalise the term first so a
	        # string such as "3" selects the 3-year discount instead of silently
	        # missing the lookup and getting the 1-year default.
	        try:
	            term = int(years)
	        except (TypeError, ValueError):
	            term = None
	        discount_factors = {1: 0.7, 3: 0.5}  # 30% off for 1 year, 50% off for 3 years
	        hourly_rate = base_hourly * discount_factors.get(term, 0.7)
	    else:
	        hourly_rate = base_hourly

	    compute_cost = hourly_rate * hours
	    storage_cost = storage * 0.04  # $0.04 per GB for Standard SSD

	    return {
	        "compute_cost": compute_cost,
	        "storage_cost": storage_cost,
	        "total_cost": compute_cost + storage_cost,
	        "instance_details": instance_data,
	    }

	def calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls):
	    """Estimate the cost of serving a token volume through a hosted API.

	    Args:
	        provider: Key into ``api_pricing``.
	        model: Key into ``api_pricing[provider]``.
	        input_tokens: Input token volume in millions of tokens.
	        output_tokens: Output token volume in millions of tokens.
	        api_calls: Number of requests; only billed for TogetherAI.

	    Returns:
	        Dict with ``input_cost``, ``output_cost``, ``api_call_cost``,
	        ``total_cost`` and ``model_details`` (the model's pricing dict).

	    Raises:
	        KeyError: If the provider or model is not in ``api_pricing``.
	    """
	    model_data = api_pricing[provider][model]

	    # Volumes are already expressed in millions of tokens and prices are per
	    # million, so the cost is a plain product with no further scaling.
	    input_cost = input_tokens * model_data["input_per_1M"]
	    output_cost = output_tokens * model_data["output_per_1M"]

	    # Add a small cost for API calls for some providers
	    api_call_costs = 0
	    if provider == "TogetherAI":
	        api_call_costs = api_calls * 0.0001  # $0.0001 per request

	    total_cost = input_cost + output_cost + api_call_costs

	    return {
	        "input_cost": input_cost,
	        "output_cost": output_cost,
	        "api_call_cost": api_call_costs,
	        "total_cost": total_cost,
	        "model_details": model_data,
	    }

	# Filter instances based on model size requirements
	def _total_gpu_memory_gb(memory_str):
	    """Return the total GPU memory in GB for a spec like "16GB" or "8x40GB"."""
	    gpu_count, separator, per_gpu = memory_str.partition("x")
	    if not separator:
	        # No "<count>x" prefix: the whole string describes a single GPU.
	        gpu_count, per_gpu = "1", memory_str
	    return int(gpu_count) * int(per_gpu.split("GB")[0])


	def filter_compatible_instances(instances_dict, min_memory_required):
	    """Select the instances whose total GPU memory meets a minimum.

	    Memory is summed across all GPUs of an instance, so "8x40GB" counts as
	    320 GB. Any GPU count is accepted, not only 1x/2x/4x/8x.

	    Args:
	        instances_dict: Mapping of instance name to spec dict; each spec
	            must carry a ``gpu_memory`` string.
	        min_memory_required: Minimum total GPU memory in GB.

	    Returns:
	        A new dict with the qualifying entries, in their original order.

	    Raises:
	        ValueError: If a ``gpu_memory`` string cannot be parsed.
	    """
	    return {
	        name: data
	        for name, data in instances_dict.items()
	        if _total_gpu_memory_gb(data["gpu_memory"]) >= min_memory_required
	    }

	def _render_cloud_section(label, compatible, cost_fn, hours, storage_gb, reserved, spot, years):
	    """Build one provider's instance table and find its cheapest instance.

	    Returns ``(html, best_name, best_quote)``. ``best_name`` and
	    ``best_quote`` are ``None`` when ``compatible`` is empty.
	    """
	    heading = f"<h3>{label} Compatible Instances</h3>"
	    if not compatible:
	        return f"{heading}<p>No compatible {label} instances found for this model size.</p>", None, None

	    parts = [
	        heading,
	        "<table width='100%'><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Hourly Rate</th><th>Monthly Cost</th></tr>",
	    ]
	    best_name = None
	    best_quote = None
	    for name, spec in compatible.items():
	        quote = cost_fn(name, hours, storage_gb, reserved, spot, years)
	        if best_quote is None or quote["total_cost"] < best_quote["total_cost"]:
	            best_name, best_quote = name, quote
	        # The "Hourly Rate" column shows the list rate; the monthly cost
	        # column reflects any spot/reserved discount plus storage.
	        parts.append(
	            f"<tr><td>{name}</td><td>{spec['vcpus']}</td><td>{spec['memory']}GB</td><td>{spec['gpu']}</td><td>${spec['hourly_rate']:.3f}</td><td>${quote['total_cost']:.2f}</td></tr>"
	        )
	    parts.append("</table>")
	    return "".join(parts), best_name, best_quote


	def _render_breakeven(label, hourly_rate, fixed_cost, compute_hours, api_total):
	    """Describe where a provider's total cost meets the API's total cost.

	    Cloud cost is ``fixed_cost + hourly_rate * hours`` while the API cost
	    depends only on token volume, so hardware is the cheaper option *below*
	    the breakeven hour count and the API is cheaper above it.
	    """
	    if hourly_rate <= 0 or api_total <= 0:
	        return ""
	    # Clamp at zero: when storage alone exceeds the API bill the API wins at
	    # any number of hours.
	    breakeven_hours = max((api_total - fixed_cost) / hourly_rate, 0.0)
	    if compute_hours > breakeven_hours:
	        verdict = f"<p>You're past the breakeven point - API usage is more cost-effective than {label} hardware.</p>"
	    else:
	        verdict = f"<p>You're below the breakeven point - {label} hardware is more cost-effective than API usage.</p>"
	    return f"<p>API vs {label}: <strong>{breakeven_hours:.1f} hours</strong> is the breakeven point.</p>" + verdict


	def generate_cost_comparison(
	    compute_hours,
	    tokens_per_month,
	    input_ratio,
	    api_calls,
	    model_size,
	    storage_gb,
	    reserved_instances,
	    spot_instances,
	    multi_year_commitment
	):
	    """Compare cloud-hardware and hosted-API costs for one usage profile.

	    Args:
	        compute_hours: Compute hours per month for the cloud options.
	        tokens_per_month: Monthly token volume in millions.
	        input_ratio: Percentage (0-100) of tokens that are input tokens.
	        api_calls: Number of API requests per month.
	        model_size: Key into ``model_sizes``; sets the GPU memory needed.
	        storage_gb: Storage in GB for the cloud options.
	        reserved_instances: Whether to apply reserved pricing.
	        spot_instances: Whether to apply spot/preemptible pricing.
	        multi_year_commitment: Reserved term in years, as an int or a
	            numeric string; anything else is treated as 1.

	    Returns:
	        ``(html_output, fig)``: the HTML report and a Plotly bar chart of the
	        cheapest option per provider.

	    Raises:
	        KeyError: If ``model_size`` is not in ``model_sizes``.
	    """
	    # A UI radio may deliver the term as "1"/"3"; the cost helpers key their
	    # discount tables by int.
	    try:
	        commitment_years = int(multi_year_commitment)
	    except (TypeError, ValueError):
	        commitment_years = 1

	    # Split the token volume into input and output shares.
	    input_share = input_ratio / 100
	    input_tokens = tokens_per_month * input_share
	    output_tokens = tokens_per_month * (1 - input_share)

	    min_memory_required = model_sizes[model_size]["memory_required"]

	    results = []
	    section_html = []
	    breakeven_inputs = []
	    cloud_providers = (
	        ("AWS", "AWS", aws_instances, calculate_aws_cost),
	        ("Google Cloud", "GCP", gcp_instances, calculate_gcp_cost),
	    )
	    for long_label, short_label, catalogue, cost_fn in cloud_providers:
	        compatible = filter_compatible_instances(catalogue, min_memory_required)
	        html, best_name, best_quote = _render_cloud_section(
	            long_label,
	            compatible,
	            cost_fn,
	            compute_hours,
	            storage_gb,
	            reserved_instances,
	            spot_instances,
	            commitment_years,
	        )
	        section_html.append(html)
	        if best_name is None:
	            continue
	        results.append({
	            "provider": f"{short_label} ({best_name})",
	            "cost": best_quote["total_cost"],
	            "type": "Cloud"
	        })
	        # A one-hour quote yields the effective (discounted) hourly rate and
	        # the fixed storage cost that the breakeven analysis needs.
	        unit_quote = cost_fn(best_name, 1, storage_gb, reserved_instances, spot_instances, commitment_years)
	        breakeven_inputs.append((short_label, unit_quote["compute_cost"], unit_quote["storage_cost"]))
	    aws_results, gcp_results = section_html

	    # API options table
	    api_parts = [
	        "<h3>API Options</h3>",
	        "<table width='100%'><tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Total Cost</th><th>Context Length</th></tr>",
	    ]
	    api_costs = {}
	    for provider, models in api_pricing.items():
	        for model, pricing in models.items():
	            cost_data = calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls)
	            api_costs[(provider, model)] = cost_data
	            api_parts.append(
	                f"<tr><td>{provider}</td><td>{model}</td><td>${cost_data['input_cost']:.2f}</td><td>${cost_data['output_cost']:.2f}</td><td>${cost_data['total_cost']:.2f}</td><td>{pricing['token_context']:,}</td></tr>"
	            )
	    api_parts.append("</table>")
	    api_results = "".join(api_parts)

	    best_api = min(api_costs, key=lambda key: api_costs[key]["total_cost"])
	    api_total = api_costs[best_api]["total_cost"]
	    api_entry = {
	        "provider": f"{best_api[0]} ({best_api[1]})",
	        "cost": api_total,
	        "type": "API"
	    }
	    results.append(api_entry)

	    # Recommendation
	    cheapest = min(results, key=lambda entry: entry["cost"])
	    cloud_entries = [entry for entry in results if entry["type"] == "Cloud"]
	    recommendation_parts = ["<h3>Recommendation</h3>"]

	    if cheapest["type"] == "API":
	        recommendation_parts.append(
	            f"<p>Based on your usage parameters, the <strong>{cheapest['provider']}</strong> API endpoint is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"
	        )
	        if cloud_entries and cheapest["cost"] > 0:
	            cheapest_cloud = min(cloud_entries, key=lambda entry: entry["cost"])
	            ratio = cheapest_cloud["cost"] / cheapest["cost"]
	            recommendation_parts.append(
	                f"<p>This is <strong>{ratio:.1f}x cheaper</strong> than the most affordable cloud option ({cheapest_cloud['provider']}).</p>"
	            )
	    else:
	        recommendation_parts.append(
	            f"<p>Based on your usage parameters, <strong>{cheapest['provider']}</strong> is the most cost-effective option at <strong>${cheapest['cost']:.2f}/month</strong>.</p>"
	        )
	        if cheapest["cost"] > 0:
	            # ratio >= 1 here because ``cheapest`` is the overall minimum;
	            # report it directly rather than its reciprocal.
	            ratio = api_entry["cost"] / cheapest["cost"]
	            if ratio > 1:
	                recommendation_parts.append(
	                    f"<p>This is <strong>{ratio:.1f}x cheaper</strong> than the most affordable API option ({api_entry['provider']}).</p>"
	                )
	            else:
	                recommendation_parts.append(
	                    f"<p>The most affordable API option ({api_entry['provider']}) costs the same.</p>"
	                )

	    # Additional recommendation text
	    if tokens_per_month > 100 and cheapest["type"] == "Cloud":
	        recommendation_parts.append(
	            "<p>With your high token volume, cloud hardware becomes more cost-effective despite the higher upfront costs.</p>"
	        )
	    elif compute_hours < 50 and cheapest["type"] == "API":
	        recommendation_parts.append(
	            "<p>With your low usage hours, API endpoints are more cost-effective as you only pay for what you use.</p>"
	        )
	    recommendation = "".join(recommendation_parts)

	    # Breakeven analysis
	    breakeven_parts = ["<h3>Breakeven Analysis</h3>"]
	    if api_total > 0:
	        for short_label, hourly_rate, fixed_cost in breakeven_inputs:
	            breakeven_parts.append(
	                _render_breakeven(short_label, hourly_rate, fixed_cost, compute_hours, api_total)
	            )
	    breakeven = "".join(breakeven_parts)

	    # Cost comparison chart
	    fig = px.bar(
	        pd.DataFrame(results),
	        x="provider",
	        y="cost",
	        color="type",
	        color_discrete_map={"Cloud": "#3B82F6", "API": "#8B5CF6"},
	        title="Monthly Cost Comparison",
	        labels={"provider": "Provider & Instance", "cost": "Monthly Cost ($)"}
	    )

	    fig.update_layout(height=500)

	    # Assemble the HTML report
	    html_output = f"""
	<div style="padding: 20px; font-family: Arial, sans-serif;">
	<h2>Cost Comparison Results</h2>

	<div style="margin-bottom: 20px;">
	{aws_results}
	</div>

	<div style="margin-bottom: 20px;">
	{gcp_results}
	</div>

	<div style="margin-bottom: 20px;">
	{api_results}
	</div>

	<div style="margin-bottom: 20px;">
	{recommendation}
	</div>

	<div style="margin-bottom: 20px;">
	{breakeven}
	</div>

	<div style="margin-bottom: 20px;">
	<h3>Additional Considerations</h3>
	<div style="display: flex; gap: 20px;">
	<div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
	<h4>Cloud Hardware Pros</h4>
	<ul>
	<li>Full control over infrastructure and customization</li>
	<li>Predictable costs for steady, high-volume workloads</li>
	<li>Can run multiple models simultaneously</li>
	<li>No token context limitations</li>
	<li>Data stays on your infrastructure</li>
	</ul>
	</div>
	<div style="flex: 1; background-color: #F3F4F6; padding: 15px; border-radius: 8px;">
	<h4>API Endpoints Pros</h4>
	<ul>
	<li>No infrastructure management overhead</li>
	<li>Pay-per-use model (ideal for sporadic usage)</li>
	<li>Instant scalability</li>
	<li>No upfront costs or commitment</li>
	<li>Automatic updates to newer model versions</li>
	</ul>
	</div>
	</div>
	</div>

	<div style="background-color: #FEF3C7; padding: 15px; border-radius: 8px; margin-bottom: 20px;">
	<p><strong>Note:</strong> These estimates are based on current pricing as of May 2025 and may vary based on regional pricing differences, discounts, and usage patterns.</p>
	</div>
	</div>
	"""

	    return html_output, fig

	# Gradio callback
	def app_function(
	    compute_hours,
	    tokens_per_month,
	    input_ratio,
	    api_calls,
	    model_size,
	    storage_gb,
	    batch_size,
	    reserved_instances,
	    spot_instances,
	    multi_year_commitment
	):
	    """Run the cost comparison for the values collected from the UI.

	    Every argument except ``batch_size`` is forwarded unchanged to
	    ``generate_cost_comparison``; ``batch_size`` is accepted but not used.

	    Returns:
	        ``(html_output, fig)`` exactly as produced by
	        ``generate_cost_comparison``.
	    """
	    comparison_args = dict(
	        compute_hours=compute_hours,
	        tokens_per_month=tokens_per_month,
	        input_ratio=input_ratio,
	        api_calls=api_calls,
	        model_size=model_size,
	        storage_gb=storage_gb,
	        reserved_instances=reserved_instances,
	        spot_instances=spot_instances,
	        multi_year_commitment=multi_year_commitment,
	    )
	    html_output, fig = generate_cost_comparison(**comparison_args)
	    return html_output, fig

	# Define the Gradio interface.
	# The inputs list passed to the click handler must match app_function's ten
	# positional parameters, in order.
	with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
	    gr.HTML("""
	<div style="text-align: center; margin-bottom: 20px;">
	<h1 style="color: #4F46E5; font-size: 2.5rem;">Cloud Cost Estimator</h1>
	<p style="font-size: 1.2rem;">Compare costs between cloud hardware configurations and inference API endpoints</p>
	</div>
	""")

	    with gr.Row():
	        with gr.Column(scale=1):
	            gr.HTML("<h3>Usage Parameters</h3>")

	            compute_hours = gr.Slider(
	                label="Compute Hours per Month",
	                minimum=1,
	                maximum=730,
	                value=100,
	                info="Number of hours you'll run the model per month"
	            )

	            tokens_per_month = gr.Slider(
	                label="Tokens Processed per Month (millions)",
	                minimum=1,
	                maximum=1000,
	                value=10,
	                info="Total number of tokens processed per month in millions"
	            )

	            input_ratio = gr.Slider(
	                label="Input Token Ratio (%)",
	                minimum=10,
	                maximum=90,
	                value=30,
	                info="Percentage of total tokens that are input tokens"
	            )

	            api_calls = gr.Slider(
	                label="API Calls per Month",
	                minimum=100,
	                maximum=1000000,
	                value=10000,
	                step=100,
	                info="Number of API calls made per month"
	            )

	            model_size = gr.Dropdown(
	                label="Model Size",
	                choices=list(model_sizes.keys()),
	                value="Medium (13B parameters)",
	                info="Size of the language model you want to run"
	            )

	            storage_gb = gr.Slider(
	                label="Storage Required (GB)",
	                minimum=10,
	                maximum=1000,
	                value=100,
	                info="Amount of storage required for models and data"
	            )

	            # Collected and forwarded to app_function, which accepts the
	            # value but does not use it.
	            batch_size = gr.Slider(
	                label="Batch Size",
	                minimum=1,
	                maximum=64,
	                value=4,
	                info="Batch size for inference (affects throughput)"
	            )

	            gr.HTML("<h3>Advanced Options</h3>")

	            reserved_instances = gr.Checkbox(
	                label="Use Reserved Instances",
	                value=False,
	                info="Reserved instances offer significant discounts with 1-3 year commitments"
	            )

	            spot_instances = gr.Checkbox(
	                label="Use Spot/Preemptible Instances",
	                value=False,
	                info="Spot instances can be 70-90% cheaper but may be terminated with little notice"
	            )

	            # Integer choices: the reserved-discount tables in the cost
	            # calculators are keyed by the ints 1 and 3.
	            multi_year_commitment = gr.Radio(
	                label="Commitment Period (if using Reserved Instances)",
	                choices=[1, 3],
	                value=1,
	                info="Length of reserved instance commitment in years"
	            )

	            submit_button = gr.Button("Calculate Costs", variant="primary")

	        with gr.Column(scale=2):
	            results_html = gr.HTML(label="Results")
	            plot_output = gr.Plot(label="Cost Comparison")

	    submit_button.click(
	        app_function,
	        inputs=[
	            compute_hours,
	            tokens_per_month,
	            input_ratio,
	            api_calls,
	            model_size,
	            storage_gb,
	            batch_size,
	            reserved_instances,
	            spot_instances,
	            multi_year_commitment
	        ],
	        outputs=[results_html, plot_output]
	    )

	    gr.HTML("""
	<div style="margin-top: 30px; border-top: 1px solid #e5e7eb; padding-top: 20px;">
	<h3>Help & Resources</h3>
	<p><strong>Cloud Provider Documentation:</strong>
	<a href="https://aws.amazon.com/ec2/pricing/" target="_blank">AWS EC2 Pricing</a> |
	<a href="https://cloud.google.com/compute/pricing" target="_blank">GCP Compute Engine Pricing</a>
	</p>
	<p><strong>API Provider Documentation:</strong>
	<a href="https://openai.com/pricing" target="_blank">OpenAI API Pricing</a> |
	<a href="https://www.anthropic.com/api" target="_blank">Anthropic Claude API Pricing</a> |
	<a href="https://www.together.ai/pricing" target="_blank">TogetherAI API Pricing</a>
	</p>
	<p>Made with ❤️ by Cloud Cost Estimator | Data last updated: May 2025</p>
	</div>
	""")

	demo.launch()