Spaces:

delightfulrachel
/

GPUandAPIcostestimator

Sleeping

App Files Files Community

GPUandAPIcostestimator / app.py

delightfulrachel

Update app.py

b185736 verified 4 months ago

raw

history blame

11.7 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import plotly.express as px
	import plotly.graph_objects as go

	# Pricing data
	aws_instances = {
	"g4dn.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB"},
	"g4dn.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB"},
	"g5.xlarge": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA A10G", "hourly_rate": 0.65, "gpu_memory": "24GB"},
	"g5.2xlarge": {"vcpus": 8, "memory": 32, "gpu": "1x NVIDIA A10G", "hourly_rate": 1.006, "gpu_memory": "24GB"},
	"p3.2xlarge": {"vcpus": 8, "memory": 61, "gpu": "1x NVIDIA V100", "hourly_rate": 3.06, "gpu_memory": "16GB"},
	"p4d.24xlarge": {"vcpus": 96, "memory": 1152, "gpu": "8x NVIDIA A100", "hourly_rate": 32.77, "gpu_memory": "8x40GB"}
	}

	gcp_instances = {
	"a2-highgpu-1g": {"vcpus": 12, "memory": 85, "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB"},
	"a2-highgpu-2g": {"vcpus": 24, "memory": 170, "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB"},
	"a2-highgpu-4g": {"vcpus": 48, "memory": 340, "gpu": "4x NVIDIA A100", "hourly_rate": 5.86, "gpu_memory": "4x40GB"},
	"n1-standard-4-t4": {"vcpus": 4, "memory": 15, "gpu": "1x NVIDIA T4", "hourly_rate": 0.49, "gpu_memory": "16GB"},
	"n1-standard-8-t4": {"vcpus": 8, "memory": 30, "gpu": "1x NVIDIA T4", "hourly_rate": 0.69, "gpu_memory": "16GB"},
	"g2-standard-4": {"vcpus": 4, "memory": 16, "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB"}
	}

	api_pricing = {
	"OpenAI": {
	"GPT-3.5-Turbo": {"input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385},
	"GPT-4o": {"input_per_1M": 5.0, "output_per_1M": 15.0, "token_context": 32768},
	"GPT-4o-mini": {"input_per_1M": 2.5, "output_per_1M": 7.5, "token_context": 32768},
	},
	"TogetherAI": {
	"Llama-3-8B": {"input_per_1M": 0.15, "output_per_1M": 0.15, "token_context": 8192},
	"Llama-3-70B": {"input_per_1M": 0.9, "output_per_1M": 0.9, "token_context": 8192},
	"Llama-2-13B": {"input_per_1M": 0.6, "output_per_1M": 0.6, "token_context": 4096},
	"Llama-2-70B": {"input_per_1M": 2.5, "output_per_1M": 2.5, "token_context": 4096},
	"DeepSeek-Coder-33B": {"input_per_1M": 2.0, "output_per_1M": 2.0, "token_context": 16384},
	},
	"Anthropic": {
	"Claude-3-Opus": {"input_per_1M": 15.0, "output_per_1M": 75.0, "token_context": 200000},
	"Claude-3-Sonnet": {"input_per_1M": 3.0, "output_per_1M": 15.0, "token_context": 200000},
	"Claude-3-Haiku": {"input_per_1M": 0.25, "output_per_1M": 1.25, "token_context": 200000},
	}
	}

	model_sizes = {
	"Small (7B parameters)": {"memory_required": 14},
	"Medium (13B parameters)": {"memory_required": 26},
	"Large (70B parameters)": {"memory_required": 140},
	"XL (180B parameters)": {"memory_required": 360},
	}

	def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
	data = aws_instances[instance]
	rate = data['hourly_rate']
	if spot:
	rate *= 0.3
	elif reserved:
	factors = {1: 0.6, 3: 0.4}
	rate *= factors.get(years, 0.6)
	compute = rate * hours
	storage_cost = storage * 0.10
	return {'total_cost': compute + storage_cost, 'details': data}

	def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
	data = gcp_instances[instance]
	rate = data['hourly_rate']
	if spot:
	rate *= 0.2
	elif reserved:
	factors = {1: 0.7, 3: 0.5}
	rate *= factors.get(years, 0.7)
	compute = rate * hours
	storage_cost = storage * 0.04
	return {'total_cost': compute + storage_cost, 'details': data}

	def calculate_api_cost(provider, model, in_tokens, out_tokens, calls):
	m = api_pricing[provider][model]
	input_cost = in_tokens * m['input_per_1M']
	output_cost = out_tokens * m['output_per_1M']
	call_cost = calls * 0.0001 if provider == 'TogetherAI' else 0
	return {'total_cost': input_cost + output_cost + call_cost, 'details': m}

	def filter_compatible(instances, min_mem):
	res = {}
	for name, data in instances.items():
	mem_str = data['gpu_memory']
	if 'x' in mem_str and not mem_str.startswith(('1x','2x','4x','8x')):
	val = int(mem_str.replace('GB',''))
	elif 'x' in mem_str:
	parts = mem_str.split('x')
	val = int(parts[0]) * int(parts[1].replace('GB',''))
	else:
	val = int(mem_str.replace('GB',''))
	if val >= min_mem:
	res[name] = data
	return res

	def generate_cost_comparison(
	compute_hours, tokens_per_month, input_ratio, api_calls,
	model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
	):
	years = int(multi_year_commitment)
	in_tokens = tokens_per_month * (input_ratio/100)
	out_tokens = tokens_per_month - in_tokens
	min_mem = model_sizes[model_size]['memory_required']

	aws_comp = filter_compatible(aws_instances, min_mem)
	gcp_comp = filter_compatible(gcp_instances, min_mem)

	results = []

	# AWS table
	aws_html = '<h3>AWS Instances</h3>'
	aws_html += '<table width="100%"><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Monthly Cost ($)</th></tr>'
	if aws_comp:
	for inst in aws_comp:
	res = calculate_aws_cost(inst, compute_hours, storage_gb, reserved_instances, spot_instances, years)
	aws_html += f'<tr><td>{inst}</td><td>{res["details"]["vcpus"]}</td><td>{res["details"]["memory"]}GB</td><td>{res["details"]["gpu"]}</td><td>${res["total_cost"]:.2f}</td></tr>'
	# best AWS
	best_aws = min(aws_comp, key=lambda x: calculate_aws_cost(x, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost'])
	best_aws_cost = calculate_aws_cost(best_aws, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost']
	results.append({'provider': f'AWS ({best_aws})', 'cost': best_aws_cost, 'type': 'Cloud'})
	else:
	aws_html += '<tr><td colspan="5">No compatible instances</td></tr>'
	aws_html += '</table>'

	# GCP table
	gcp_html = '<h3>GCP Instances</h3>'
	gcp_html += '<table width="100%"><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Monthly Cost ($)</th></tr>'
	if gcp_comp:
	for inst in gcp_comp:
	res = calculate_gcp_cost(inst, compute_hours, storage_gb, reserved_instances, spot_instances, years)
	gcp_html += f'<tr><td>{inst}</td><td>{res["details"]["vcpus"]}</td><td>{res["details"]["memory"]}GB</td><td>{res["details"]["gpu" ]}</td><td>${res["total_cost"]:.2f}</td></tr>'
	best_gcp = min(gcp_comp, key=lambda x: calculate_gcp_cost(x, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost'])
	best_gcp_cost = calculate_gcp_cost(best_gcp, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost']
	results.append({'provider': f'GCP ({best_gcp})', 'cost': best_gcp_cost, 'type': 'Cloud'})
	else:
	gcp_html += '<tr><td colspan="5">No compatible instances</td></tr>'
	gcp_html += '</table>'

	# API table
	api_html = '<h3>API Options</h3>'
	api_html += '<table width="100%"><tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Total Cost ($)</th><th>Context</th></tr>'
	api_costs = {}
	for prov in api_pricing:
	for mdl in api_pricing[prov]:
	res = calculate_api_cost(prov, mdl, in_tokens, out_tokens, api_calls)
	details = api_pricing[prov][mdl]
	api_html += f'<tr><td>{prov}</td><td>{mdl}</td><td>${in_tokens * details["input_per_1M"]:.2f}</td><td>${out_tokens * details["output_per_1M"]:.2f}</td><td>${res["total_cost"]:.2f}</td><td>{details["token_context"]:,}</td></tr>'
	api_costs[(prov, mdl)] = res['total_cost']
	api_html += '</table>'
	best_api = min(api_costs, key=api_costs.get)
	results.append({'provider': f'{best_api[0]} ({best_api[1]})', 'cost': api_costs[best_api], 'type': 'API'})

	# Recommendation and Breakeven omitted for brevity

	# Chart with annotations
	df = pd.DataFrame(results)
	colors = {r['provider']: c for r,c in zip(results, ['#FF9900','#4285F4','#D62828'])}

	# Create figure using plotly graph objects for more control
	fig = go.Figure()

	# Add bars
	for i, row in df.iterrows():
	fig.add_trace(go.Bar(
	x=[row['provider']],
	y=[row['cost']],
	name=row['provider'],
	marker_color=colors[row['provider']]
	))

	# Add annotations on top of each bar
	for i, row in df.iterrows():
	fig.add_annotation(
	x=row['provider'],
	y=row['cost'],
	text=f"${row['cost']:.2f}",
	showarrow=False,
	yshift=10, # Position above the bar
	font=dict(size=14)
	)

	# Update layout
	fig.update_layout(
	showlegend=False,
	height=500,
	yaxis=dict(title='Monthly Cost ($)', tickprefix='$'),
	xaxis=dict(title=''),
	title='Cost Comparison'
	)

	html = f"""
	<div style='padding:20px;font-family:Arial;'>
	{aws_html}
	{gcp_html}
	{api_html}
	</div>
	"""
	return html, fig

	# UI setup
	with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
	gr.HTML('<h1 style="text-align:center;">Cloud Cost Estimator</h1>')
	with gr.Row():
	with gr.Column(scale=1):
	compute_hours = gr.Slider(label="Compute Hours per Month", minimum=1, maximum=300, value=50)
	tokens_per_month = gr.Slider(label="Tokens per Month (M)", minimum=1, maximum=200, value=5)
	input_ratio = gr.Slider(label="Input Ratio (%)", minimum=10, maximum=70, value=25)
	api_calls = gr.Slider(label="API Calls per Month", minimum=100, maximum=100000, value=5000, step=100)
	model_size = gr.Dropdown(label="Model Size", choices=list(model_sizes.keys()), value="Medium (13B parameters)")
	storage_gb = gr.Slider(label="Storage (GB)", minimum=10, maximum=1000, value=100)
	reserved_instances = gr.Checkbox(label="Reserved Instances", value=False)
	spot_instances = gr.Checkbox(label="Spot Instances", value=False)
	multi_year_commitment = gr.Radio(label="Commitment Period (years)", choices=["1","3"], value="1")
	with gr.Column(scale=2):
	out_html = gr.HTML()
	out_plot = gr.Plot()

	# Create inputs list for the function
	inputs = [compute_hours, tokens_per_month, input_ratio, api_calls,
	model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment]
	outputs = [out_html, out_plot]

	# Initial calculation on load
	demo.load(generate_cost_comparison, inputs, outputs)

	# Update on each input change
	compute_hours.change(generate_cost_comparison, inputs, outputs)
	tokens_per_month.change(generate_cost_comparison, inputs, outputs)
	input_ratio.change(generate_cost_comparison, inputs, outputs)
	api_calls.change(generate_cost_comparison, inputs, outputs)
	model_size.change(generate_cost_comparison, inputs, outputs)
	storage_gb.change(generate_cost_comparison, inputs, outputs)
	reserved_instances.change(generate_cost_comparison, inputs, outputs)
	spot_instances.change(generate_cost_comparison, inputs, outputs)
	multi_year_commitment.change(generate_cost_comparison, inputs, outputs)

	demo.launch()