Spaces:

delightfulrachel
/

GPUandAPIcostestimator

Sleeping

App Files Files Community

GPUandAPIcostestimator / app.py

delightfulrachel

Update app.py

41a2f73 verified 4 months ago

raw

history blame

11.3 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import plotly.express as px

	# Pricing data: AWS GPU instance catalogue, keyed by instance type.
	# `hourly_rate` is multiplied by the requested hours in calculate_aws_cost, and
	# `gpu_memory` is parsed by filter_compatible_instances ("NxMGB" = N GPUs of M GB).
	# NOTE(review): values are hard-coded snapshots; presumably USD per hour and
	# memory in GB - confirm against the AWS pricing page.
	aws_instances = {
	    "g4dn.xlarge": {
	        "vcpus": 4, "memory": 16,
	        "gpu": "1x NVIDIA T4", "hourly_rate": 0.526, "gpu_memory": "16GB",
	    },
	    "g4dn.2xlarge": {
	        "vcpus": 8, "memory": 32,
	        "gpu": "1x NVIDIA T4", "hourly_rate": 0.752, "gpu_memory": "16GB",
	    },
	    "g5.xlarge": {
	        "vcpus": 4, "memory": 16,
	        "gpu": "1x NVIDIA A10G", "hourly_rate": 0.65, "gpu_memory": "24GB",
	    },
	    "g5.2xlarge": {
	        "vcpus": 8, "memory": 32,
	        "gpu": "1x NVIDIA A10G", "hourly_rate": 1.006, "gpu_memory": "24GB",
	    },
	    "p3.2xlarge": {
	        "vcpus": 8, "memory": 61,
	        "gpu": "1x NVIDIA V100", "hourly_rate": 3.06, "gpu_memory": "16GB",
	    },
	    "p4d.24xlarge": {
	        "vcpus": 96, "memory": 1152,
	        "gpu": "8x NVIDIA A100", "hourly_rate": 32.77, "gpu_memory": "8x40GB",
	    },
	}

	# GCP GPU machine catalogue, keyed by machine type, with the same fields as
	# the AWS table. `hourly_rate` is consumed by calculate_gcp_cost and
	# `gpu_memory` by filter_compatible_instances ("NxMGB" = N GPUs of M GB).
	# NOTE(review): values are hard-coded snapshots; presumably USD per hour and
	# memory in GB - confirm against the GCP pricing page.
	gcp_instances = {
	    "a2-highgpu-1g": {
	        "vcpus": 12, "memory": 85,
	        "gpu": "1x NVIDIA A100", "hourly_rate": 1.46, "gpu_memory": "40GB",
	    },
	    "a2-highgpu-2g": {
	        "vcpus": 24, "memory": 170,
	        "gpu": "2x NVIDIA A100", "hourly_rate": 2.93, "gpu_memory": "2x40GB",
	    },
	    "a2-highgpu-4g": {
	        "vcpus": 48, "memory": 340,
	        "gpu": "4x NVIDIA A100", "hourly_rate": 5.86, "gpu_memory": "4x40GB",
	    },
	    "n1-standard-4-t4": {
	        "vcpus": 4, "memory": 15,
	        "gpu": "1x NVIDIA T4", "hourly_rate": 0.49, "gpu_memory": "16GB",
	    },
	    "n1-standard-8-t4": {
	        "vcpus": 8, "memory": 30,
	        "gpu": "1x NVIDIA T4", "hourly_rate": 0.69, "gpu_memory": "16GB",
	    },
	    "g2-standard-4": {
	        "vcpus": 4, "memory": 16,
	        "gpu": "1x NVIDIA L4", "hourly_rate": 0.59, "gpu_memory": "24GB",
	    },
	}

	# Hosted-API price list: provider -> model -> pricing record.
	# calculate_api_cost multiplies `input_per_1M` / `output_per_1M` by the token
	# counts it receives; `token_context` is not read anywhere in the visible code.
	# NOTE(review): prices are hard-coded snapshots, presumably USD per million
	# tokens - verify against each provider's pricing page.
	api_pricing = {
	    "OpenAI": {
	        "GPT-3.5-Turbo": {
	            "input_per_1M": 0.5, "output_per_1M": 1.5, "token_context": 16385,
	        },
	        "GPT-4o": {
	            "input_per_1M": 5.0, "output_per_1M": 15.0, "token_context": 32768,
	        },
	        "GPT-4o-mini": {
	            "input_per_1M": 2.5, "output_per_1M": 7.5, "token_context": 32768,
	        },
	    },
	    "TogetherAI": {
	        "Llama-3-8B": {
	            "input_per_1M": 0.15, "output_per_1M": 0.15, "token_context": 8192,
	        },
	        "Llama-3-70B": {
	            "input_per_1M": 0.9, "output_per_1M": 0.9, "token_context": 8192,
	        },
	        "Llama-2-13B": {
	            "input_per_1M": 0.6, "output_per_1M": 0.6, "token_context": 4096,
	        },
	        "Llama-2-70B": {
	            "input_per_1M": 2.5, "output_per_1M": 2.5, "token_context": 4096,
	        },
	        "DeepSeek-Coder-33B": {
	            "input_per_1M": 2.0, "output_per_1M": 2.0, "token_context": 16384,
	        },
	    },
	    "Anthropic": {
	        "Claude-3-Opus": {
	            "input_per_1M": 15.0, "output_per_1M": 75.0, "token_context": 200000,
	        },
	        "Claude-3-Sonnet": {
	            "input_per_1M": 3.0, "output_per_1M": 15.0, "token_context": 200000,
	        },
	        "Claude-3-Haiku": {
	            "input_per_1M": 0.25, "output_per_1M": 1.25, "token_context": 200000,
	        },
	    },
	}

	# Model-size presets offered in the UI dropdown.
	# `memory_required` is the threshold that generate_cost_comparison passes to
	# filter_compatible_instances, where it is compared with an instance's total
	# GPU memory in GB. `throughput_factor` is not read anywhere in the visible code.
	model_sizes = {
	    "Small (7B parameters)": {
	        "memory_required": 14,
	        "throughput_factor": 1.0,
	    },
	    "Medium (13B parameters)": {
	        "memory_required": 26,
	        "throughput_factor": 0.7,
	    },
	    "Large (70B parameters)": {
	        "memory_required": 140,
	        "throughput_factor": 0.3,
	    },
	    "XL (180B parameters)": {
	        "memory_required": 360,
	        "throughput_factor": 0.15,
	    },
	}


	def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
	    """Estimate the cost of running one AWS instance type.

	    Args:
	        instance: Key into ``aws_instances``.
	        hours: Number of compute hours to bill.
	        storage: Storage amount; billed at a flat 0.10 per unit.
	        reserved: Apply the reserved-instance discount (ignored when ``spot``).
	        spot: Apply the spot discount; takes precedence over ``reserved``.
	        years: Reserved commitment length; 1 and 3 have dedicated factors,
	            any other value falls back to the 1-year factor.

	    Returns:
	        Dict with ``compute_cost``, ``storage_cost`` and ``total_cost``.

	    Raises:
	        KeyError: If ``instance`` is not in ``aws_instances``.
	    """
	    hourly = aws_instances[instance]['hourly_rate']

	    # Spot wins over reserved; with neither flag the list price is used as-is.
	    if spot:
	        hourly = hourly * 0.3
	    elif reserved:
	        reserved_factor = {1: 0.6, 3: 0.4}.get(years, 0.6)
	        hourly = hourly * reserved_factor

	    compute_total = hourly * hours
	    storage_total = storage * 0.10
	    return {
	        'compute_cost': compute_total,
	        'storage_cost': storage_total,
	        'total_cost': compute_total + storage_total,
	    }


	def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
	    """Estimate the cost of running one GCP machine type.

	    Args:
	        instance: Key into ``gcp_instances``.
	        hours: Number of compute hours to bill.
	        storage: Storage amount; billed at a flat 0.04 per unit.
	        reserved: Apply the committed-use discount (ignored when ``spot``).
	        spot: Apply the spot/preemptible discount; takes precedence over
	            ``reserved``.
	        years: Commitment length; 1 and 3 have dedicated factors, any other
	            value falls back to the 1-year factor.

	    Returns:
	        Dict with ``compute_cost``, ``storage_cost`` and ``total_cost``.

	    Raises:
	        KeyError: If ``instance`` is not in ``gcp_instances``.
	    """
	    hourly = gcp_instances[instance]['hourly_rate']

	    # Spot wins over reserved; with neither flag the list price is used as-is.
	    if spot:
	        hourly = hourly * 0.2
	    elif reserved:
	        commitment_factor = {1: 0.7, 3: 0.5}.get(years, 0.7)
	        hourly = hourly * commitment_factor

	    compute_total = hourly * hours
	    storage_total = storage * 0.04
	    return {
	        'compute_cost': compute_total,
	        'storage_cost': storage_total,
	        'total_cost': compute_total + storage_total,
	    }


	def calculate_api_cost(provider, model, input_tokens, output_tokens, api_calls):
	    """Estimate the cost of serving a workload through a hosted API.

	    The per-1M prices are multiplied by the token counts directly, so the
	    counts are effectively expressed in millions of tokens (the UI slider
	    that feeds this function is labelled in millions).

	    Args:
	        provider: Top-level key of ``api_pricing``.
	        model: Model key under that provider.
	        input_tokens: Input token volume.
	        output_tokens: Output token volume.
	        api_calls: Number of API calls; only TogetherAI is charged per call
	            (0.0001 each).

	    Returns:
	        Dict with ``input_cost``, ``output_cost``, ``api_call_cost`` and
	        ``total_cost``.

	    Raises:
	        KeyError: If the provider or model is unknown.
	    """
	    pricing = api_pricing[provider][model]

	    # The division by 1 is kept from the original arithmetic; it is a no-op
	    # scale factor because the counts already line up with the per-1M prices.
	    cost_in = (input_tokens * pricing['input_per_1M']) / 1
	    cost_out = (output_tokens * pricing['output_per_1M']) / 1

	    if provider == 'TogetherAI':
	        per_call = api_calls * 0.0001
	    else:
	        per_call = 0

	    return {
	        'input_cost': cost_in,
	        'output_cost': cost_out,
	        'api_call_cost': per_call,
	        'total_cost': cost_in + cost_out + per_call,
	    }


	def _total_gpu_memory_gb(mem_str):
	    """Return the total GPU memory in GB described by ``mem_str``.

	    Accepts either a plain size ("16GB") or a "<count>x<size>GB" form
	    ("8x40GB"), for any GPU count.

	    Raises:
	        ValueError: If the count or size is not an integer.
	    """
	    count, sep, per_gpu = mem_str.partition('x')
	    if not sep:
	        # No multiplier: the whole string is a single GPU's size.
	        return int(mem_str.replace('GB', ''))
	    return int(count) * int(per_gpu.replace('GB', ''))


	def filter_compatible_instances(instances, min_mem):
	    """Select the instances whose total GPU memory is at least ``min_mem``.

	    The multiplier in ``gpu_memory`` is parsed generically, so counts other
	    than 1/2/4/8 (e.g. "3x24GB" or "16x40GB") are handled instead of raising
	    ``ValueError``.

	    Args:
	        instances: Mapping of instance name to a record with a ``gpu_memory``
	            string such as "16GB" or "8x40GB".
	        min_mem: Minimum total GPU memory, in GB.

	    Returns:
	        A new dict, in the original order, holding the same record objects
	        for every instance that meets the requirement.

	    Raises:
	        KeyError: If a record has no ``gpu_memory`` field.
	        ValueError: If a ``gpu_memory`` string cannot be parsed.
	    """
	    return {
	        name: data
	        for name, data in instances.items()
	        if _total_gpu_memory_gb(data['gpu_memory']) >= min_mem
	    }


	def _render_cloud_section(label, compatible, cost_fn, cost_args):
	    """Render one cloud provider's HTML table and find its cheapest instance.

	    Args:
	        label: Provider name used in headings and messages ("AWS", "GCP").
	        compatible: Mapping of compatible instance names to their records.
	        cost_fn: Cost calculator called as ``cost_fn(name, *cost_args)``;
	            must return a dict with a ``total_cost`` entry.
	        cost_args: Positional arguments forwarded after the instance name.

	    Returns:
	        ``(html, best_name, best_cost)``. ``best_name`` is ``None`` when no
	        instance is compatible; ties keep the first instance in table order.
	    """
	    heading = f'<h3>{label} Compatible Instances</h3>'
	    if not compatible:
	        return heading + f'<p>No compatible {label} instances.</p>', None, float('inf')

	    costs = {name: cost_fn(name, *cost_args)['total_cost'] for name in compatible}
	    rows = ''.join(
	        f'<tr><td>{name}</td><td>${cost:.2f}</td></tr>'
	        for name, cost in costs.items()
	    )
	    table = (
	        '<table width="100%"><tr><th>Instance</th><th>Monthly Cost</th></tr>'
	        + rows
	        + '</table>'
	    )
	    best_name = min(costs, key=costs.get)
	    return heading + table, best_name, costs[best_name]


	def generate_cost_comparison(
	    compute_hours, tokens_per_month, input_ratio, api_calls,
	    model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
	):
	    """Build the HTML report and bar chart comparing cloud and API costs.

	    Args:
	        compute_hours: Compute hours per month for the cloud options.
	        tokens_per_month: Monthly token volume (the UI supplies millions).
	        input_ratio: Percentage of the tokens that are input tokens.
	        api_calls: Number of API calls per month.
	        model_size: Key into ``model_sizes``; sets the GPU memory requirement.
	        storage_gb: Storage amount billed by the cloud options.
	        reserved_instances: Whether to apply reserved pricing.
	        spot_instances: Whether to apply spot/preemptible pricing.
	        multi_year_commitment: Commitment length in years; converted with
	            ``int()``, so the strings "1" and "3" from the UI are accepted.

	    Returns:
	        ``(html, fig)``: the report markup and a Plotly bar chart holding the
	        cheapest option per provider category.
	    """
	    years = int(multi_year_commitment)
	    in_tokens = tokens_per_month * (input_ratio / 100)
	    out_tokens = tokens_per_month - in_tokens
	    min_mem = model_sizes[model_size]['memory_required']
	    cloud_args = (compute_hours, storage_gb, reserved_instances, spot_instances, years)
	    results = []

	    # Cloud sections: AWS and GCP share the same rendering and selection logic.
	    aws_html, best_aws, best_aws_cost = _render_cloud_section(
	        'AWS', filter_compatible_instances(aws_instances, min_mem),
	        calculate_aws_cost, cloud_args,
	    )
	    if best_aws is not None:
	        results.append({'provider': f'AWS ({best_aws})', 'cost': best_aws_cost, 'type': 'Cloud'})

	    gcp_html, best_gcp, best_gcp_cost = _render_cloud_section(
	        'GCP', filter_compatible_instances(gcp_instances, min_mem),
	        calculate_gcp_cost, cloud_args,
	    )
	    if best_gcp is not None:
	        results.append({'provider': f'GCP ({best_gcp})', 'cost': best_gcp_cost, 'type': 'Cloud'})

	    # API section: every provider/model pair is listed; the cheapest is charted.
	    api_costs = {}
	    api_rows = []
	    for prov, models in api_pricing.items():
	        for mdl in models:
	            total = calculate_api_cost(prov, mdl, in_tokens, out_tokens, api_calls)['total_cost']
	            api_costs[(prov, mdl)] = total
	            api_rows.append(f'<tr><td>{prov}</td><td>{mdl}</td><td>${total:.2f}</td></tr>')
	    api_html = (
	        '<h3>API Options</h3>'
	        '<table width="100%"><tr><th>Provider</th><th>Model</th><th>Total Cost</th></tr>'
	        + ''.join(api_rows)
	        + '</table>'
	    )
	    best_api = min(api_costs, key=api_costs.get)
	    results.append({
	        'provider': f'{best_api[0]} ({best_api[1]})',
	        'cost': api_costs[best_api],
	        'type': 'API',
	    })

	    # Recommendation: `results` always contains at least the API entry.
	    cheapest = min(results, key=lambda entry: entry['cost'])
	    kind = 'API' if cheapest['type'] == 'API' else 'Cloud'
	    rec = (
	        '<h3>Recommendation</h3>'
	        f"<p>The {kind} {cheapest['provider']} is cheapest at ${cheapest['cost']:.2f}.</p>"
	    )

	    # Plot
	    df_res = pd.DataFrame(results)
	    fig = px.bar(df_res, x='provider', y='cost', color='type', title='Monthly Cost Comparison')

	    # HTML output: one <div> per section, in display order.
	    html = '\n' + ''.join(
	        f'<div>{section}</div>\n'
	        for section in (aws_html, gcp_html, api_html, rec)
	    )
	    return html, fig


	def app_function(
	    compute_hours, tokens_per_month, input_ratio, api_calls,
	    model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment
	):
	    """Gradio click handler: forward the UI inputs to generate_cost_comparison.

	    Returns whatever generate_cost_comparison returns (the report HTML and the
	    chart), which Gradio maps onto the two output components.
	    """
	    report = generate_cost_comparison(
	        compute_hours=compute_hours,
	        tokens_per_month=tokens_per_month,
	        input_ratio=input_ratio,
	        api_calls=api_calls,
	        model_size=model_size,
	        storage_gb=storage_gb,
	        reserved_instances=reserved_instances,
	        spot_instances=spot_instances,
	        multi_year_commitment=multi_year_commitment,
	    )
	    return report

	# Gradio interface
	def main():
	    """Build the Gradio Blocks UI and launch the app.

	    The left column collects usage parameters and pricing options; the right
	    column shows the HTML report and the chart returned by ``app_function``
	    when the button is clicked.
	    """
	    with gr.Blocks(title="Cloud Cost Estimator", theme=gr.themes.Soft(primary_hue="indigo")) as demo:
	        gr.HTML("""
	        <div style="text-align:center; margin-bottom:20px;">
	        <h1>Cloud Cost Estimator</h1>
	        <p>Compare costs between cloud hardware and API endpoints</p>
	        </div>
	        """)

	        with gr.Row():
	            with gr.Column(scale=1):
	                gr.HTML("<h3>Usage Parameters</h3>")
	                compute_hours = gr.Slider(label="Compute Hours per Month", minimum=1, maximum=730, value=100)
	                tokens_per_month = gr.Slider(label="Tokens Processed per Month (millions)", minimum=1, maximum=1000, value=10)
	                input_ratio = gr.Slider(label="Input Token Ratio (%)", minimum=10, maximum=90, value=30)
	                api_calls = gr.Slider(label="API Calls per Month", minimum=100, maximum=1000000, value=10000, step=100)
	                model_size = gr.Dropdown(label="Model Size", choices=list(model_sizes.keys()), value="Medium (13B parameters)")
	                storage_gb = gr.Slider(label="Storage Required (GB)", minimum=10, maximum=1000, value=100)

	                gr.HTML("<h3>Advanced Options</h3>")
	                reserved_instances = gr.Checkbox(label="Use Reserved Instances", value=False)
	                spot_instances = gr.Checkbox(label="Use Spot/Preemptible Instances", value=False)
	                # Choices are strings; generate_cost_comparison converts them with int().
	                multi_year_commitment = gr.Radio(label="Commitment Period (years)", choices=["1", "3"], value="1")
	                submit_button = gr.Button("Calculate Costs", variant="primary")

	            with gr.Column(scale=2):
	                results_html = gr.HTML(label="Results")
	                plot_output = gr.Plot(label="Cost Comparison")

	        # Input order must match app_function's parameter order.
	        submit_button.click(
	            app_function,
	            inputs=[
	                compute_hours, tokens_per_month, input_ratio, api_calls, model_size,
	                storage_gb, reserved_instances, spot_instances, multi_year_commitment,
	            ],
	            outputs=[results_html, plot_output],
	        )

	        # Link separators are plain "|": a backslash-escaped pipe is an invalid
	        # escape sequence in a normal string and would render a stray backslash.
	        gr.HTML("""
	        <div style="margin-top:30px; border-top:1px solid #e5e7eb; padding-top:20px;">
	        <h3>Help & Resources</h3>
	        <p><a href="https://aws.amazon.com/ec2/pricing/">AWS EC2 Pricing</a> | <a href="https://cloud.google.com/compute/pricing">GCP Pricing</a></p>
	        <p><a href="https://openai.com/pricing">OpenAI API Pricing</a> | <a href="https://www.anthropic.com/api">Anthropic Claude API Pricing</a> | <a href="https://www.together.ai/pricing">TogetherAI Pricing</a></p>
	        </div>
	        """)

	    demo.launch()

	# Script entry point: start the UI only when this file is executed directly,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()