Update app.py
Browse files
app.py
CHANGED
@@ -49,20 +49,36 @@ model_sizes = {
|
|
49 |
"XL (180B parameters)": {"memory_required": 360},
|
50 |
}
|
51 |
|
52 |
-
def
|
53 |
-
data =
|
54 |
rate = data['hourly_rate']
|
55 |
if spot:
|
56 |
-
rate *= 0.3
|
57 |
elif reserved:
|
58 |
-
factors = {1: 0.6, 3: 0.4}
|
59 |
-
rate *= factors.get(years,
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
def calculate_api_cost(provider, model, in_tokens, out_tokens, calls):
|
63 |
m = api_pricing[provider][model]
|
64 |
-
|
65 |
-
|
|
|
|
|
66 |
|
67 |
def filter_compatible(instances, min_mem):
|
68 |
res = {}
|
@@ -70,9 +86,11 @@ def filter_compatible(instances, min_mem):
|
|
70 |
mem_str = data['gpu_memory']
|
71 |
if 'x' in mem_str and not mem_str.startswith(('1x','2x','4x','8x')):
|
72 |
val = int(mem_str.replace('GB',''))
|
73 |
-
|
74 |
parts = mem_str.split('x')
|
75 |
-
val = int(parts[0]) * int(parts[1].replace('GB',''))
|
|
|
|
|
76 |
if val >= min_mem:
|
77 |
res[name] = data
|
78 |
return res
|
@@ -90,67 +108,89 @@ def generate_cost_comparison(
|
|
90 |
gcp_comp = filter_compatible(gcp_instances, min_mem)
|
91 |
|
92 |
results = []
|
|
|
|
|
|
|
|
|
93 |
if aws_comp:
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
if gcp_comp:
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
df = pd.DataFrame(results)
|
108 |
-
|
109 |
-
|
110 |
-
api_label = df[df['type']=='API']['provider'].iloc[0]
|
111 |
-
|
112 |
-
fig = px.bar(df, x='provider', y='cost', color='provider', color_discrete_map={
|
113 |
-
aws_label: '#FF9900',
|
114 |
-
gcp_label: '#4285F4',
|
115 |
-
api_label: '#D62828'
|
116 |
-
})
|
117 |
fig.update_yaxes(tickprefix='$')
|
118 |
fig.update_layout(showlegend=False, height=500)
|
119 |
|
120 |
-
html =
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
return html, fig
|
122 |
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
):
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
with gr.Column(scale=2):
|
148 |
-
out_html = gr.HTML()
|
149 |
-
out_plot = gr.Plot()
|
150 |
-
submit.click(
|
151 |
-
app_function,
|
152 |
-
inputs=[compute_hours, tokens_per_month, input_ratio, api_calls,
|
153 |
-
model_size, storage_gb, reserved_instances, spot_instances, multi_year_commitment],
|
154 |
-
outputs=[out_html, out_plot]
|
155 |
-
)
|
156 |
-
demo.launch()
|
|
|
49 |
"XL (180B parameters)": {"memory_required": 360},
|
50 |
}
|
51 |
|
52 |
+
def calculate_aws_cost(instance, hours, storage, reserved=False, spot=False, years=1):
    """Estimate the monthly cost of running one AWS instance.

    Args:
        instance: Key into the module-level ``aws_instances`` table.
        hours: Compute hours to bill at the (possibly discounted) hourly rate.
        storage: Storage amount; charged at a flat 0.10 per unit.
        reserved: Apply the reserved-instance discount. Ignored when ``spot``
            is set.
        spot: Apply the spot discount (rate x 0.3). Takes precedence over
            ``reserved``.
        years: Reserved commitment term. May be an int or a numeric string
            such as ``"3"``; any unrecognised term gets the 1-year factor.

    Returns:
        Dict with ``total_cost`` (compute plus storage) and ``details`` (the
        instance's entry from ``aws_instances``).

    Raises:
        KeyError: If ``instance`` is not in ``aws_instances`` or its entry has
            no ``hourly_rate``.
    """
    data = aws_instances[instance]
    rate = data['hourly_rate']
    if spot:
        rate *= 0.3
    elif reserved:
        factors = {1: 0.6, 3: 0.4}
        # The term may arrive as text (e.g. a radio choice "3"). The table is
        # keyed by int, so without coercion a string would silently fall back
        # to the 1-year discount.
        term = years
        if isinstance(years, str):
            try:
                term = int(years)
            except ValueError:
                term = None
        rate *= factors.get(term, 0.6)
    compute = rate * hours
    storage_cost = storage * 0.10
    return {'total_cost': compute + storage_cost, 'details': data}
|
63 |
+
|
64 |
+
def calculate_gcp_cost(instance, hours, storage, reserved=False, spot=False, years=1):
    """Estimate the monthly cost of running one GCP instance.

    Args:
        instance: Key into the module-level ``gcp_instances`` table.
        hours: Compute hours to bill at the (possibly discounted) hourly rate.
        storage: Storage amount; charged at a flat 0.04 per unit.
        reserved: Apply the committed-use discount. Ignored when ``spot`` is
            set.
        spot: Apply the spot discount (rate x 0.2). Takes precedence over
            ``reserved``.
        years: Commitment term. May be an int or a numeric string such as
            ``"3"``; any unrecognised term gets the 1-year factor.

    Returns:
        Dict with ``total_cost`` (compute plus storage) and ``details`` (the
        instance's entry from ``gcp_instances``).

    Raises:
        KeyError: If ``instance`` is not in ``gcp_instances`` or its entry has
            no ``hourly_rate``.
    """
    data = gcp_instances[instance]
    rate = data['hourly_rate']
    if spot:
        rate *= 0.2
    elif reserved:
        factors = {1: 0.7, 3: 0.5}
        # The term may arrive as text (e.g. a radio choice "3"). The table is
        # keyed by int, so without coercion a string would silently fall back
        # to the 1-year discount.
        term = years
        if isinstance(years, str):
            try:
                term = int(years)
            except ValueError:
                term = None
        rate *= factors.get(term, 0.7)
    compute = rate * hours
    storage_cost = storage * 0.04
    return {'total_cost': compute + storage_cost, 'details': data}
|
75 |
|
76 |
def calculate_api_cost(provider, model, in_tokens, out_tokens, calls):
    """Price a hosted-API workload for one provider/model pair.

    Args:
        provider: Top-level key into the module-level ``api_pricing`` table.
        model: Model key under that provider.
        in_tokens: Input volume, multiplied directly by ``input_per_1M``
            (presumably expressed in millions of tokens; confirm with caller).
        out_tokens: Output volume, multiplied directly by ``output_per_1M``.
        calls: Number of API calls; only billed for ``'TogetherAI'``.

    Returns:
        Dict with ``total_cost`` and ``details`` (the pricing entry used).

    Raises:
        KeyError: If the provider/model pair or a price field is missing.
    """
    pricing = api_pricing[provider][model]

    # Only TogetherAI carries a per-call surcharge; everyone else pays nothing
    # beyond the token charges.
    if provider == 'TogetherAI':
        per_call_total = calls * 0.0001
    else:
        per_call_total = 0

    token_total = (
        in_tokens * pricing['input_per_1M']
        + out_tokens * pricing['output_per_1M']
    )
    return {'total_cost': token_total + per_call_total, 'details': pricing}
|
82 |
|
83 |
def filter_compatible(instances, min_mem):
|
84 |
res = {}
|
|
|
86 |
mem_str = data['gpu_memory']
|
87 |
if 'x' in mem_str and not mem_str.startswith(('1x','2x','4x','8x')):
|
88 |
val = int(mem_str.replace('GB',''))
|
89 |
+
elif 'x' in mem_str:
|
90 |
parts = mem_str.split('x')
|
91 |
+
val = int(parts[0]) * int(parts[1].replace('GB',''))
|
92 |
+
else:
|
93 |
+
val = int(mem_str.replace('GB',''))
|
94 |
if val >= min_mem:
|
95 |
res[name] = data
|
96 |
return res
|
|
|
108 |
gcp_comp = filter_compatible(gcp_instances, min_mem)
|
109 |
|
110 |
results = []
|
111 |
+
|
112 |
+
# AWS table
|
113 |
+
aws_html = '<h3>AWS Instances</h3>'
|
114 |
+
aws_html += '<table width="100%"><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Monthly Cost ($)</th></tr>'
|
115 |
if aws_comp:
|
116 |
+
for inst in aws_comp:
|
117 |
+
res = calculate_aws_cost(inst, compute_hours, storage_gb, reserved_instances, spot_instances, years)
|
118 |
+
aws_html += f'<tr><td>{inst}</td><td>{res["details"]["vcpus"]}</td><td>{res["details"]["memory"]}GB</td><td>{res["details"]["gpu"]}</td><td>${res["total_cost"]:.2f}</td></tr>'
|
119 |
+
# best AWS
|
120 |
+
best_aws = min(aws_comp, key=lambda x: calculate_aws_cost(x, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost'])
|
121 |
+
best_aws_cost = calculate_aws_cost(best_aws, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost']
|
122 |
+
results.append({'provider': f'AWS ({best_aws})', 'cost': best_aws_cost, 'type': 'Cloud'})
|
123 |
+
else:
|
124 |
+
aws_html += '<tr><td colspan="5">No compatible instances</td></tr>'
|
125 |
+
aws_html += '</table>'
|
126 |
+
|
127 |
+
# GCP table
|
128 |
+
gcp_html = '<h3>GCP Instances</h3>'
|
129 |
+
gcp_html += '<table width="100%"><tr><th>Instance</th><th>vCPUs</th><th>Memory</th><th>GPU</th><th>Monthly Cost ($)</th></tr>'
|
130 |
if gcp_comp:
|
131 |
+
for inst in gcp_comp:
|
132 |
+
res = calculate_gcp_cost(inst, compute_hours, storage_gb, reserved_instances, spot_instances, years)
|
133 |
+
gcp_html += f'<tr><td>{inst}</td><td>{res["details"]["vcpus"]}</td><td>{res["details"]["memory"]}GB</td><td>{res["details"]["gpu" ]}</td><td>${res["total_cost"]:.2f}</td></tr>'
|
134 |
+
best_gcp = min(gcp_comp, key=lambda x: calculate_gcp_cost(x, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost'])
|
135 |
+
best_gcp_cost = calculate_gcp_cost(best_gcp, compute_hours, storage_gb, reserved_instances, spot_instances, years)['total_cost']
|
136 |
+
results.append({'provider': f'GCP ({best_gcp})', 'cost': best_gcp_cost, 'type': 'Cloud'})
|
137 |
+
else:
|
138 |
+
gcp_html += '<tr><td colspan="5">No compatible instances</td></tr>'
|
139 |
+
gcp_html += '</table>'
|
140 |
+
|
141 |
+
# API table
|
142 |
+
api_html = '<h3>API Options</h3>'
|
143 |
+
api_html += '<table width="100%"><tr><th>Provider</th><th>Model</th><th>Input Cost</th><th>Output Cost</th><th>Total Cost ($)</th><th>Context</th></tr>'
|
144 |
+
api_costs = {}
|
145 |
+
for prov in api_pricing:
|
146 |
+
for mdl in api_pricing[prov]:
|
147 |
+
res = calculate_api_cost(prov, mdl, in_tokens, out_tokens, api_calls)
|
148 |
+
details = api_pricing[prov][mdl]
|
149 |
+
api_html += f'<tr><td>{prov}</td><td>{mdl}</td><td>${in_tokens * details["input_per_1M"]:.2f}</td><td>${out_tokens * details["output_per_1M"]:.2f}</td><td>${res["total_cost"]:.2f}</td><td>{details["token_context"]:,}</td></tr>'
|
150 |
+
api_costs[(prov, mdl)] = res['total_cost']
|
151 |
+
api_html += '</table>'
|
152 |
+
best_api = min(api_costs, key=api_costs.get)
|
153 |
+
results.append({'provider': f'{best_api[0]} ({best_api[1]})', 'cost': api_costs[best_api], 'type': 'API'})
|
154 |
+
|
155 |
+
# Recommendation and Breakeven omitted for brevity
|
156 |
+
|
157 |
+
# Chart
|
158 |
df = pd.DataFrame(results)
|
159 |
+
colors = {r['provider']: c for r,c in zip(results, ['#FF9900','#4285F4','#D62828'])}
|
160 |
+
fig = px.bar(df, x='provider', y='cost', color='provider', color_discrete_map=colors)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
fig.update_yaxes(tickprefix='$')
|
162 |
fig.update_layout(showlegend=False, height=500)
|
163 |
|
164 |
+
html = f"""
|
165 |
+
<div style='padding:20px;font-family:Arial;'>
|
166 |
+
{aws_html}
|
167 |
+
{gcp_html}
|
168 |
+
{api_html}
|
169 |
+
</div>
|
170 |
+
"""
|
171 |
return html, fig
|
172 |
|
173 |
+
# UI setup: a two-column Gradio page. The left column collects the workload
# parameters, the right column shows the HTML tables and the comparison chart.
with gr.Blocks(
    title="Cloud Cost Estimator",
    theme=gr.themes.Soft(primary_hue="indigo"),
) as demo:
    gr.HTML('<h1 style="text-align:center;">Cloud Cost Estimator</h1>')

    with gr.Row():
        # Input controls, created in the order they are passed to the callback.
        with gr.Column(scale=1):
            compute_hours = gr.Slider(
                label="Compute Hours per Month", minimum=1, maximum=730, value=100
            )
            tokens_per_month = gr.Slider(
                label="Tokens per Month (M)", minimum=1, maximum=1000, value=10
            )
            input_ratio = gr.Slider(
                label="Input Ratio (%)", minimum=10, maximum=90, value=30
            )
            api_calls = gr.Slider(
                label="API Calls per Month",
                minimum=100,
                maximum=1000000,
                value=10000,
                step=100,
            )
            model_size = gr.Dropdown(
                label="Model Size",
                choices=list(model_sizes.keys()),
                value="Medium (13B parameters)",
            )
            storage_gb = gr.Slider(
                label="Storage (GB)", minimum=10, maximum=1000, value=100
            )
            reserved_instances = gr.Checkbox(label="Reserved Instances", value=False)
            spot_instances = gr.Checkbox(label="Spot Instances", value=False)
            # Radio choices are strings, so the callback receives "1" or "3".
            multi_year_commitment = gr.Radio(
                label="Commitment Period (years)", choices=["1", "3"], value="1"
            )
            submit = gr.Button("Calculate Costs")

        # Result widgets filled by the callback's (html, figure) return value.
        with gr.Column(scale=2):
            out_html = gr.HTML()
            out_plot = gr.Plot()

    # The input order here is the positional argument order the callback sees.
    submit.click(
        generate_cost_comparison,
        inputs=[
            compute_hours,
            tokens_per_month,
            input_ratio,
            api_calls,
            model_size,
            storage_gb,
            reserved_instances,
            spot_instances,
            multi_year_commitment,
        ],
        outputs=[out_html, out_plot],
    )

demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|