Spaces:

dexhunter
/

aideml

Sleeping

App Files Files Community

aideml / sample_results /santa-2019-revenge-of-the-accountants.py

dominikschmidt

add open-source AIDE

39c930a over 1 year ago

raw

history blame

3.86 kB

	import pandas as pd
	import numpy as np
	from math import exp

	# Load the data
	family_data = pd.read_csv("./input/family_data.csv")
	sample_submission = pd.read_csv("./input/sample_submission.csv")

	# Constants
	N_DAYS = 100
	N_FAMILY = len(family_data)
	MAX_OCCUPANCY = 300
	MIN_OCCUPANCY = 125


	# Cost function components
	def preference_cost_matrix(family_data):
	cost_matrix = np.zeros((N_FAMILY, N_DAYS + 1), dtype=np.int64)
	for i in range(N_FAMILY):
	family = family_data.iloc[i]
	n_people = family["n_people"]
	for j in range(10):
	day = family[f"choice_{j}"]
	if j == 0:
	cost_matrix[i, day] = 0
	elif j == 1:
	cost_matrix[i, day] = 50
	elif j == 2:
	cost_matrix[i, day] = 50 + 9 * n_people
	elif j == 3:
	cost_matrix[i, day] = 100 + 9 * n_people
	elif j == 4:
	cost_matrix[i, day] = 200 + 9 * n_people
	elif j == 5:
	cost_matrix[i, day] = 200 + 18 * n_people
	elif j == 6:
	cost_matrix[i, day] = 300 + 18 * n_people
	elif j == 7:
	cost_matrix[i, day] = 300 + 36 * n_people
	elif j == 8:
	cost_matrix[i, day] = 400 + 36 * n_people
	elif j == 9:
	cost_matrix[i, day] = 500 + 36 * n_people + 199 * n_people
	cost_matrix[i, 0] = 500 + 36 * n_people + 398 * n_people
	return cost_matrix


	def accounting_penalty(occupancy):
	penalties = np.zeros(N_DAYS + 1)
	for day in range(N_DAYS - 1, -1, -1):
	Nd = occupancy[day]
	Nd_next = occupancy[day + 1]
	penalties[day] = max(0, (Nd - 125) / 400 * Nd ** (0.5 + abs(Nd - Nd_next) / 50))
	return penalties.sum()


	# Simulated annealing
	def simulated_annealing(family_data, sample_submission, cost_matrix):
	best = sample_submission["assigned_day"].values
	occupancy = np.zeros(N_DAYS + 1, dtype=int)
	for i, day in enumerate(best):
	occupancy[day] += family_data.iloc[i]["n_people"]
	occupancy[0] = occupancy[N_DAYS] # Occupancy for the "zeroth" day
	best_score = cost_matrix[np.arange(N_FAMILY), best].sum() + accounting_penalty(
	occupancy
	)
	temperature = 1.0
	alpha = 0.99
	for step in range(10000):
	# Create new candidate solution
	family_id = np.random.choice(range(N_FAMILY))
	old_day = best[family_id]
	new_day = np.random.choice(range(1, N_DAYS + 1))
	best[family_id] = new_day

	# Calculate the cost
	new_occupancy = occupancy.copy()
	new_occupancy[old_day] -= family_data.iloc[family_id]["n_people"]
	new_occupancy[new_day] += family_data.iloc[family_id]["n_people"]
	new_occupancy[0] = new_occupancy[N_DAYS] # Occupancy for the "zeroth" day
	if any((new_occupancy < MIN_OCCUPANCY) \| (new_occupancy > MAX_OCCUPANCY)):
	best[family_id] = old_day # Revert changes
	continue
	new_score = cost_matrix[np.arange(N_FAMILY), best].sum() + accounting_penalty(
	new_occupancy
	)

	# Acceptance probability
	if new_score < best_score or np.random.rand() < exp(
	-(new_score - best_score) / temperature
	):
	best_score = new_score
	occupancy = new_occupancy
	else:
	best[family_id] = old_day # Revert changes

	# Cool down
	temperature *= alpha

	return best, best_score


	# Run the optimization
	cost_matrix = preference_cost_matrix(family_data)
	best_schedule, best_score = simulated_annealing(
	family_data, sample_submission, cost_matrix
	)

	# Output the result
	print(f"Best score: {best_score}")
	sample_submission["assigned_day"] = best_schedule
	sample_submission.to_csv("./working/submission.csv", index=False)