|
import pandas as pd |
|
import numpy as np |
|
from math import exp |
|
|
|
|
|
family_data = pd.read_csv("./input/family_data.csv") |
|
sample_submission = pd.read_csv("./input/sample_submission.csv") |
|
|
|
|
|
N_DAYS = 100 |
|
N_FAMILY = len(family_data) |
|
MAX_OCCUPANCY = 300 |
|
MIN_OCCUPANCY = 125 |
|
|
|
|
|
|
|
def preference_cost_matrix(family_data): |
|
cost_matrix = np.zeros((N_FAMILY, N_DAYS + 1), dtype=np.int64) |
|
for i in range(N_FAMILY): |
|
family = family_data.iloc[i] |
|
n_people = family["n_people"] |
|
for j in range(10): |
|
day = family[f"choice_{j}"] |
|
if j == 0: |
|
cost_matrix[i, day] = 0 |
|
elif j == 1: |
|
cost_matrix[i, day] = 50 |
|
elif j == 2: |
|
cost_matrix[i, day] = 50 + 9 * n_people |
|
elif j == 3: |
|
cost_matrix[i, day] = 100 + 9 * n_people |
|
elif j == 4: |
|
cost_matrix[i, day] = 200 + 9 * n_people |
|
elif j == 5: |
|
cost_matrix[i, day] = 200 + 18 * n_people |
|
elif j == 6: |
|
cost_matrix[i, day] = 300 + 18 * n_people |
|
elif j == 7: |
|
cost_matrix[i, day] = 300 + 36 * n_people |
|
elif j == 8: |
|
cost_matrix[i, day] = 400 + 36 * n_people |
|
elif j == 9: |
|
cost_matrix[i, day] = 500 + 36 * n_people + 199 * n_people |
|
cost_matrix[i, 0] = 500 + 36 * n_people + 398 * n_people |
|
return cost_matrix |
|
|
|
|
|
def accounting_penalty(occupancy): |
|
penalties = np.zeros(N_DAYS + 1) |
|
for day in range(N_DAYS - 1, -1, -1): |
|
Nd = occupancy[day] |
|
Nd_next = occupancy[day + 1] |
|
penalties[day] = max(0, (Nd - 125) / 400 * Nd ** (0.5 + abs(Nd - Nd_next) / 50)) |
|
return penalties.sum() |
|
|
|
|
|
|
|
def simulated_annealing(family_data, sample_submission, cost_matrix): |
|
best = sample_submission["assigned_day"].values |
|
occupancy = np.zeros(N_DAYS + 1, dtype=int) |
|
for i, day in enumerate(best): |
|
occupancy[day] += family_data.iloc[i]["n_people"] |
|
occupancy[0] = occupancy[N_DAYS] |
|
best_score = cost_matrix[np.arange(N_FAMILY), best].sum() + accounting_penalty( |
|
occupancy |
|
) |
|
temperature = 1.0 |
|
alpha = 0.99 |
|
for step in range(10000): |
|
|
|
family_id = np.random.choice(range(N_FAMILY)) |
|
old_day = best[family_id] |
|
new_day = np.random.choice(range(1, N_DAYS + 1)) |
|
best[family_id] = new_day |
|
|
|
|
|
new_occupancy = occupancy.copy() |
|
new_occupancy[old_day] -= family_data.iloc[family_id]["n_people"] |
|
new_occupancy[new_day] += family_data.iloc[family_id]["n_people"] |
|
new_occupancy[0] = new_occupancy[N_DAYS] |
|
if any((new_occupancy < MIN_OCCUPANCY) | (new_occupancy > MAX_OCCUPANCY)): |
|
best[family_id] = old_day |
|
continue |
|
new_score = cost_matrix[np.arange(N_FAMILY), best].sum() + accounting_penalty( |
|
new_occupancy |
|
) |
|
|
|
|
|
if new_score < best_score or np.random.rand() < exp( |
|
-(new_score - best_score) / temperature |
|
): |
|
best_score = new_score |
|
occupancy = new_occupancy |
|
else: |
|
best[family_id] = old_day |
|
|
|
|
|
temperature *= alpha |
|
|
|
return best, best_score |
|
|
|
|
|
|
|
cost_matrix = preference_cost_matrix(family_data) |
|
best_schedule, best_score = simulated_annealing( |
|
family_data, sample_submission, cost_matrix |
|
) |
|
|
|
|
|
print(f"Best score: {best_score}") |
|
sample_submission["assigned_day"] = best_schedule |
|
sample_submission.to_csv("./working/submission.csv", index=False) |
|
|