Spaces:
Runtime error
Runtime error
import pandas as pd | |
from datetime import datetime, timedelta | |
# Basic Week over Week Retention | |
def calculate_wow_retention_by_type( | |
df: pd.DataFrame, market_creator: str | |
) -> pd.DataFrame: | |
filtered_df = df.loc[df["market_creator"] == market_creator] | |
# Get unique traders per week and type | |
weekly_traders = ( | |
filtered_df.groupby(["month_year_week", "trader_type"], sort=False)[ | |
"trader_address" | |
] | |
.nunique() | |
.reset_index() | |
) | |
# weekly_traders = weekly_traders.sort_values(['trader_type', 'month_year_week']) | |
# Get ordered list of unique weeks - converting to datetime for proper sorting | |
all_weeks = filtered_df["month_year_week"].unique() | |
weeks_datetime = pd.to_datetime(all_weeks) | |
sorted_weeks_idx = weeks_datetime.argsort() | |
all_weeks = all_weeks[sorted_weeks_idx] | |
# Create mapping from week string to numeric index | |
week_to_number = {week: idx for idx, week in enumerate(all_weeks)} | |
# Calculate retention | |
retention = [] | |
# Iterate through each trader type | |
for trader_type in weekly_traders["trader_type"].unique(): | |
type_data = weekly_traders[weekly_traders["trader_type"] == trader_type] | |
# Calculate retention for each week within this trader type | |
for i in range(1, len(type_data)): | |
current_week = type_data.iloc[i]["month_year_week"] | |
# print(f"current_week={current_week}") | |
week_number = week_to_number[current_week] | |
if week_to_number == 0: | |
# no previous week info | |
continue | |
previous_week_number = week_number - 1 | |
# this should give only one value | |
previous_week = [ | |
key | |
for key in week_to_number.keys() | |
if week_to_number[key] == previous_week_number | |
][0] | |
# print(f"previous week = {previous_week}") | |
# Get traders in both weeks for this type | |
current_traders = set( | |
filtered_df[ | |
(filtered_df["month_year_week"] == current_week) | |
& (filtered_df["trader_type"] == trader_type) | |
]["trader_address"] | |
) | |
previous_traders = set( | |
filtered_df[ | |
(filtered_df["month_year_week"] == previous_week) | |
& (filtered_df["trader_type"] == trader_type) | |
]["trader_address"] | |
) | |
retained = len(current_traders.intersection(previous_traders)) | |
retention_rate = ( | |
(retained / len(previous_traders)) * 100 | |
if len(previous_traders) > 0 | |
else 0 | |
) | |
retention.append( | |
{ | |
"trader_type": trader_type, | |
"week": current_week, | |
"retained_traders": retained, | |
"previous_traders": len(previous_traders), | |
"retention_rate": round(retention_rate, 2), | |
} | |
) | |
return pd.DataFrame(retention) | |
def create_retention_matrix(cohort_retention_df: pd.DataFrame) -> pd.DataFrame: | |
# Pivot the data to create the retention matrix | |
retention_matrix = cohort_retention_df.pivot( | |
index="cohort_week", columns="weeks_since_cohort", values="retention_rate" | |
) | |
# Sort index chronologically | |
retention_matrix.index = pd.to_datetime(retention_matrix.index) | |
retention_matrix = retention_matrix.sort_index() | |
# Rename columns to show week numbers | |
# retention_matrix.columns = [f"Week {i}" for i in retention_matrix.columns] | |
return retention_matrix | |
# Wow Retention at the cohort level | |
def calculate_cohort_retention( | |
df: pd.DataFrame, market_creator: str, trader_type: str | |
) -> pd.DataFrame: | |
df_filtered = df.loc[ | |
(df["market_creator"] == market_creator) & (df["trader_type"] == trader_type) | |
] | |
if len(df_filtered) == 0: | |
return pd.DataFrame() | |
df_filtered = df_filtered.sort_values(by="creation_timestamp", ascending=True) | |
# Get first week of activity for each trader | |
first_activity = ( | |
df_filtered.groupby("trader_address") | |
.agg({"creation_timestamp": "min", "month_year_week": "first"}) | |
.reset_index() | |
) | |
first_activity.columns = ["trader_address", "first_activity", "cohort_week"] | |
# Get ordered list of unique weeks - converting to datetime for proper sorting | |
all_weeks = df_filtered["month_year_week"].unique() | |
weeks_datetime = pd.to_datetime(all_weeks) | |
sorted_weeks_idx = weeks_datetime.argsort() | |
all_weeks = all_weeks[sorted_weeks_idx] | |
# Create mapping from week string to numeric index | |
week_to_number = {week: idx for idx, week in enumerate(all_weeks)} | |
# Merge back to get all activities | |
cohort_data = pd.merge( | |
df_filtered, | |
first_activity[["trader_address", "cohort_week"]], | |
on="trader_address", | |
) | |
# Get all unique weeks and cohorts | |
all_cohorts = cohort_data["cohort_week"].unique() | |
# extend the cohort | |
# print(f"all cohorts = {all_cohorts}") | |
retention_data = [] | |
max_weeks = 8 | |
# for cohort in all_cohorts: | |
for cohort_week_idx, cohort in enumerate(all_weeks): | |
# print(f"analyzing cohort {cohort}") | |
# Get all traders in this cohort | |
cohort_traders = set( | |
cohort_data[cohort_data["cohort_week"] == cohort]["trader_address"] | |
) | |
cohort_size = len(cohort_traders) | |
# print(f"cohort size = {cohort_size}") | |
# Calculate retention for each week after the cohort week | |
for week_idx, week in enumerate(all_weeks): | |
# print(f"Analyzing week = {week}") | |
weeks_since_cohort = week_idx - cohort_week_idx | |
if weeks_since_cohort < 0 or weeks_since_cohort > max_weeks: | |
continue | |
if cohort_size == 0: | |
print(f"NO new traders for cohort week={cohort}") | |
retention_data.append( | |
{ | |
"cohort_week": cohort, | |
"week": week, | |
"weeks_since_cohort": weeks_since_cohort, | |
"cohort_size": cohort_size, | |
"active_traders": 0, | |
"retained_traders": 0, | |
"previous_traders": 0, | |
"retention_rate": round(0, 2), | |
} | |
) | |
continue | |
# Get active traders from the cohort in current week | |
current_traders = set( | |
cohort_data[ | |
(cohort_data["cohort_week"] == cohort) | |
& (cohort_data["month_year_week"] == week) | |
]["trader_address"] | |
) | |
# Get active traders from the cohort in previous week | |
if week == cohort: | |
# For the first week, retention is 100% by definition | |
retained = len(current_traders) | |
retention_rate = 100 if len(current_traders) > 0 else 0 | |
elif week_idx > 0: | |
previous_week = all_weeks[week_idx - 1] | |
previous_traders = set( | |
cohort_data[ | |
(cohort_data["cohort_week"] == cohort) | |
& (cohort_data["month_year_week"] == previous_week) | |
]["trader_address"] | |
) | |
retained = len(current_traders.intersection(previous_traders)) | |
retention_rate = ( | |
(retained / len(previous_traders)) * 100 | |
if len(previous_traders) > 0 | |
else 0 | |
) | |
retention_data.append( | |
{ | |
"cohort_week": cohort, | |
"week": week, | |
"weeks_since_cohort": weeks_since_cohort, | |
"cohort_size": cohort_size, | |
"active_traders": len(current_traders), | |
"retained_traders": retained, | |
"previous_traders": ( | |
len(previous_traders) if week_idx > 0 else cohort_size | |
), | |
"retention_rate": round(retention_rate, 2), | |
} | |
) | |
retention_matrix = create_retention_matrix(pd.DataFrame(retention_data)) | |
return retention_matrix | |
def prepare_retention_dataset( | |
retention_df: pd.DataFrame, unknown_df: pd.DataFrame | |
) -> pd.DataFrame: | |
print("Preparing retention dataset") | |
retention_df["trader_type"] = retention_df["staking"].apply( | |
lambda x: "non_Olas" if x == "non_Olas" else "Olas" | |
) | |
retention_df.rename(columns={"request_time": "creation_timestamp"}, inplace=True) | |
retention_df = retention_df[ | |
["trader_type", "market_creator", "trader_address", "creation_timestamp"] | |
] | |
unknown_df["trader_type"] = "unclassified" | |
unknown_df = unknown_df[ | |
["trader_type", "market_creator", "trader_address", "creation_timestamp"] | |
] | |
all_traders = pd.concat([retention_df, unknown_df], ignore_index=True) | |
all_traders["creation_timestamp"] = pd.to_datetime( | |
all_traders["creation_timestamp"] | |
) | |
all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True) | |
# Remove data from current week and onwards | |
now = datetime.now() | |
# Get start of the current week (Monday) | |
start_of_week = now - timedelta(days=(now.weekday())) | |
start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0) | |
all_traders["creation_date"] = all_traders["creation_timestamp"].dt.date | |
all_traders["creation_date"] = pd.to_datetime(all_traders["creation_date"]) | |
# Filter the dataframe | |
filtered_traders = all_traders[all_traders["creation_date"] < start_of_week] | |
filtered_traders["month_year_week"] = ( | |
filtered_traders["creation_timestamp"] | |
.dt.to_period("W") | |
.dt.start_time.dt.strftime("%b-%d-%Y") | |
) | |
print(filtered_traders.month_year_week.unique()) | |
return filtered_traders | |
if __name__ == "__main__": | |
print("WIP") | |