Spaces:
Runtime error
Runtime error
import pandas as pd | |
from datetime import datetime, timedelta | |
from utils import DATA_DIR | |
# Basic Week over Week Retention | |
def calculate_wow_retention( | |
traders_df: pd.DataFrame, trader_filter: str | |
) -> pd.DataFrame: | |
"""Function to compute the wow retention at the week level""" | |
if trader_filter == "Olas": | |
df = traders_df.loc[traders_df["staking"] != "non_Olas"] | |
elif trader_filter == "non_Olas": | |
df = traders_df.loc[traders_df["staking"] == "non_Olas"] | |
else: | |
# unknown traders | |
print("Not implemented yet") | |
# Get unique traders per week | |
weekly_traders = ( | |
df.groupby("month_year_week")["trader_address"].nunique().reset_index() | |
) | |
# Calculate retention | |
retention = [] | |
for i in range(1, len(weekly_traders)): | |
current_week = weekly_traders.iloc[i]["month_year_week"] | |
previous_week = weekly_traders.iloc[i - 1]["month_year_week"] | |
# Get traders in both weeks | |
current_traders = set( | |
df[df["month_year_week"] == current_week]["trader_address"] | |
) | |
previous_traders = set( | |
df[df["month_year_week"] == previous_week]["trader_address"] | |
) | |
retained = len(current_traders.intersection(previous_traders)) | |
retention_rate = (retained / len(previous_traders)) * 100 | |
retention.append( | |
{ | |
"month_year_week": current_week, | |
"retained_traders": retained, | |
"previous_traders": len(previous_traders), | |
"retention_rate": retention_rate, | |
} | |
) | |
return pd.DataFrame(retention) | |
# N-Week Rolling Retention | |
def calculate_nweek_retention(df: pd.DataFrame, n_weeks=4): | |
# Get first and last trade for each trader | |
trader_activity = ( | |
df.groupby("trader_address") | |
.agg({"creation_timestamp": ["min", "max"]}) | |
.reset_index() | |
) | |
trader_activity.columns = ["trader_address", "first_trade", "last_trade"] | |
trader_activity["weeks_active"] = ( | |
pd.to_datetime(trader_activity["last_trade"]) | |
- pd.to_datetime(trader_activity["first_trade"]) | |
).dt.days / 7 | |
return { | |
"total_traders": len(trader_activity), | |
f"{n_weeks}_week_retained": len( | |
trader_activity[trader_activity["weeks_active"] >= n_weeks] | |
), | |
"retention_rate": ( | |
len(trader_activity[trader_activity["weeks_active"] >= n_weeks]) | |
/ len(trader_activity) | |
) | |
* 100, | |
} | |
# Cohort Retention | |
def calculate_cohort_retention(df, max_weeks=12): | |
# Get first week for each trader | |
# TODO check if first will retrieve the first week of the data or not | |
first_trades = ( | |
df.groupby("trader_address") | |
.agg({"creation_timestamp": "min", "month_year_week": "first"}) | |
.reset_index() | |
) | |
first_trades.columns = ["trader_address", "first_trade", "cohort_week"] | |
# Get ordered list of unique weeks - converting to datetime for proper sorting | |
all_weeks = df["month_year_week"].unique() | |
weeks_datetime = pd.to_datetime(all_weeks) | |
sorted_weeks_idx = weeks_datetime.argsort() | |
all_weeks = all_weeks[sorted_weeks_idx] | |
# Create mapping from week string to numeric index | |
week_to_number = {week: idx for idx, week in enumerate(all_weeks)} | |
# Merge back to get all activities | |
cohort_data = pd.merge( | |
df, first_trades[["trader_address", "cohort_week"]], on="trader_address" | |
) | |
# Calculate week number since first activity | |
cohort_data["cohort_number"] = cohort_data["cohort_week"].map(week_to_number) | |
cohort_data["activity_number"] = cohort_data["month_year_week"].map(week_to_number) | |
cohort_data["week_number"] = ( | |
cohort_data["activity_number"] - cohort_data["cohort_number"] | |
) | |
# Calculate retention by cohort | |
cohort_sizes = cohort_data.groupby("cohort_week")["trader_address"].nunique() | |
retention_matrix = cohort_data.groupby(["cohort_week", "week_number"])[ | |
"trader_address" | |
].nunique() | |
retention_matrix = retention_matrix.unstack(fill_value=0) | |
# Convert to percentages | |
retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100 | |
# Limit to max_weeks if specified | |
if max_weeks is not None and max_weeks < retention_matrix.shape[1]: | |
retention_matrix = retention_matrix.iloc[:, :max_weeks] | |
return retention_matrix.round(2) | |
if __name__ == "__main__": | |
# read trades dataset | |
trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet") | |
# First, create week numbers from timestamps | |
trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creation_timestamp"]) | |
trades_df = trades_df.sort_values(by="creation_timestamp", ascending=True) | |
trades_df["month_year_week"] = ( | |
trades_df["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y") | |
) | |
# Usage example: | |
wow_retention = calculate_wow_retention(trades_df, trader_filter="Olas") | |
rolling_retention = calculate_nweek_retention(trades_df, n_weeks=4) | |
cohort_retention = calculate_cohort_retention(trades_df) | |