Spaces:
Runtime error
Runtime error
File size: 5,135 Bytes
f26bf5c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import pandas as pd
from datetime import datetime, timedelta
from utils import DATA_DIR
# Basic Week over Week Retention
def calculate_wow_retention(
traders_df: pd.DataFrame, trader_filter: str
) -> pd.DataFrame:
"""Function to compute the wow retention at the week level"""
if trader_filter == "Olas":
df = traders_df.loc[traders_df["staking"] != "non_Olas"]
elif trader_filter == "non_Olas":
df = traders_df.loc[traders_df["staking"] == "non_Olas"]
else:
# unknown traders
print("Not implemented yet")
# Get unique traders per week
weekly_traders = (
df.groupby("month_year_week")["trader_address"].nunique().reset_index()
)
# Calculate retention
retention = []
for i in range(1, len(weekly_traders)):
current_week = weekly_traders.iloc[i]["month_year_week"]
previous_week = weekly_traders.iloc[i - 1]["month_year_week"]
# Get traders in both weeks
current_traders = set(
df[df["month_year_week"] == current_week]["trader_address"]
)
previous_traders = set(
df[df["month_year_week"] == previous_week]["trader_address"]
)
retained = len(current_traders.intersection(previous_traders))
retention_rate = (retained / len(previous_traders)) * 100
retention.append(
{
"month_year_week": current_week,
"retained_traders": retained,
"previous_traders": len(previous_traders),
"retention_rate": retention_rate,
}
)
return pd.DataFrame(retention)
# N-Week Rolling Retention
def calculate_nweek_retention(df: pd.DataFrame, n_weeks=4):
# Get first and last trade for each trader
trader_activity = (
df.groupby("trader_address")
.agg({"creation_timestamp": ["min", "max"]})
.reset_index()
)
trader_activity.columns = ["trader_address", "first_trade", "last_trade"]
trader_activity["weeks_active"] = (
pd.to_datetime(trader_activity["last_trade"])
- pd.to_datetime(trader_activity["first_trade"])
).dt.days / 7
return {
"total_traders": len(trader_activity),
f"{n_weeks}_week_retained": len(
trader_activity[trader_activity["weeks_active"] >= n_weeks]
),
"retention_rate": (
len(trader_activity[trader_activity["weeks_active"] >= n_weeks])
/ len(trader_activity)
)
* 100,
}
# Cohort Retention
def calculate_cohort_retention(df, max_weeks=12):
# Get first week for each trader
# TODO check if first will retrieve the first week of the data or not
first_trades = (
df.groupby("trader_address")
.agg({"creation_timestamp": "min", "month_year_week": "first"})
.reset_index()
)
first_trades.columns = ["trader_address", "first_trade", "cohort_week"]
# Get ordered list of unique weeks - converting to datetime for proper sorting
all_weeks = df["month_year_week"].unique()
weeks_datetime = pd.to_datetime(all_weeks)
sorted_weeks_idx = weeks_datetime.argsort()
all_weeks = all_weeks[sorted_weeks_idx]
# Create mapping from week string to numeric index
week_to_number = {week: idx for idx, week in enumerate(all_weeks)}
# Merge back to get all activities
cohort_data = pd.merge(
df, first_trades[["trader_address", "cohort_week"]], on="trader_address"
)
# Calculate week number since first activity
cohort_data["cohort_number"] = cohort_data["cohort_week"].map(week_to_number)
cohort_data["activity_number"] = cohort_data["month_year_week"].map(week_to_number)
cohort_data["week_number"] = (
cohort_data["activity_number"] - cohort_data["cohort_number"]
)
# Calculate retention by cohort
cohort_sizes = cohort_data.groupby("cohort_week")["trader_address"].nunique()
retention_matrix = cohort_data.groupby(["cohort_week", "week_number"])[
"trader_address"
].nunique()
retention_matrix = retention_matrix.unstack(fill_value=0)
# Convert to percentages
retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100
# Limit to max_weeks if specified
if max_weeks is not None and max_weeks < retention_matrix.shape[1]:
retention_matrix = retention_matrix.iloc[:, :max_weeks]
return retention_matrix.round(2)
if __name__ == "__main__":
# read trades dataset
trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
# First, create week numbers from timestamps
trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creation_timestamp"])
trades_df = trades_df.sort_values(by="creation_timestamp", ascending=True)
trades_df["month_year_week"] = (
trades_df["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
)
# Usage example:
wow_retention = calculate_wow_retention(trades_df, trader_filter="Olas")
rolling_retention = calculate_nweek_retention(trades_df, n_weeks=4)
cohort_retention = calculate_cohort_retention(trades_df)
|