import pandas as pd

from utils import DATA_DIR


# Basic Week over Week Retention
def calculate_wow_retention(
    traders_df: pd.DataFrame, trader_filter: str
) -> pd.DataFrame:
    """Function to compute the wow retention at the week level"""
    if trader_filter == "Olas":
        df = traders_df.loc[traders_df["staking"] != "non_Olas"]
    elif trader_filter == "non_Olas":
        df = traders_df.loc[traders_df["staking"] == "non_Olas"]
    else:
        # unknown traders
        print("Not implemented yet")
    # Get unique traders per week
    weekly_traders = (
        df.groupby("month_year_week")["trader_address"].nunique().reset_index()
    )

    # Calculate retention
    retention = []
    for i in range(1, len(weekly_traders)):
        current_week = weekly_traders.iloc[i]["month_year_week"]
        previous_week = weekly_traders.iloc[i - 1]["month_year_week"]

        # Get traders in both weeks
        current_traders = set(
            df[df["month_year_week"] == current_week]["trader_address"]
        )
        previous_traders = set(
            df[df["month_year_week"] == previous_week]["trader_address"]
        )

        retained = len(current_traders.intersection(previous_traders))
        retention_rate = (retained / len(previous_traders)) * 100

        retention.append(
            {
                "month_year_week": current_week,
                "retained_traders": retained,
                "previous_traders": len(previous_traders),
                "retention_rate": retention_rate,
            }
        )

    return pd.DataFrame(retention)
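

# Illustrative only (hypothetical numbers): calculate_wow_retention returns one
# row per week after the first, e.g.
#   month_year_week  retained_traders  previous_traders  retention_rate
#   Jan-13-2025                    12                30            40.0
# where retention_rate = retained_traders / previous_traders * 100.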


# N-Week Rolling Retention
def calculate_nweek_retention(df: pd.DataFrame, n_weeks: int = 4) -> dict:
    """Share of traders active for at least n_weeks between first and last trade."""
    # Get first and last trade for each trader
    trader_activity = (
        df.groupby("trader_address")
        .agg({"creation_timestamp": ["min", "max"]})
        .reset_index()
    )

    trader_activity.columns = ["trader_address", "first_trade", "last_trade"]
    trader_activity["weeks_active"] = (
        pd.to_datetime(trader_activity["last_trade"])
        - pd.to_datetime(trader_activity["first_trade"])
    ).dt.days / 7

    return {
        "total_traders": len(trader_activity),
        f"{n_weeks}_week_retained": len(
            trader_activity[trader_activity["weeks_active"] >= n_weeks]
        ),
        "retention_rate": (
            len(trader_activity[trader_activity["weeks_active"] >= n_weeks])
            / len(trader_activity)
        )
        * 100,
    }
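

# Illustrative only (hypothetical numbers): for n_weeks=4 the returned dict
# looks like
#   {"total_traders": 500, "4_week_retained": 120, "retention_rate": 24.0}
# i.e. traders whose first and last trades are at least 4 weeks apart.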


# Cohort Retention
def calculate_cohort_retention(df: pd.DataFrame, max_weeks: int = 12) -> pd.DataFrame:
    """Cohort retention matrix: rows are cohort weeks (week of first trade),
    columns are week offsets since that week, values are retention percentages."""
    # Get first week for each trader.
    # Note: "first" picks the first row per trader in DataFrame order, so it only
    # matches the week of the min creation_timestamp because the caller sorts the
    # data by creation_timestamp beforehand.
    first_trades = (
        df.groupby("trader_address")
        .agg({"creation_timestamp": "min", "month_year_week": "first"})
        .reset_index()
    )
    first_trades.columns = ["trader_address", "first_trade", "cohort_week"]

    # Get ordered list of unique weeks - converting to datetime for proper sorting
    all_weeks = df["month_year_week"].unique()
    weeks_datetime = pd.to_datetime(all_weeks)
    sorted_weeks_idx = weeks_datetime.argsort()
    all_weeks = all_weeks[sorted_weeks_idx]

    # Create mapping from week string to numeric index
    week_to_number = {week: idx for idx, week in enumerate(all_weeks)}

    # Merge back to get all activities
    cohort_data = pd.merge(
        df, first_trades[["trader_address", "cohort_week"]], on="trader_address"
    )

    # Calculate week number since first activity
    cohort_data["cohort_number"] = cohort_data["cohort_week"].map(week_to_number)
    cohort_data["activity_number"] = cohort_data["month_year_week"].map(week_to_number)
    cohort_data["week_number"] = (
        cohort_data["activity_number"] - cohort_data["cohort_number"]
    )

    # Calculate retention by cohort
    cohort_sizes = cohort_data.groupby("cohort_week")["trader_address"].nunique()
    retention_matrix = cohort_data.groupby(["cohort_week", "week_number"])[
        "trader_address"
    ].nunique()
    retention_matrix = retention_matrix.unstack(fill_value=0)

    # Convert to percentages
    retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100

    # Limit to max_weeks if specified
    if max_weeks is not None and max_weeks < retention_matrix.shape[1]:
        retention_matrix = retention_matrix.iloc[:, :max_weeks]

    return retention_matrix.round(2)
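

# Not part of the original pipeline: a minimal sketch of one way to summarise the
# cohort matrix above (rows = cohort weeks, columns = week offsets since the
# first trade, values = retention percentages; offset 0 is always 100).
def average_cohort_retention(retention_matrix: pd.DataFrame) -> pd.Series:
    """Mean retention across cohorts for each week offset.

    Note: the matrix is built with fill_value=0, so offsets that a young cohort
    has not reached yet count as 0 and pull this simple average down.
    """
    return retention_matrix.mean(axis=0).round(2)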


if __name__ == "__main__":
    # read trades dataset
    trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
    # Derive a week label (month_year_week) from each trade timestamp
    trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creation_timestamp"])
    trades_df = trades_df.sort_values(by="creation_timestamp", ascending=True)
    trades_df["month_year_week"] = (
        trades_df["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
    )
    # Compute the three retention views
    wow_retention = calculate_wow_retention(trades_df, trader_filter="Olas")
    rolling_retention = calculate_nweek_retention(trades_df, n_weeks=4)
    cohort_retention = calculate_cohort_retention(trades_df)
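    # Illustrative output handling (not part of the original script): print a
    # short summary of each retention view, using the sketch helper above for
    # the cohort matrix.
    print(wow_retention.tail())
    print(rolling_retention)
    print(average_cohort_retention(cohort_retention))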