In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gc

sns.set_style("darkgrid")

In [7]:
# Load the markets dataset from the parquet file kept next to the notebook folder.
open_markets = pd.read_parquet("../data/markets_live_data.parquet")

In [8]:
open_markets.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 947 entries, 0 to 946
Data columns (total 23 columns):
 #   Column                              Non-Null Count  Dtype         
---  ------                              --------------  -----         
 0   creationTimestamp                   947 non-null    object        
 1   id                                  947 non-null    object        
 2   liquidityMeasure                    947 non-null    int64         
 3   liquidityParameter                  947 non-null    object        
 4   openingTimestamp                    947 non-null    object        
 5   outcomeTokenAmounts                 947 non-null    object        
 6   title                               947 non-null    object        
 7   sample_timestamp                    947 non-null    int64         
 8   open                                947 non-null    bool          
 9   total_trades                        947 non-null    int64         
 10  dist_gap_perc             

In [9]:
open_markets.open.value_counts()

open
False    834
True     113
Name: count, dtype: int64

In [3]:
# Load the per-trade profitability dataset (one row per trade, see the info() output below).
all_trades = pd.read_parquet('../data/all_trades_profitability.parquet')

In [4]:
all_trades.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88355 entries, 0 to 88354
Data columns (total 21 columns):
 #   Column                  Non-Null Count  Dtype              
---  ------                  --------------  -----              
 0   trader_address          88355 non-null  object             
 1   market_creator          88355 non-null  object             
 2   trade_id                88355 non-null  object             
 3   creation_timestamp      88355 non-null  datetime64[ns, UTC]
 4   title                   88355 non-null  object             
 5   market_status           88355 non-null  object             
 6   collateral_amount       88355 non-null  float64            
 7   outcome_index           88355 non-null  object             
 8   trade_fee_amount        88355 non-null  float64            
 9   outcomes_tokens_traded  88355 non-null  float64            
 10  current_answer          88355 non-null  int64              
 11  is_invalid              88355 non-null  b

In [6]:
all_trades.market_status.value_counts()

market_status
CLOSED    8210
Name: count, dtype: int64

In [5]:
# Calendar day of each trade, taken from the tz-aware creation_timestamp column.
# .dt.date yields plain datetime.date objects, so the new column has object dtype.
all_trades["creation_date"] = all_trades["creation_timestamp"].dt.date

# Metrics we can compute at the trader agent level (for closed markets)


* ROI per market and per day (sorted by creation date)
* number of trades per market and per day
* net earnings
* earnings
* bet amount
* number of mech calls


In [14]:
# Number of trades per (trader, market title) pair.
# size() counts the rows of each group directly, so the figure does not depend on
# any particular column (previously "roi") being non-null, and reset_index(name=...)
# names the count column without a separate in-place rename.
volume_trades_per_trader_and_market = (
    all_trades.groupby(["trader_address", "title"])
    .size()
    .reset_index(name="nr_trades_per_market")
)
volume_trades_per_trader_and_market.head()

Unnamed: 0,trader_address,title,nr_trades_per_market
0,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,Will Apple implement significant changes in th...,3
1,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,"Will Apple launch the iPhone 16, Watch, and Ai...",2
2,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,Will Bayer Leverkusen retain the Bundesliga ti...,1
3,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,Will Chick-fil-A successfully launch a streami...,2
4,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,Will Donald Trump's new cryptocurrency platfor...,1


# Adding multibet category

In [15]:
# A (trader, market) pair is "multibet" when the trader placed more than one trade
# on that market. The comparison is vectorized: no row-wise apply is needed.
volume_trades_per_trader_and_market["multibet"] = (
    volume_trades_per_trader_and_market["nr_trades_per_market"] > 1
)

In [16]:
volume_trades_per_trader_and_market.head()

Unnamed: 0,trader_address,title,nr_trades_per_market,multibet
0,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,Will Apple implement significant changes in th...,3,True
1,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,"Will Apple launch the iPhone 16, Watch, and Ai...",2,True
2,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,Will Bayer Leverkusen retain the Bundesliga ti...,1,False
3,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,Will Chick-fil-A successfully launch a streami...,2,True
4,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,Will Donald Trump's new cryptocurrency platfor...,1,False


# Global dataset

In [17]:
# Attach the per-(trader, market) trade count and multibet flag to every trade.
# Inner join (the pandas default) on the same keys used for the aggregation.
trader_agent_metrics = all_trades.merge(
    volume_trades_per_trader_and_market,
    how="inner",
    on=["trader_address", "title"],
)

In [18]:
trader_agent_metrics.head()

Unnamed: 0,trader_address,market_creator,trade_id,creation_timestamp,title,market_status,collateral_amount,outcome_index,trade_fee_amount,outcomes_tokens_traded,...,earnings,redeemed,redeemed_amount,num_mech_calls,mech_fee_amount,net_earnings,roi,creation_date,nr_trades_per_market,multibet
0,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,quickstart,0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x02...,2024-08-25 02:37:35+00:00,Will the first floating offshore wind research...,CLOSED,0.450426,1,0.009009,0.729589,...,0.729589,False,0.0,2,0.02,0.250154,0.521769,2024-08-25,1,False
1,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,quickstart,0x012bf74d4c7060799f590c7c08accc0e9938e6c40x02...,2024-08-30 03:24:45+00:00,Will SpaceX's Polaris Dawn mission launch on 3...,CLOSED,0.419662,0,0.008393,0.610289,...,0.610289,False,0.0,1,0.01,0.172234,0.393179,2024-08-30,1,False
2,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,quickstart,0x02e93f85cbc48b380c725d58e85e083c112bd0180x02...,2024-08-26 02:48:20+00:00,Will Apple implement significant changes in th...,CLOSED,0.641732,1,0.012835,1.572272,...,0.0,False,0.0,4,0.04,-0.694567,-1.0,2024-08-26,3,True
3,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,quickstart,0x02e93f85cbc48b380c725d58e85e083c112bd0180x02...,2024-08-27 00:23:25+00:00,Will Apple implement significant changes in th...,CLOSED,0.482506,1,0.00965,1.013458,...,0.0,False,0.0,4,0.04,-0.532156,-1.0,2024-08-27,3,True
4,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,quickstart,0x02e93f85cbc48b380c725d58e85e083c112bd0180x02...,2024-08-28 01:41:30+00:00,Will Apple implement significant changes in th...,CLOSED,0.56793,1,0.011359,1.285445,...,0.0,False,0.0,4,0.04,-0.619289,-1.0,2024-08-28,3,True


In [20]:
# Order the trades by trader, market and time. The name is rebound instead of
# sorting in place, matching the style of the later sort by creation_timestamp
# and keeping the cell safe to re-run.
trader_agent_metrics = trader_agent_metrics.sort_values(
    by=["trader_address", "title", "creation_timestamp"]
)

In [21]:
trader_agent_metrics.head()

Unnamed: 0,trader_address,market_creator,trade_id,creation_timestamp,title,market_status,collateral_amount,outcome_index,trade_fee_amount,outcomes_tokens_traded,...,earnings,redeemed,redeemed_amount,num_mech_calls,mech_fee_amount,net_earnings,roi,creation_date,nr_trades_per_market,multibet
2,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,quickstart,0x02e93f85cbc48b380c725d58e85e083c112bd0180x02...,2024-08-26 02:48:20+00:00,Will Apple implement significant changes in th...,CLOSED,0.641732,1,0.012835,1.572272,...,0.0,False,0.0,4,0.04,-0.694567,-1.0,2024-08-26,3,True
3,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,quickstart,0x02e93f85cbc48b380c725d58e85e083c112bd0180x02...,2024-08-27 00:23:25+00:00,Will Apple implement significant changes in th...,CLOSED,0.482506,1,0.00965,1.013458,...,0.0,False,0.0,4,0.04,-0.532156,-1.0,2024-08-27,3,True
4,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,quickstart,0x02e93f85cbc48b380c725d58e85e083c112bd0180x02...,2024-08-28 01:41:30+00:00,Will Apple implement significant changes in th...,CLOSED,0.56793,1,0.011359,1.285445,...,0.0,False,0.0,4,0.04,-0.619289,-1.0,2024-08-28,3,True
41,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,quickstart,0x493b27d17cd2672631b30f32115f52eb2ec101850x02...,2024-08-29 00:40:05+00:00,"Will Apple launch the iPhone 16, Watch, and Ai...",CLOSED,1.798721,1,0.035974,2.906159,...,2.906159,False,0.0,3,0.03,1.041463,0.558516,2024-08-29,2,True
42,0x022b36c50b85b8ae7addfb8a35d76c59d5814834,quickstart,0x493b27d17cd2672631b30f32115f52eb2ec101850x02...,2024-08-30 03:04:05+00:00,"Will Apple launch the iPhone 16, Watch, and Ai...",CLOSED,1.294382,1,0.025888,1.952878,...,1.952878,False,0.0,3,0.03,0.602608,0.446287,2024-08-30,2,True


## Adding weekly time window


In [22]:
trader_agent_metrics = trader_agent_metrics.sort_values(by="creation_timestamp", ascending=True)

# Label every trade with its week (e.g. a trade on 2024-07-20 gets "Jul-21").
# creation_timestamp is tz-aware (UTC); converting it directly with to_period()
# makes pandas warn that the timezone information is dropped. The timezone is
# therefore removed explicitly first: tz_localize(None) keeps the wall-clock
# values, so the resulting labels are unchanged.
# NOTE(review): the "%b-%d" label carries no year, so the same week of two
# different years would share one label; confirm the data spans less than a year.
trader_agent_metrics["month_year_week"] = (
    trader_agent_metrics["creation_timestamp"]
    .dt.tz_localize(None)
    .dt.to_period("W")
    .dt.strftime("%b-%d")
)

  trader_agent_metrics["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d")


In [23]:
trader_agent_metrics.head()

Unnamed: 0,trader_address,market_creator,trade_id,creation_timestamp,title,market_status,collateral_amount,outcome_index,trade_fee_amount,outcomes_tokens_traded,...,redeemed,redeemed_amount,num_mech_calls,mech_fee_amount,net_earnings,roi,creation_date,nr_trades_per_market,multibet,month_year_week
2209,0x95ecc70d9f4feb162ed9f41c4432d990c36c8f57,quickstart,0x33ec6978224941c04b51704cbc55943898c13a1b0x95...,2024-07-20 02:10:10+00:00,Will iOS 18's significant upgrade for the iPho...,CLOSED,0.16,1,0.0032,0.310165,...,False,0.0,3,0.03,-0.1932,-1.0,2024-07-20,2,True,Jul-21
6588,0xf089874165be0377680683fd5187a058dea82683,quickstart,0x5658c1834d053e92143956069926ccef1cb3c92b0xf0...,2024-07-20 05:18:00+00:00,Will Argentina win the Copa America 2024?,CLOSED,1.0,0,0.02,1.839649,...,True,1.839649,2,0.02,0.799649,0.768893,2024-07-20,2,True,Jul-21
1316,0x49f4e3d8edc85efda9b0a36d96e406a59b13fcc2,quickstart,0x33ec6978224941c04b51704cbc55943898c13a1b0x49...,2024-07-20 05:28:50+00:00,Will iOS 18's significant upgrade for the iPho...,CLOSED,1.070855,1,0.021417,1.925006,...,False,0.0,3,0.03,-1.122272,-1.0,2024-07-20,2,True,Jul-21
7776,0xe283e408c6017447da9fe092d52c386753699680,pearl,0xb13be57227a3e806b36c72ab1cd5792df2e5e8070xe2...,2024-07-20 05:51:20+00:00,Will a scientific study be published on 23 Jul...,CLOSED,1.0,1,0.02,1.839649,...,False,0.0,2,0.02,-1.04,-1.0,2024-07-20,1,False,Jul-21
6344,0x480e5b5abd27cd754745871116e79caf90468dd4,quickstart,0x5658c1834d053e92143956069926ccef1cb3c92b0x48...,2024-07-20 06:32:05+00:00,Will Argentina win the Copa America 2024?,CLOSED,1.2,0,0.024,1.964669,...,True,1.964669,2,0.02,0.720669,0.579316,2024-07-20,1,False,Jul-21


In [24]:
trader_agent_metrics.market_creator.value_counts()

market_creator
quickstart    6896
pearl         1314
Name: count, dtype: int64

In [26]:
# Default fee charged for a single mech call.
DEFAULT_MECH_FEE = 0.01  # xDAI

In [41]:
from tqdm import tqdm

In [42]:
def compute_metrics(trader_address: str, trader_data: pd.DataFrame) -> dict:
    """Aggregate a set of trades of one trader into a single metrics record.

    Args:
        trader_address: value stored under the "trader_address" key of the result.
        trader_data: trades to aggregate. It must provide the columns
            net_earnings, earnings, collateral_amount, trade_fee_amount,
            mech_fee_amount and num_mech_calls.

    Returns:
        An empty dict when trader_data has no rows. Otherwise a dict with the keys
        trader_address, net_earnings, earnings, bet_amount, nr_mech_calls and roi,
        where roi = sum(net_earnings) / sum(costs) and is NaN when the summed
        costs are zero.
    """
    if len(trader_data) == 0:
        print("No data to compute metrics")
        return {}

    total_net_earnings = trader_data.net_earnings.sum()
    total_bet_amounts = trader_data.collateral_amount.sum()
    total_num_mech_calls = trader_data.num_mech_calls.sum()

    # Costs are the bet amounts plus the trade fees plus the mech fees, which is
    # the same denominator that reproduces the per-trade roi column of the sample
    # rows. mech_fee_amount already equals num_mech_calls * 0.01 in those rows,
    # so adding num_mech_calls * DEFAULT_MECH_FEE on top would count the mech
    # fees twice (and the trade fees would be missing).
    total_costs = (
        total_bet_amounts
        + trader_data.trade_fee_amount.sum()
        + trader_data.mech_fee_amount.sum()
    )

    return {
        "trader_address": trader_address,
        "net_earnings": total_net_earnings,
        "earnings": trader_data.earnings.sum(),
        "bet_amount": total_bet_amounts,
        "nr_mech_calls": total_num_mech_calls,
        # Explicit NaN instead of an inf / RuntimeWarning when nothing was spent.
        "roi": total_net_earnings / total_costs if total_costs else float("nan"),
    }


def compute_trader_metrics_by_trader_type(
    trader_address: str, week_traders_data: pd.DataFrame, trader_type: str = "all"
) -> dict:
    """Compute the metrics of one trader for the given week of data.

    The rows of the trader are selected from week_traders_data, optionally
    restricted to one trader type, and aggregated with compute_metrics, so the
    resulting roi is the summed net profit divided by the summed costs.

    Args:
        trader_address: address of the trader to select.
        week_traders_data: trades of the week; must contain the columns
            "trader_address" and "trader_type".
        trader_type: value of the "trader_type" column to keep (for example
            "multibet" or "singlebet"), or "all" to keep every row of the trader.

    Returns:
        The metrics dict produced by compute_metrics, or an empty dict when a
        specific trader_type was requested and the trader has no such rows.

    Raises:
        AssertionError: if week_traders_data has no "trader_type" column.
    """
    assert "trader_type" in week_traders_data.columns
    filtered_traders_data = week_traders_data.loc[
        week_traders_data["trader_address"] == trader_address
    ]

    if trader_type != "all":  # compute only for the specific type
        filtered_traders_data = filtered_traders_data.loc[
            filtered_traders_data["trader_type"] == trader_type
        ]
        if len(filtered_traders_data) == 0:
            # Same "no data" value as compute_metrics and the market-creator
            # variant, so callers always receive a dict.
            return {}

    return compute_metrics(trader_address, filtered_traders_data)


def compute_trader_metrics_by_market_creator(
    trader_address: str, week_traders_data: pd.DataFrame, market_creator: str = "all"
) -> dict:
    """Compute the metrics of one trader for the given week of data.

    The trader's rows are taken from week_traders_data, optionally narrowed to a
    single market creator, and handed to compute_metrics for aggregation.

    Args:
        trader_address: address of the trader to select.
        week_traders_data: trades of the week; must contain the columns
            "trader_address" and "market_creator".
        market_creator: value of the "market_creator" column to keep (for example
            "quickstart" or "pearl"), or "all" to keep every row of the trader.

    Returns:
        The dict returned by compute_metrics, or an empty dict when a specific
        market creator was requested and the trader has no rows for it.

    Raises:
        AssertionError: if week_traders_data has no "market_creator" column.
    """
    assert "market_creator" in week_traders_data.columns

    belongs_to_trader = week_traders_data["trader_address"] == trader_address
    trader_rows = week_traders_data.loc[belongs_to_trader]

    restrict_to_creator = market_creator != "all"
    if restrict_to_creator:
        print(f"Filtering only specific market creators = {market_creator}")
        from_creator = trader_rows["market_creator"] == market_creator
        trader_rows = trader_rows.loc[from_creator]
        if trader_rows.shape[0] == 0:
            print(f"No data. Skipping market creator {market_creator}")
            return {}  # nothing to aggregate for this creator

    print(
        f"Volume of data for trader {trader_address} "
        f"and market creator {market_creator} = {len(trader_rows)}"
    )
    return compute_metrics(trader_address, trader_rows)


def merge_trader_metrics(
    trader: str, weekly_data: pd.DataFrame, week: str
) -> pd.DataFrame:
    """Build the weekly metrics table of one trader, one row per market creator.

    The metrics are computed three times through
    compute_trader_metrics_by_market_creator: for all markets, for quickstart
    markets and for pearl markets. Every record is tagged with the week label and
    the market-creator category.

    Args:
        trader: address of the trader.
        weekly_data: trades of the week being processed.
        week: label stored in the "month_year_week" column of each row.

    Returns:
        A DataFrame with the "all" row first, followed by the "quickstart" and
        "pearl" rows when the trader has data for them.
    """
    records = []
    for creator in ("all", "quickstart", "pearl"):
        creator_metrics = compute_trader_metrics_by_market_creator(
            trader, weekly_data, market_creator=creator
        )
        # The "all" record is always kept; a specific creator only when it has data.
        if creator != "all" and len(creator_metrics) == 0:
            continue
        creator_metrics["month_year_week"] = week
        creator_metrics["market_creator"] = creator
        records.append(creator_metrics)

    result = pd.DataFrame.from_dict(records, orient="columns")
    print(f"Total length of all trader metrics for this week = {len(result)}")
    print(result.head())
    return result


def compute_weekly_metrics_by_market_creator(
    trader_agents_data: pd.DataFrame,
) -> pd.DataFrame:
    """Compute the metrics of every trader for every week, split by market creator.

    Args:
        trader_agents_data: trades of all traders; must contain the columns
            "month_year_week" and "trader_address" plus whatever
            merge_trader_metrics needs.

    Returns:
        The concatenation (with a fresh index) of the tables returned by
        merge_trader_metrics for each (week, trader) pair, or an empty DataFrame
        when the input has no rows.
    """
    contents = []
    # Every week is processed: a leftover debugging `break` used to stop the
    # loop after the first week only.
    for week in trader_agents_data.month_year_week.unique():
        weekly_data = trader_agents_data.loc[
            trader_agents_data["month_year_week"] == week
        ]
        print(f"Computing weekly metrics for week ={week} by market creator")
        # traverse each trader agent active during this week
        traders = list(weekly_data.trader_address.unique())
        for trader in tqdm(traders, desc="Trader' metrics", unit="metrics"):
            contents.append(merge_trader_metrics(trader, weekly_data, week))
    print("End computing all weekly metrics by market creator")
    if not contents:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame()
    return pd.concat(contents, ignore_index=True)

In [None]:
# Keep the result under a name so later cells can reuse it, and preview only the
# first rows instead of rendering the whole table.
weekly_metrics_by_market_creator = compute_weekly_metrics_by_market_creator(
    trader_agent_metrics
)
weekly_metrics_by_market_creator.head()

In [2]:
# Load the tools dataset; the cells below inspect its `error` column.
tools_df = pd.read_parquet("../tmp/tools.parquet")

In [3]:
tools_df.error.value_counts()

error
0    780323
1    214538
Name: count, dtype: int64

In [4]:
len(tools_df)

994861

In [5]:
# Percentage of rows whose `error` flag is 1, computed from the data itself
# rather than from counts typed in by hand, so it stays correct when the
# parquet file changes.
tools_df["error"].eq(1).mean() * 100

21.564620585187278