In [1]:
import random
from datetime import datetime, timedelta
import pandas as pd
import numpy as np


# Function to generate random timestamps and response times
def generate_random_data(date, start_time, end_time, count, response_time_range, null_percentage, seed=None):
    """Build a DataFrame of random, time-ordered response-time samples.

    Parameters
    ----------
    date : datetime.date
        Calendar day the samples fall on.
    start_time, end_time : datetime.time
        Bounds of the sampling window on ``date``. Timestamps are drawn at
        whole-second offsets from ``start_time``; both bounds are inclusive.
    count : int
        Number of rows to generate.
    response_time_range : tuple
        ``(low, high)`` bounds passed to ``randint``; both are inclusive.
    null_percentage : float
        Fraction of rows whose response time is blanked out. Exactly
        ``int(null_percentage * count)`` randomly chosen rows are set to None.
    seed : optional
        When given, a private ``random.Random(seed)`` is used so the same
        arguments always yield the same frame. When omitted, the global
        ``random`` module is used, which keeps ``random.seed(...)`` effective.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Timestamp'`` (sorted ascending) and ``'ResponseTime(ms)'``.

    Raises
    ------
    ValueError
        Propagated from ``random`` when ``end_time`` is earlier than
        ``start_time``, when the response-time range is empty, or when the
        number of nulls is negative or larger than ``count``.
    """
    # Either the module-level generator (original behaviour) or a private,
    # seeded one; both expose the same randint/sample API.
    rng = random if seed is None else random.Random(seed)

    # Combine date with start and end times
    start_datetime = datetime.combine(date, start_time)
    end_datetime = datetime.combine(date, end_time)

    # Window length is the same for every sample, so compute it once.
    window_seconds = int((end_datetime - start_datetime).total_seconds())

    # Generate random timestamps and put them in chronological order.
    random_timestamps = [
        start_datetime + timedelta(seconds=rng.randint(0, window_seconds))
        for _ in range(count)
    ]
    random_timestamps.sort()

    # Generate random response times
    low, high = response_time_range[0], response_time_range[1]
    random_response_times = [rng.randint(low, high) for _ in range(count)]

    # Introduce null values in response times. The timestamps are already
    # sorted, so blanking by index leaves gaps scattered across the window.
    null_count = int(null_percentage * count)
    for idx in rng.sample(range(count), null_count):
        random_response_times[idx] = None

    # Create a pandas DataFrame
    return pd.DataFrame({
        'Timestamp': random_timestamps,
        'ResponseTime(ms)': random_response_times,
    })

# Parameters
RANDOM_SEED = 42  # fixed so a fresh "Restart Kernel -> Run All" regenerates the same synthetic data
date = datetime.strptime('2024-07-26', '%Y-%m-%d').date()
start_time = datetime.strptime('12:00:00', '%H:%M:%S').time()
end_time = datetime.strptime('12:30:00', '%H:%M:%S').time()
count = 60
response_time_range = (100, 250)
null_percentage = 0.50

# Seed the global generator that generate_random_data draws from, so every
# downstream table in the notebook is reproducible.
random.seed(RANDOM_SEED)

# Generate random data and get the DataFrame
df = generate_random_data(date, start_time, end_time, count, response_time_range, null_percentage)
df.head()


Unnamed: 0,Timestamp,ResponseTime(ms)
0,2024-07-26 12:00:12,169.0
1,2024-07-26 12:00:27,
2,2024-07-26 12:00:42,
3,2024-07-26 12:00:57,146.0
4,2024-07-26 12:01:30,202.0


In [109]:
# Function to calculate the specified percentile of response times over specified frequency
def calculate_percentile(df, freq, percentile):
    """Compute a percentile of 'ResponseTime(ms)' per time bucket.

    df: DataFrame with a datetime 'Timestamp' column and a 'ResponseTime(ms)' column
    freq: Frequency for grouping the data (e.g., '1Min', '5Min', '1H')
    percentile: Percentile value (e.g., 0.95, 0.99)

    Returns a new DataFrame with one row per bucket: 'Timestamp' (bucket
    start) and 'p<NN>_ResponseTime(ms)'. Buckets with no usable samples hold
    None instead of NaN. ``df`` itself is not modified.

    NOTE: <NN> is ``int(percentile*100)``, which truncates; a value such as
    0.57 (0.57*100 == 56.99...) therefore yields 'p56_...'.
    evaluate_alarm_state derives the column name the same way, so the two
    stay in agreement.
    """
    column = f"p{int(percentile*100)}_ResponseTime(ms)"
    percentile_df = (
        df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"]
        .quantile(percentile)
        .reset_index(name=column)
    )

    # Turn NaN buckets into None explicitly. DataFrame.replace(np.nan, None)
    # is avoided because the meaning of ``value=None`` has differed between
    # pandas versions, and in-place mutation makes re-running cells fragile.
    values = percentile_df[column]
    if values.isna().any():
        percentile_df[column] = pd.Series(
            [None if pd.isna(v) else v for v in values],
            index=percentile_df.index,
            dtype=object,  # object dtype is required to hold a real None
        )
    return percentile_df



# Inline form of the grouping/quantile step performed by calculate_percentile,
# kept here for reference only:
# df.groupby(pd.Grouper(key='Timestamp', freq='1Min'))["ResponseTime(ms)"]\
#     .quantile(0.95).reset_index(name="p95_ResponseTime(ms)")

# Per-minute 95th percentile of the response times; the result column is
# named "p95_ResponseTime(ms)".
percentile_df = calculate_percentile(df, '1min', 0.95)
percentile_df.head(10)

Unnamed: 0,Timestamp,p95_ResponseTime(ms)
0,2024-07-26 12:02:00,
1,2024-07-26 12:03:00,
2,2024-07-26 12:04:00,184.8
3,2024-07-26 12:05:00,
4,2024-07-26 12:06:00,181.3
5,2024-07-26 12:07:00,223.0
6,2024-07-26 12:08:00,196.2
7,2024-07-26 12:09:00,151.0
8,2024-07-26 12:10:00,
9,2024-07-26 12:11:00,227.45


In [82]:
# Inspect the raw values of the p95 column, including the entries for
# intervals that have no value.
percentile_df["p95_ResponseTime(ms)"].values

array([229.8 ,    nan, 224.  ,    nan, 234.  ,    nan, 162.5 ,    nan,
       136.  ,    nan, 205.35,    nan,    nan, 183.  , 241.  , 221.8 ,
          nan, 116.4 , 174.65, 133.35, 176.  , 127.  , 209.85, 207.  ,
       200.  , 241.25, 217.  ,    nan, 188.7 , 188.  ])

In [81]:
def chunk_list(input_list, size=3):
    """Yield consecutive slices of ``input_list`` with at most ``size`` items each.

    input_list: any sliceable sequence with a length (list, tuple, str, ...)
    size: maximum number of items per chunk; must be positive

    Every chunk has exactly ``size`` items except possibly the last, which
    holds the remainder. An empty input yields nothing.

    Raises ValueError (on first iteration, since this is a generator) when
    ``size`` is not positive; such a size could never consume the input.
    """
    if size <= 0:
        raise ValueError("size must be a positive integer")
    # Step through by index instead of repeatedly re-slicing the remainder,
    # which would copy the tail of the sequence on every iteration.
    for start in range(0, len(input_list), size):
        yield input_list[start:start + size]


# for chunk in chunk_list(list(percentile_df["p95_ResponseTime(ms)"].values)):
#     print(chunk)


In [72]:
# Scratch example of a data-point string in the format built by
# evaluate_alarm_state: '-' marks a missing point, '0' a point below the
# threshold and 'X' any other point.
s_ = "0-X-X"
# Number of non-missing points in the string:
# len(s_) - s_.count("-")

3

In [112]:
def evaluate_alarm_state(percentile_df, percentile_value, threshold, datapoints_to_alarm, evaluation_range=5):
    """Summarise a percentile series as fixed-size evaluation windows.

    percentile_df: DataFrame holding a 'p<NN>_ResponseTime(ms)' column, where
        <NN> is ``int(percentile_value*100)``
    percentile_value: Percentile used to build that column (e.g., 0.95)
    threshold: Values below it are marked '0', all other present values 'X'
    datapoints_to_alarm: Number of data points needed within one window
    evaluation_range: Window length in data points (default 5); must be positive

    The series is cut into consecutive windows of ``evaluation_range`` points.
    Each window becomes a string with one character per point:
    '-' (missing: None or NaN), '0' (below threshold) or 'X' (otherwise).
    A short final window is padded with '-'.

    Returns a DataFrame with one row per window:
      * 'DataPoints' - the window string
      * '# of data points that must be filled' - when more than
        ``evaluation_range - datapoints_to_alarm`` points are missing, how many
        more present points would be needed to reach ``datapoints_to_alarm``;
        otherwise 0
      * 'MISSING', 'IGNORE', 'BREACHING', 'NOT BREACHING' - empty-string
        placeholders; nothing is computed for them here (presumably reserved
        for per-treatment alarm states - confirm with the author).

    Raises ValueError when ``evaluation_range`` is not positive.
    """
    if evaluation_range <= 0:
        raise ValueError("evaluation_range must be a positive integer")

    column = f"p{int(percentile_value*100)}_ResponseTime(ms)"
    data_points = list(percentile_df[column].values)

    window_reprs = []
    must_be_filled = []
    for start in range(0, len(data_points), evaluation_range):
        chunk = data_points[start:start + evaluation_range]

        symbols = []
        for dp in chunk:
            # pd.isna covers both None and NaN. A bare ``is None`` check lets
            # NaN fall through, and ``nan < threshold`` is False, so a gap in
            # a float column would be reported as 'X'.
            if pd.isna(dp):
                symbols.append('-')
            elif dp < threshold:
                symbols.append('0')
            else:
                symbols.append('X')

        # Fill the remaining data points with '-' if the chunk is less than evaluation_range
        data_point_repr = ''.join(symbols).ljust(evaluation_range, '-')

        missing = data_point_repr.count('-')
        present = len(data_point_repr) - missing
        if missing > (evaluation_range - datapoints_to_alarm):
            num_dp_that_must_be_filled = datapoints_to_alarm - present
        else:
            num_dp_that_must_be_filled = 0

        window_reprs.append(data_point_repr)
        must_be_filled.append(num_dp_that_must_be_filled)

    blanks = [""] * len(window_reprs)
    return pd.DataFrame({
        "DataPoints": window_reprs,
        "# of data points that must be filled": must_be_filled,
        "MISSING": list(blanks),
        "IGNORE": list(blanks),
        "BREACHING": list(blanks),
        "NOT BREACHING": list(blanks),
    })


# Classify the per-minute p95 series against a threshold of 150 (ms, per the
# column name), requiring 3 data points to alarm and using the default
# evaluation_range of 5.
evaluate_alarm_state(
    percentile_df=percentile_df,
    threshold=150,
    percentile_value=0.95,
    datapoints_to_alarm=3,
)

Unnamed: 0,DataPoints,# of data points that must be filled,MISSING,IGNORE,BREACHING,NOT BREACHING
0,--X-X,1,,,,
1,XXX-X,0,,,,
2,XX0--,0,,,,
3,XXXXX,0,,,,
4,X00-X,0,,,,
5,--X--,2,,,,
