FireShadow's picture
Initial clean commit
1721aea
raw
history blame
2.7 kB
"""
Basic descriptive statistics for Difference in Means.
"""
from typing import Dict, Any
import pandas as pd
import numpy as np
import logging
logger = logging.getLogger(__name__)
def run_dim_diagnostics(df: pd.DataFrame, treatment: str, outcome: str) -> Dict[str, Any]:
    """
    Calculates basic descriptive statistics for treatment and control groups.

    The function never raises: every failure is reported through the
    returned dictionary.

    Args:
        df: Input DataFrame (should already be filtered for NaNs in treatment/outcome).
        treatment: Name of the binary treatment variable column (levels 0 and 1).
        outcome: Name of the outcome variable column.

    Returns:
        Dictionary with a "status" key and a "details" key, plus an "error"
        key when status is "Failed".
        - "Success": details holds 'control_group_stats' and
          'treated_group_stats' (each with 'mean', 'std', 'count'), a
          'variance_homogeneity_status' string and, when both standard
          deviations are positive, 'variance_ratio_control_div_treated'.
        - "Warning - Empty Group(s)": level 0 or level 1 has no observations;
          details holds the two group stats only.
        - "Failed": an exception occurred; "error" describes it.
    """
    details: Dict[str, Any] = {}

    def _stats_for_level(stats: pd.DataFrame, level: int) -> Dict[str, Any]:
        # Placeholder stats for a level that does not occur in the data.
        if level not in stats.index:
            return {'mean': np.nan, 'std': np.nan, 'count': 0}
        group = stats.loc[level].to_dict()
        # Selecting a row upcasts the integer count to float; restore it so
        # present and absent groups report the same type.
        group['count'] = int(group['count'])
        return group

    try:
        stats = df.groupby(treatment)[outcome].agg(['mean', 'std', 'count'])

        control_stats = _stats_for_level(stats, 0)
        treated_stats = _stats_for_level(stats, 1)
        details['control_group_stats'] = control_stats
        details['treated_group_stats'] = treated_stats

        if control_stats['count'] == 0 or treated_stats['count'] == 0:
            logger.warning("One or both treatment groups have zero observations.")
            return {"status": "Warning - Empty Group(s)", "details": details}

        # Simple check for variance difference (Levene's test could be added)
        control_std = control_stats.get('std', 0)
        treated_std = treated_stats.get('std', 0)
        if pd.isna(control_std) or pd.isna(treated_std):
            # The sample std is NaN (not zero) for a single-observation group,
            # so report it separately from the genuine zero-variance case.
            details['variance_homogeneity_status'] = (
                "Could not calculate (standard deviation undefined in a group, "
                "e.g. fewer than 2 observations)"
            )
        elif control_std > 0 and treated_std > 0:
            ratio = (control_std ** 2) / (treated_std ** 2)
            details['variance_ratio_control_div_treated'] = ratio
            if ratio > 4 or ratio < 0.25:  # Rule of thumb
                details['variance_homogeneity_status'] = "Potentially Unequal (ratio > 4 or < 0.25)"
            else:
                details['variance_homogeneity_status'] = "Likely Similar"
        else:
            details['variance_homogeneity_status'] = "Could not calculate (zero variance in a group)"

        return {"status": "Success", "details": details}

    except KeyError as ke:
        # The usual cause of a KeyError here is a column name that does not
        # exist, so name the missing column(s) instead of blaming the
        # treatment coding. groupby also accepts an index level name, so the
        # treatment is only "missing" if it is neither a column nor a level.
        missing = []
        if treatment not in df.columns and treatment not in df.index.names:
            missing.append(treatment)
        if outcome not in df.columns:
            missing.append(outcome)
        if missing:
            message = f"Column(s) not found in DataFrame: {missing}"
        else:
            message = f"Treatment levels might not be 0/1: {ke}"
        logger.error("KeyError during diagnostics: %s. %s", ke, message)
        return {"status": "Failed", "error": message, "details": details}
    except Exception as e:
        # Broad catch is deliberate: diagnostics are best-effort and must not
        # abort the caller's estimation pipeline.
        logger.error("Error running Difference in Means diagnostics: %s", e)
        return {"status": "Failed", "error": str(e), "details": details}