# FireShadow's picture
# Initial clean commit
# 1721aea
"""
Basic descriptive statistics for Difference in Means.
"""
from typing import Dict, Any
import pandas as pd
import numpy as np
import logging
logger = logging.getLogger(__name__)


def _dim_group_stats(stats: pd.DataFrame, level: int) -> Dict[str, Any]:
    """Return mean/std/count for the treatment group whose label equals ``level``.

    Labels are compared with ``==`` rather than looked up through the index, so
    boolean (False/True) and float (0.0/1.0) codings resolve to the 0/1 groups
    regardless of how the index dtype handles integer keys. A group that is
    absent is reported with NaN moments and a count of 0.
    """
    for position, label in enumerate(stats.index):
        if label == level:
            group = stats.iloc[position].to_dict()
            # Extracting a row upcasts the count to float; restore the integer
            # so it has the same type as the "group absent" fallback below.
            group['count'] = int(group['count'])
            return group
    return {'mean': np.nan, 'std': np.nan, 'count': 0}


def run_dim_diagnostics(df: pd.DataFrame, treatment: str, outcome: str) -> Dict[str, Any]:
    """
    Calculates basic descriptive statistics for treatment and control groups.

    Args:
        df: Input DataFrame (should already be filtered for NaNs in treatment/outcome).
        treatment: Name of the binary treatment variable column. Group 0 (or
            False) is treated as control, group 1 (or True) as treated.
        outcome: Name of the outcome variable column.

    Returns:
        Dictionary with a "status" key ("Success", "Warning - Empty Group(s)"
        or "Failed") and a "details" key holding per-group mean, standard
        deviation and count plus, on success, a variance-homogeneity
        assessment. Failures additionally carry an "error" message; exceptions
        are reported through the returned dictionary rather than raised.
    """
    details: Dict[str, Any] = {}
    try:
        # Check the columns up front: a missing column would otherwise surface
        # as a KeyError from groupby and be misreported as a 0/1 coding problem.
        missing = [col for col in (treatment, outcome) if col not in df.columns]
        if missing:
            message = f"Column(s) not found in DataFrame: {missing}"
            logger.error("Cannot run Difference in Means diagnostics: %s", message)
            return {"status": "Failed", "error": message, "details": details}

        stats = df.groupby(treatment)[outcome].agg(['mean', 'std', 'count'])

        control_stats = _dim_group_stats(stats, 0)
        treated_stats = _dim_group_stats(stats, 1)
        details['control_group_stats'] = control_stats
        details['treated_group_stats'] = treated_stats

        if control_stats['count'] == 0 or treated_stats['count'] == 0:
            logger.warning("One or both treatment groups have zero observations.")
            return {"status": "Warning - Empty Group(s)", "details": details}

        # Simple check for variance difference (Levene's test could be added)
        control_std = control_stats['std']
        treated_std = treated_stats['std']
        if pd.isna(control_std) or pd.isna(treated_std):
            # The std is NaN when it cannot be estimated (e.g. a group with a
            # single observation); that is "undefined", not "zero" variance.
            details['variance_homogeneity_status'] = "Could not calculate (undefined variance in a group)"
        elif control_std > 0 and treated_std > 0:
            ratio = (control_std ** 2) / (treated_std ** 2)
            details['variance_ratio_control_div_treated'] = ratio
            if ratio > 4 or ratio < 0.25:  # Rule of thumb
                details['variance_homogeneity_status'] = "Potentially Unequal (ratio > 4 or < 0.25)"
            else:
                details['variance_homogeneity_status'] = "Likely Similar"
        else:
            details['variance_homogeneity_status'] = "Could not calculate (zero variance in a group)"

        return {"status": "Success", "details": details}
    except KeyError as ke:
        logger.error("KeyError during diagnostics: %s. Treatment levels might not be 0/1.", ke)
        return {"status": "Failed", "error": f"Treatment levels might not be 0/1: {ke}", "details": details}
    except Exception as e:
        # Boundary handler: callers expect a status dictionary, never a raise.
        logger.error("Error running Difference in Means diagnostics: %s", e)
        return {"status": "Failed", "error": str(e), "details": details}