Spaces:
Running
Running
"""
Basic descriptive statistics for Difference in Means.
"""
from typing import Dict, Any | |
import pandas as pd | |
import numpy as np | |
import logging | |
logger = logging.getLogger(__name__) | |
def _dim_group_stats(stats: pd.DataFrame, level: int) -> Dict[str, Any]:
    """Return mean/std/count for one treatment level, or NaN/NaN/0 if the level is absent."""
    if level not in stats.index:
        return {'mean': np.nan, 'std': np.nan, 'count': 0}
    group = stats.loc[level].to_dict()
    # Selecting one row of the aggregate table mixes the integer count with the
    # float mean/std, so the count typically comes back as a float; cast it so
    # it has the same type as the absent-level fallback above.
    group['count'] = int(group['count'])
    return group


def run_dim_diagnostics(df: pd.DataFrame, treatment: str, outcome: str) -> Dict[str, Any]:
    """
    Calculate basic descriptive statistics for treatment and control groups.

    Rows are grouped by the value of ``treatment``: the group labelled 0 is
    reported as control and the group labelled 1 as treated. When both groups
    have a usable standard deviation, the ratio of their variances is compared
    against a 4x rule of thumb as a rough homogeneity check.

    This function does not raise; every failure is logged and reported through
    the returned dictionary.

    Args:
        df: Input DataFrame (should already be filtered for NaNs in treatment/outcome).
        treatment: Name of the binary treatment variable column.
        outcome: Name of the outcome variable column.

    Returns:
        Dictionary with:
          * "status": "Success", "Warning - Empty Group(s)" or "Failed".
          * "details": group means, standard deviations and counts under
            "control_group_stats" / "treated_group_stats", plus
            "variance_ratio_control_div_treated" (when computable) and
            "variance_homogeneity_status" on success.
          * "error": description of the problem (only when status is "Failed").
    """
    details: Dict[str, Any] = {}
    try:
        # Check the columns up front: a missing column would otherwise surface
        # as a KeyError and be misreported as a treatment-coding problem.
        missing = [col for col in (treatment, outcome) if col not in df.columns]
        if missing:
            message = f"Column(s) not found in DataFrame: {missing}"
            logger.error("Error running Difference in Means diagnostics: %s", message)
            return {"status": "Failed", "error": message, "details": details}

        stats = df.groupby(treatment)[outcome].agg(['mean', 'std', 'count'])

        # Ensure both groups (0 and 1) are present if possible
        control_stats = _dim_group_stats(stats, 0)
        treated_stats = _dim_group_stats(stats, 1)
        details['control_group_stats'] = control_stats
        details['treated_group_stats'] = treated_stats

        if control_stats['count'] == 0 or treated_stats['count'] == 0:
            logger.warning("One or both treatment groups have zero observations.")
            return {"status": "Warning - Empty Group(s)", "details": details}

        # Simple check for variance difference (Levene's test could be added)
        control_std = control_stats.get('std', 0)
        treated_std = treated_stats.get('std', 0)
        if pd.isna(control_std) or pd.isna(treated_std):
            # The sample standard deviation is NaN (not zero) when it cannot be
            # computed, e.g. for a group with a single observation.
            details['variance_homogeneity_status'] = (
                "Could not calculate (variance undefined in a group, e.g. a single observation)"
            )
        elif control_std > 0 and treated_std > 0:
            ratio = (control_std ** 2) / (treated_std ** 2)
            details['variance_ratio_control_div_treated'] = ratio
            if ratio > 4 or ratio < 0.25:  # Rule of thumb
                details['variance_homogeneity_status'] = "Potentially Unequal (ratio > 4 or < 0.25)"
            else:
                details['variance_homogeneity_status'] = "Likely Similar"
        else:
            details['variance_homogeneity_status'] = "Could not calculate (zero variance in a group)"

        return {"status": "Success", "details": details}
    except KeyError as ke:
        logger.error("KeyError during diagnostics: %s. Treatment levels might not be 0/1.", ke)
        return {"status": "Failed", "error": f"Treatment levels might not be 0/1: {ke}", "details": details}
    except Exception as e:
        # Deliberately broad: diagnostics are best-effort and must not crash the caller.
        logger.error("Error running Difference in Means diagnostics: %s", e)
        return {"status": "Failed", "error": str(e), "details": details}