"""Tests for run_lr_diagnostics (auto_causal linear-regression diagnostics)."""
import pytest | |
import pandas as pd | |
import numpy as np | |
import statsmodels.api as sm | |
from auto_causal.methods.linear_regression.diagnostics import run_lr_diagnostics | |
# Reuse the sample-data fixture from the estimator tests.
@pytest.fixture
def sample_data():
    """Generate a small synthetic dataset for the linear-regression tests.

    The outcome is built as
    ``1.0 + 2.0 * treatment + 0.5 * covariate1 - 1.5 * covariate2 + error``
    where ``error`` is standard-normal noise, so the true treatment effect
    is 2.0.

    Registered as a pytest fixture so tests can request it by naming a
    ``sample_data`` parameter.

    Returns:
        pandas.DataFrame: 100 rows with the columns ``outcome``,
        ``treatment``, ``covariate1`` and ``covariate2``.
    """
    # A private RandomState seeded with 42 yields exactly the same draws as
    # np.random.seed(42) followed by the module-level np.random functions,
    # but leaves NumPy's global random state untouched for other tests.
    rng = np.random.RandomState(42)
    n_samples = 100
    treatment_effect = 2.0

    # Draw order matters for reproducibility: X1, X2, treatment, error.
    X1 = rng.normal(0, 1, n_samples)
    X2 = rng.normal(5, 2, n_samples)
    treatment = rng.binomial(1, 0.5, n_samples)
    error = rng.normal(0, 1, n_samples)

    outcome = 1.0 + treatment_effect * treatment + 0.5 * X1 - 1.5 * X2 + error

    return pd.DataFrame({
        'outcome': outcome,
        'treatment': treatment,
        'covariate1': X1,
        'covariate2': X2,
    })
def test_run_lr_diagnostics_implementation(sample_data):
    """Fit a real OLS model and verify the contents of the diagnostics report.

    Args:
        sample_data: DataFrame supplied by the ``sample_data`` fixture, with
            ``outcome``, ``treatment``, ``covariate1`` and ``covariate2``
            columns.
    """
    # Build the design matrix (constant + treatment + covariates) and fit OLS
    # so the diagnostics run against a genuine statsmodels results object.
    clean_df = sample_data.dropna()
    regressors = ['treatment', 'covariate1', 'covariate2']
    design = sm.add_constant(clean_df[regressors])
    fitted = sm.OLS(clean_df['outcome'], design).fit()

    report = run_lr_diagnostics(fitted, design)

    assert isinstance(report, dict)
    assert report["status"] == "Success"
    assert "details" in report
    details = report["details"]

    # Key diagnostic metrics describing the overall fit.
    for key in (
        "r_squared",
        "adj_r_squared",
        "f_statistic",
        "f_p_value",
        "n_observations",
        "degrees_of_freedom_resid",
    ):
        assert key in details

    # Residual normality test results.
    for key in (
        "residuals_normality_jb_stat",
        "residuals_normality_jb_p_value",
        "residuals_skewness",
        "residuals_kurtosis",
        "residuals_normality_status",
    ):
        assert key in details
    assert isinstance(details["residuals_normality_status"], str)

    # Homoscedasticity test results.
    for key in (
        "homoscedasticity_bp_lm_stat",
        "homoscedasticity_bp_lm_p_value",
        "homoscedasticity_bp_f_stat",
        "homoscedasticity_bp_f_p_value",
        "homoscedasticity_status",
    ):
        assert key in details
    assert isinstance(details["homoscedasticity_status"], str)

    # Checks that are only reported as fixed placeholder messages.
    placeholders = {
        "linearity_check": "Requires visual inspection (e.g., residual vs fitted plot)",
        "multicollinearity_check": "Not Implemented (Requires VIF)",
    }
    for key in placeholders:
        assert key in details
    for key, message in placeholders.items():
        assert details[key] == message

    # Basic type checks on a few representative values.
    for key, expected_type in (
        ("r_squared", float),
        ("f_p_value", float),
        ("n_observations", int),
    ):
        assert isinstance(details[key], expected_type)
def test_run_lr_diagnostics_failure():
    """Check that a non-results input yields a "Failed" status with an error entry."""
    # A plain string stands in for the fitted-results object.
    bogus_results = "not a results object"
    design = pd.DataFrame({'const': [1]})

    report = run_lr_diagnostics(bogus_results, design)

    assert report["status"] == "Failed"
    assert "error" in report