Spaces:
Running
Running
File size: 3,271 Bytes
1721aea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import pytest
import pandas as pd
import numpy as np
import statsmodels.api as sm
from auto_causal.methods.linear_regression.diagnostics import run_lr_diagnostics
# Local copy of the sample data fixture used by the estimator tests
@pytest.fixture
def sample_data():
    """Build a small, seeded synthetic dataset for the LR diagnostics tests.

    The outcome is a linear combination of a binary treatment (coefficient
    2.0), two normally distributed covariates, an intercept of 1.0 and
    standard-normal noise.

    Returns:
        A pandas DataFrame with 100 rows and the columns ``outcome``,
        ``treatment``, ``covariate1`` and ``covariate2``.
    """
    # Fixed seed so every test run sees exactly the same data.
    np.random.seed(42)
    size = 100
    true_effect = 2.0

    # The order of these draws determines the generated values; keep it stable.
    cov_a = np.random.normal(0, 1, size)
    cov_b = np.random.normal(5, 2, size)
    treated = np.random.binomial(1, 0.5, size)
    noise = np.random.normal(0, 1, size)

    response = 1.0 + true_effect * treated + 0.5 * cov_a - 1.5 * cov_b + noise

    columns = {
        'outcome': response,
        'treatment': treated,
        'covariate1': cov_a,
        'covariate2': cov_b,
    }
    return pd.DataFrame(columns)
def test_run_lr_diagnostics_implementation(sample_data):
    """Fit an OLS model on the fixture data and validate the diagnostics output.

    Checks that ``run_lr_diagnostics`` returns a dict with a "Success" status
    and a ``details`` mapping containing the expected fit, normality,
    homoscedasticity and placeholder entries, with basic type checks.
    """
    # Fit a real regression so the diagnostics receive a genuine results object.
    frame = sample_data.dropna()
    regressors = ['treatment', 'covariate1', 'covariate2']
    design = sm.add_constant(frame[regressors])
    fitted = sm.OLS(frame['outcome'], design).fit()

    diagnostics = run_lr_diagnostics(fitted, design)

    assert isinstance(diagnostics, dict)
    assert diagnostics["status"] == "Success"
    assert "details" in diagnostics
    details = diagnostics["details"]

    # Overall model-fit metrics.
    fit_keys = (
        "r_squared",
        "adj_r_squared",
        "f_statistic",
        "f_p_value",
        "n_observations",
        "degrees_of_freedom_resid",
    )
    for key in fit_keys:
        assert key in details

    # Residual normality results.
    normality_keys = (
        "residuals_normality_jb_stat",
        "residuals_normality_jb_p_value",
        "residuals_skewness",
        "residuals_kurtosis",
        "residuals_normality_status",
    )
    for key in normality_keys:
        assert key in details
    assert isinstance(details["residuals_normality_status"], str)

    # Homoscedasticity results.
    homoscedasticity_keys = (
        "homoscedasticity_bp_lm_stat",
        "homoscedasticity_bp_lm_p_value",
        "homoscedasticity_bp_f_stat",
        "homoscedasticity_bp_f_p_value",
        "homoscedasticity_status",
    )
    for key in homoscedasticity_keys:
        assert key in details
    assert isinstance(details["homoscedasticity_status"], str)

    # Checks that are reported as fixed placeholder messages.
    placeholder_messages = {
        "linearity_check": "Requires visual inspection (e.g., residual vs fitted plot)",
        "multicollinearity_check": "Not Implemented (Requires VIF)",
    }
    for key in placeholder_messages:
        assert key in details
    for key, message in placeholder_messages.items():
        assert details[key] == message

    # Basic type checks on a few representative values.
    expected_types = (
        ("r_squared", float),
        ("f_p_value", float),
        ("n_observations", int),
    )
    for key, expected_type in expected_types:
        assert isinstance(details[key], expected_type)
def test_run_lr_diagnostics_failure():
    """Verify that an invalid results object yields a "Failed" status.

    A plain string is passed in place of a fitted results object; the
    returned dict must report the failure and include an ``error`` entry.
    """
    bogus_results = "not a results object"
    design = pd.DataFrame({'const': [1]})

    outcome = run_lr_diagnostics(bogus_results, design)

    assert outcome["status"] == "Failed"
    assert "error" in outcome
|