File size: 3,271 Bytes
1721aea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import pytest
import pandas as pd
import numpy as np
import statsmodels.api as sm
from auto_causal.methods.linear_regression.diagnostics import run_lr_diagnostics

# Reuse the sample data fixture from estimator tests
@pytest.fixture
def sample_data():
    """Generates simple synthetic data for testing LR."""
    np.random.seed(42)
    n_samples = 100
    treatment_effect = 2.0
    X1 = np.random.normal(0, 1, n_samples)
    X2 = np.random.normal(5, 2, n_samples)
    treatment = np.random.binomial(1, 0.5, n_samples)
    error = np.random.normal(0, 1, n_samples)
    outcome = 1.0 + treatment_effect * treatment + 0.5 * X1 - 1.5 * X2 + error
    
    df = pd.DataFrame({
        'outcome': outcome,
        'treatment': treatment,
        'covariate1': X1,
        'covariate2': X2
    })
    return df

def test_run_lr_diagnostics_implementation(sample_data):
    """Tests the implemented diagnostics function with real results."""
    # Run a regression to get a real results object
    df_analysis = sample_data.dropna()
    covariates = ['covariate1', 'covariate2']
    X = df_analysis[['treatment'] + covariates]
    X = sm.add_constant(X)
    y = df_analysis['outcome']
    model = sm.OLS(y, X)
    results = model.fit()
    
    # Run diagnostics
    diagnostics = run_lr_diagnostics(results, X) 
    
    assert isinstance(diagnostics, dict)
    assert diagnostics["status"] == "Success"
    assert "details" in diagnostics
    details = diagnostics["details"]
    
    # Check for key diagnostic metrics
    assert "r_squared" in details
    assert "adj_r_squared" in details
    assert "f_statistic" in details
    assert "f_p_value" in details
    assert "n_observations" in details
    assert "degrees_of_freedom_resid" in details
    
    # Check normality test results
    assert "residuals_normality_jb_stat" in details
    assert "residuals_normality_jb_p_value" in details
    assert "residuals_skewness" in details
    assert "residuals_kurtosis" in details
    assert "residuals_normality_status" in details
    assert isinstance(details["residuals_normality_status"], str)

    # Check homoscedasticity test results
    assert "homoscedasticity_bp_lm_stat" in details
    assert "homoscedasticity_bp_lm_p_value" in details
    assert "homoscedasticity_bp_f_stat" in details
    assert "homoscedasticity_bp_f_p_value" in details
    assert "homoscedasticity_status" in details
    assert isinstance(details["homoscedasticity_status"], str)
    
    # Check placeholder statuses
    assert "linearity_check" in details
    assert "multicollinearity_check" in details
    assert details["linearity_check"] == "Requires visual inspection (e.g., residual vs fitted plot)"
    assert details["multicollinearity_check"] == "Not Implemented (Requires VIF)"
    
    # Check types (basic)
    assert isinstance(details["r_squared"], float)
    assert isinstance(details["f_p_value"], float)
    assert isinstance(details["n_observations"], int)

def test_run_lr_diagnostics_failure():
    """Test diagnostic failure mode (e.g., passing wrong object)."""
    # Pass a non-results object
    diagnostics = run_lr_diagnostics("not a results object", pd.DataFrame({'const': [1]}))
    assert diagnostics["status"] == "Failed"
    assert "error" in diagnostics