File size: 4,550 Bytes
1721aea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import pytest
import pandas as pd
import numpy as np
from auto_causal.methods.instrumental_variable.diagnostics import (
    calculate_first_stage_f_statistic,
    run_overidentification_test
)
from statsmodels.sandbox.regression.gmm import IV2SLS

# Fixture for basic IV data
@pytest.fixture
def iv_data():
    """Build two synthetic IV datasets that share instruments and covariates.

    Returns:
        tuple: ``(df_strong, df_weak)``. Each frame has the columns ``Y``,
        ``T``, ``Z1``, ``Z2``, ``W`` and ``U`` (in that order). The treatment
        loads on ``Z1`` with coefficient 0.5 in the strong frame and 0.05 in
        the weak frame; ``Z2`` enters neither the treatment nor the outcome
        equation.
    """
    # The global seed plus a fixed draw order makes the data reproducible;
    # the order of the draws below must not change.
    np.random.seed(42)
    n_obs = 1000

    def standard_normal():
        # One vector of N(0, 1) draws, one entry per observation.
        return np.random.normal(0, 1, n_obs)

    # Dict literals evaluate left to right, so the draw order is Z1, Z2, W, U.
    shared = {
        'Z1': standard_normal(),  # Instrument 1
        'Z2': standard_normal(),  # Instrument 2 (for over-ID test)
        'W': standard_normal(),   # Exogenous covariate
        'U': standard_normal(),   # Unobserved confounder
    }
    z1, w, u = shared['Z1'], shared['W'], shared['U']

    def simulate(z1_loading):
        # Treatment noise is drawn before outcome noise.
        treatment = z1_loading * z1 + 0.5 * w + 0.5 * u + standard_normal()
        outcome = 2.0 * treatment + 1.0 * w + 1.0 * u + standard_normal()
        return pd.DataFrame({'Y': outcome, 'T': treatment, **shared})

    # Strong case first, then weak case (tuple elements evaluate in order).
    return simulate(0.5), simulate(0.05)


def test_calculate_first_stage_f_statistic_strong(iv_data):
    """The strong dataset should give a large, significant first-stage F."""
    strong_df = iv_data[0]
    first_stage = calculate_first_stage_f_statistic(
        df=strong_df,
        treatment='T',
        instruments=['Z1'],
        covariates=['W'],
    )
    f_value, p_value = first_stage

    assert f_value is not None
    assert p_value is not None
    # Strong instrument: F above 10 and a clearly significant p-value.
    assert f_value > 10
    assert p_value < 0.01

def test_calculate_first_stage_f_statistic_weak(iv_data):
    """The weak dataset should give a first-stage F that is not very large."""
    weak_df = iv_data[1]
    first_stage = calculate_first_stage_f_statistic(
        df=weak_df,
        treatment='T',
        instruments=['Z1'],
        covariates=['W'],
    )
    f_value, p_value = first_stage

    assert f_value is not None
    assert p_value is not None
    # Random noise can occasionally push a weak instrument past 10, so the
    # bound is deliberately loose: only check that F is not extremely high.
    assert f_value < 15
    # The p-value is not asserted: it may still be significant because the
    # sample size is large.

def test_calculate_first_stage_f_statistic_no_instruments(caplog):
    """Test graceful handling when no instruments are provided."""
    tiny_df = pd.DataFrame({'T': [1, 2], 'W': [3, 4]})

    # An empty instrument list is expected to return (None, None) and log a
    # message, not raise an exception.
    first_stage = calculate_first_stage_f_statistic(
        df=tiny_df,
        treatment='T',
        instruments=[],
        covariates=['W'],
    )
    f_value, p_value = first_stage

    assert f_value is None
    assert p_value is None
    # The log output captured by pytest must mention the missing instruments.
    assert "No instruments provided" in caplog.text


def test_run_overidentification_test_applicable(iv_data):
    """With two instruments and a fitted IV model, the test should run."""
    strong_df = iv_data[0]

    # The over-identification test needs a fitted statsmodels IV result, so
    # fit 2SLS on a copy of the data that carries an explicit intercept.
    design = strong_df.assign(intercept=1)
    regressors = ['intercept', 'W', 'T']
    instrument_set = ['intercept', 'W', 'Z1', 'Z2']  # Z1, Z2 are instruments
    sm_results = IV2SLS(
        endog=design['Y'],
        exog=design[regressors],
        instrument=design[instrument_set],
    ).fit()

    outcome_tuple = run_overidentification_test(
        sm_results=sm_results,
        df=strong_df,
        treatment='T',
        outcome='Y',
        instruments=['Z1', 'Z2'],
        covariates=['W'],
    )
    stat, p_val, status = outcome_tuple

    assert "Test successful" in status
    assert stat is not None
    assert p_val is not None
    assert stat >= 0
    # The model is correctly specified, so H0 should NOT be rejected (p > 0.05).
    assert p_val > 0.05

def test_run_overidentification_test_not_applicable(iv_data):
    """A single instrument should make the test report 'not applicable'."""
    strong_df = iv_data[0]

    # Only one instrument is supplied; no fitted results object is passed
    # because it is not needed when the test is not applicable.
    outcome_tuple = run_overidentification_test(
        sm_results=None,
        df=strong_df,
        treatment='T',
        outcome='Y',
        instruments=['Z1'],
        covariates=['W'],
    )
    stat, p_val, status = outcome_tuple

    assert stat is None
    assert p_val is None
    assert "not applicable" in status.lower()

def test_run_overidentification_test_no_sm_results(iv_data):
    """Two instruments without a fitted results object should be reported."""
    strong_df = iv_data[0]

    # More than one instrument, but the statsmodels results object is missing.
    outcome_tuple = run_overidentification_test(
        sm_results=None,
        df=strong_df,
        treatment='T',
        outcome='Y',
        instruments=['Z1', 'Z2'],
        covariates=['W'],
    )
    stat, p_val, status = outcome_tuple

    assert stat is None
    assert p_val is None
    assert "object not available" in status.lower()