# Placeholder for IV-specific diagnostic functions import pandas as pd import statsmodels.api as sm from statsmodels.regression.linear_model import OLS # from statsmodels.sandbox.regression.gmm import IV2SLSResults # Removed problematic import from typing import Dict, Any, List, Tuple, Optional import logging # Import logging import numpy as np # Import numpy for np.zeros # Configure logger logger = logging.getLogger(__name__) def calculate_first_stage_f_statistic(df: pd.DataFrame, treatment: str, instruments: List[str], covariates: List[str]) -> Tuple[Optional[float], Optional[float]]: """ Calculates the F-statistic for instrument relevance in the first stage regression. Regresses treatment ~ instruments + covariates. Tests the joint significance of the instrument coefficients. Args: df: Input DataFrame. treatment: Name of the treatment variable. instruments: List of instrument variable names. covariates: List of covariate names. Returns: A tuple containing (F-statistic, p-value). Returns (None, None) on error. """ logger.info("Diagnostics: Calculating First-Stage F-statistic...") try: df_copy = df.copy() df_copy['intercept'] = 1 exog_vars = ['intercept'] + covariates all_first_stage_exog = list(dict.fromkeys(exog_vars + instruments)) # Ensure unique columns endog = df_copy[treatment] exog = df_copy[all_first_stage_exog] # Check for perfect multicollinearity before fitting if exog.shape[1] > 1: corr_matrix = exog.corr() # Check if correlation matrix calculation failed (e.g., constant columns) or high correlation if corr_matrix.isnull().values.any() or (corr_matrix.abs() > 0.9999).sum().sum() > exog.shape[1]: # Check off-diagonal elements logger.warning("High multicollinearity or constant column detected in first stage exogenous variables.") # Note: statsmodels OLS might handle perfect collinearity by dropping columns, but F-test might be unreliable. first_stage_model = OLS(endog, exog).fit() # Construct the restriction matrix (R) to test H0: instrument coeffs = 0 num_instruments = len(instruments) if num_instruments == 0: logger.warning("No instruments provided for F-statistic calculation.") return None, None num_exog_total = len(all_first_stage_exog) # Ensure instruments are actually in the fitted model's exog names (in case statsmodels dropped some) fitted_exog_names = first_stage_model.model.exog_names valid_instruments = [inst for inst in instruments if inst in fitted_exog_names] if not valid_instruments: logger.error("None of the provided instruments were included in the first-stage regression model (possibly due to collinearity).") return None, None if len(valid_instruments) < len(instruments): logger.warning(f"Instruments dropped by OLS: {set(instruments) - set(valid_instruments)}") instrument_indices = [fitted_exog_names.index(inst) for inst in valid_instruments] # Need to adjust R matrix size based on fitted model's exog R = np.zeros((len(valid_instruments), len(fitted_exog_names))) for i, idx in enumerate(instrument_indices): R[i, idx] = 1 # Perform F-test f_test_result = first_stage_model.f_test(R) f_statistic = float(f_test_result.fvalue) p_value = float(f_test_result.pvalue) logger.info(f" F-statistic: {f_statistic:.4f}, p-value: {p_value:.4f}") return f_statistic, p_value except Exception as e: logger.error(f"Error calculating first-stage F-statistic: {e}", exc_info=True) return None, None def run_overidentification_test(sm_results: Optional[Any], df: pd.DataFrame, treatment: str, outcome: str, instruments: List[str], covariates: List[str]) -> Tuple[Optional[float], Optional[float], Optional[str]]: """ Runs an overidentification test (Sargan-Hansen) if applicable. This test is only valid if the number of instruments exceeds the number of endogenous regressors (typically 1, the treatment variable). Requires results from a statsmodels IV estimation. Args: sm_results: The fitted results object from statsmodels IV2SLS.fit(). df: Input DataFrame. treatment: Name of the treatment variable. outcome: Name of the outcome variable. instruments: List of instrument variable names. covariates: List of covariate names. Returns: Tuple: (test_statistic, p_value, status_message) or (None, None, error_message) """ logger.info("Diagnostics: Running Overidentification Test...") num_instruments = len(instruments) num_endog = 1 # Assuming only one treatment variable is endogenous if num_instruments <= num_endog: logger.info(" Over-ID test not applicable (model is exactly identified or underidentified).") return None, None, "Test not applicable (Need more instruments than endogenous regressors)" if sm_results is None or not hasattr(sm_results, 'resid'): logger.warning(" Over-ID test requires valid statsmodels results object with residuals.") return None, None, "Statsmodels results object not available or invalid for test." try: # Statsmodels IV2SLSResults does not seem to have a direct method for this test (as of common versions). # We need to calculate it manually using residuals and instruments. # Formula: N * R^2 from regressing residuals (u_hat) on all exogenous variables (instruments + covariates). # Degrees of freedom = num_instruments - num_endogenous_vars residuals = sm_results.resid df_copy = df.copy() df_copy['intercept'] = 1 exog_vars = ['intercept'] + covariates all_exog_instruments = list(dict.fromkeys(exog_vars + instruments)) # Ensure columns exist in the dataframe before selecting missing_cols = [col for col in all_exog_instruments if col not in df_copy.columns] if missing_cols: raise ValueError(f"Missing columns required for Over-ID test: {missing_cols}") exog_for_test = df_copy[all_exog_instruments] # Check shapes match after potential NA handling in main estimator if len(residuals) != exog_for_test.shape[0]: # Attempt to align based on index if lengths differ (might happen if NAs were dropped) logger.warning(f"Residual length ({len(residuals)}) differs from exog_for_test rows ({exog_for_test.shape[0]}). Trying to align indices.") common_index = residuals.index.intersection(exog_for_test.index) if len(common_index) == 0: raise ValueError("Cannot align residuals and exogenous variables for Over-ID test after NA handling.") residuals = residuals.loc[common_index] exog_for_test = exog_for_test.loc[common_index] logger.warning(f"Aligned to {len(common_index)} common observations.") # Regress residuals on all exogenous instruments aux_model = OLS(residuals, exog_for_test).fit() r_squared = aux_model.rsquared n_obs = len(residuals) # Use length of residuals after potential alignment test_statistic = n_obs * r_squared # Calculate p-value from Chi-squared distribution from scipy.stats import chi2 degrees_of_freedom = num_instruments - num_endog if degrees_of_freedom < 0: # This shouldn't happen if the initial check passed, but as a safeguard raise ValueError("Degrees of freedom for Sargan test are negative.") elif degrees_of_freedom == 0: # R-squared should be 0 if exactly identified, but handle edge case p_value = 1.0 if np.isclose(test_statistic, 0) else 0.0 else: p_value = chi2.sf(test_statistic, degrees_of_freedom) logger.info(f" Sargan Test Statistic: {test_statistic:.4f}, p-value: {p_value:.4f}, df: {degrees_of_freedom}") return test_statistic, p_value, "Test successful" except Exception as e: logger.error(f"Error running overidentification test: {e}", exc_info=True) return None, None, f"Error during test: {e}" def run_iv_diagnostics(df: pd.DataFrame, treatment: str, outcome: str, instruments: List[str], covariates: List[str], sm_results: Optional[Any] = None, dw_results: Optional[Any] = None) -> Dict[str, Any]: """ Runs standard IV diagnostic checks. Args: df: Input DataFrame. treatment: Name of the treatment variable. outcome: Name of the outcome variable. instruments: List of instrument variable names. covariates: List of covariate names. sm_results: Optional fitted results object from statsmodels IV2SLS.fit(). dw_results: Optional results object from DoWhy (structure may vary). Returns: Dictionary containing diagnostic results. """ diagnostics = {} # 1. Instrument Relevance / Weak Instrument Test (First-Stage F-statistic) f_stat, f_p_val = calculate_first_stage_f_statistic(df, treatment, instruments, covariates) diagnostics['first_stage_f_statistic'] = f_stat diagnostics['first_stage_p_value'] = f_p_val diagnostics['is_instrument_weak'] = (f_stat < 10) if f_stat is not None else None # Common rule of thumb if f_stat is None: diagnostics['weak_instrument_test_status'] = "Error during calculation" elif diagnostics['is_instrument_weak']: diagnostics['weak_instrument_test_status'] = "Warning: Instrument(s) may be weak (F < 10)" else: diagnostics['weak_instrument_test_status'] = "Instrument(s) appear sufficiently strong (F >= 10)" # 2. Overidentification Test (e.g., Sargan-Hansen) overid_stat, overid_p_val, overid_status = run_overidentification_test(sm_results, df, treatment, outcome, instruments, covariates) diagnostics['overid_test_statistic'] = overid_stat diagnostics['overid_test_p_value'] = overid_p_val diagnostics['overid_test_status'] = overid_status diagnostics['overid_test_applicable'] = not ("not applicable" in overid_status.lower() if overid_status else True) # 3. Exogeneity/Exclusion Restriction (Conceptual Check) diagnostics['exclusion_restriction_assumption'] = "Assumed based on graph/input; cannot be statistically tested directly. Qualitative LLM check recommended." # Potential future additions: # - Endogeneity tests (e.g., Hausman test - requires comparing OLS and IV estimates) return diagnostics