JG1310 committed on
Commit
e09dd64
·
verified ·
1 Parent(s): 9d4ec19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +553 -2
app.py CHANGED
@@ -851,6 +851,287 @@ def one_way_anova(
851
  except Exception as e:
852
  return {"error": f"Unexpected error in one-way ANOVA: {str(e)}"}
853
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
854
  def chi_square_test(
855
  dataframe: Optional[pd.DataFrame] = None,
856
  observed_str: Optional[str] = None,
@@ -1765,6 +2046,276 @@ def create_anova_tab():
1765
  show_api=False
1766
  )
1767
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1768
  def create_chi_square_tab():
1769
  """Create a complete chi-square goodness of fit test tab with all components and handlers."""
1770
 
@@ -2125,9 +2676,9 @@ def create_t_test_interface():
2125
  description="**t-test between paired groups**"
2126
  )
2127
 
2128
- # Create paired t-test tab
2129
- anova_components = create_anova_tab()
2130
  one_sample_components = create_one_sample_t_test_tab()
 
 
2131
  chi_square_components = create_chi_square_tab()
2132
  corr_components = create_correlation_tab()
2133
 
 
851
  except Exception as e:
852
  return {"error": f"Unexpected error in one-way ANOVA: {str(e)}"}
853
 
854
+ def multi_way_anova(
855
+ dataframe: Optional[pd.DataFrame] = None,
856
+ dependent_var: Optional[str] = None,
857
+ factors: Optional[str] = None,
858
+ alpha: float = 0.05,
859
+ effect_thresholds: str = "0.01,0.06,0.14",
860
+ include_interactions: bool = True,
861
+ max_interaction_order: Optional[int] = None,
862
+ sum_squares_type: int = 2
863
+ ) -> Dict[str, Any]:
864
+ """
865
+ Accepts multiple categorical factors and performs Multi-Way ANOVA to determine whether there are
866
+ statistically significant differences between group means when multiple factors are involved simultaneously.
867
+ Multi-way ANOVA extends the one-way ANOVA framework to handle complex experimental designs with multiple
868
+ categorical independent variables (factors), each with two or more levels. Unlike one-way ANOVA which tests
869
+ a single factor, multi-way ANOVA can simultaneously test: (1) main effects of each individual factor,
870
+ (2) interaction effects between factors, and (3) higher-order interactions. The test uses F-statistics to
871
+ compare variance between groups to variance within groups for each effect. Eta-squared (η²) measures effect
872
+ size as the proportion of total variance explained by each factor and interaction, with interpretation:
873
+ η² < 0.01 = negligible, 0.01-0.06 = small, 0.06-0.14 = medium, >0.14 = large (custom thresholds may be used).
874
+ EXAMPLE USE CASES: 2-way ANOVA for treatment × gender effects on blood pressure, 3-way ANOVA for teaching
875
+ method × school type × student age on test scores, 4-way ANOVA for drug × dose × gender × age effects on recovery.
876
+
877
+ Args:
878
+ dataframe (Optional[pd.DataFrame]): DataFrame containing the experimental data with factors as columns
879
+ and the dependent variable. All factors must be categorical.
880
+ If provided, dependent_var and factors parameters are required.
881
+ dependent_var (Optional[str]): Name of the dependent (outcome) variable column in the DataFrame.
882
+ Must be a continuous numeric variable.
883
+ Example: "test_score", "recovery_time", "blood_pressure"
884
+ factors (Optional[str]): Comma-separated string of factor column names from the DataFrame.
885
+ Format: "factor1,factor2,factor3"
886
+ Example: "treatment,gender,age_group" for a 3-way ANOVA
887
+ Each factor must be categorical with 2 or more levels.
888
+ alpha (float): Significance level for the test (probability of Type I error). Reject null hypothesis if p_value below this threshold.
889
+ Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
890
+ effect_thresholds (str): Three comma-separated values defining eta-squared effect size boundaries.
891
+ Format: "small_threshold,medium_threshold,large_threshold"
892
+ Default "0.01,0.06,0.14" means: <0.01=negligible, 0.01-0.06=small, 0.06-0.14=medium, >0.14=large
893
+ These follow Cohen's conventions for eta-squared interpretation.
894
+ include_interactions (bool): Whether to include interaction terms in the model.
895
+ True (default): Tests main effects AND interactions
896
+ False: Tests only main effects (additive model)
897
+ max_interaction_order (Optional[int]): Maximum order of interactions to include in the model.
898
+ If None, includes all possible interactions up to the number of factors.
899
+ Example: For 4 factors, setting to 2 includes only 2-way interactions.
900
+ Useful for simplifying complex models with many factors.
901
+ sum_squares_type (int): Type of sum of squares calculation for the ANOVA table.
902
+ Type 1: Sequential (depends on order of factors)
903
+ Type 2: Marginal (recommended for balanced designs, default)
904
+ Type 3: Partial (recommended for unbalanced designs)
905
+
906
+ Returns:
907
+ dict: Comprehensive test results with the following keys:
908
+ - test_type (str): Description of the multi-way ANOVA performed (e.g., "3-way ANOVA with interactions")
909
+ - anova_table (pd.DataFrame): Complete ANOVA table with sum of squares, F-statistics, p-values, etc.
910
+ - significant_effects (List[str]): List of statistically significant main effects and interactions
911
+ - effect_sizes (Dict[str, float]): Eta-squared values for each effect measuring proportion of variance explained
912
+ - effect_interpretations (Dict[str, str]): Categorical interpretation of each effect size ("negligible", "small", "medium", "large")
913
+ - factor_summaries (Dict[str, dict]): Descriptive statistics for each factor level
914
+ - model_summary (dict): Overall model statistics (R², F-statistic, AIC, BIC, etc.)
915
+ - formula_used (str): The statsmodels formula string used for the analysis
916
+ - design_summary (dict): Information about the experimental design (balanced/unbalanced, sample sizes)
917
+ - alpha (float): Echo of significance level used
918
+ - factors_analyzed (List[str]): Echo of factors included in the analysis
919
+ - sum_squares_type (int): Echo of sum of squares type used
920
+ - effect_thresholds (List[float]): Echo of effect size thresholds used
921
+ """
922
+ try:
923
+ # Parse effect size thresholds
924
+ try:
925
+ thresholds = [float(x.strip()) for x in effect_thresholds.split(',')]
926
+ if len(thresholds) != 3:
927
+ return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}
928
+ except:
929
+ return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.01,0.06,0.14')"}
930
+
931
+ # Validate inputs
932
+ if dataframe is None or dataframe.empty:
933
+ return {"error": "DataFrame cannot be None or empty"}
934
+
935
+ if not dependent_var:
936
+ return {"error": "Dependent variable name is required"}
937
+
938
+ if dependent_var not in dataframe.columns:
939
+ return {"error": f"Dependent variable '{dependent_var}' not found in DataFrame columns"}
940
+
941
+ if not factors:
942
+ return {"error": "Factor names are required. Provide as comma-separated string (e.g., 'factor1,factor2,factor3')"}
943
+
944
+ # Parse factors
945
+ try:
946
+ factor_list = [f.strip() for f in factors.split(',') if f.strip()]
947
+ if len(factor_list) < 2:
948
+ return {"error": "At least 2 factors are required for multi-way ANOVA"}
949
+ except:
950
+ return {"error": "Invalid factors format. Use comma-separated factor names (e.g., 'treatment,gender,age_group')"}
951
+
952
+ # Check factors exist in DataFrame
953
+ missing_factors = [f for f in factor_list if f not in dataframe.columns]
954
+ if missing_factors:
955
+ return {"error": f"Factors not found in DataFrame: {missing_factors}"}
956
+
957
+ # Validate sum of squares type
958
+ if sum_squares_type not in [1, 2, 3]:
959
+ return {"error": "sum_squares_type must be 1, 2, or 3"}
960
+
961
+ # Clean and prepare the data
962
+ analysis_columns = [dependent_var] + factor_list
963
+ analysis_df = dataframe[analysis_columns].copy()
964
+
965
+ # Remove rows with missing values
966
+ initial_rows = len(analysis_df)
967
+ analysis_df = analysis_df.dropna()
968
+ final_rows = len(analysis_df)
969
+
970
+ if final_rows < initial_rows * 0.5:
971
+ return {"error": f"Too much missing data: only {final_rows} out of {initial_rows} rows usable"}
972
+
973
+ if final_rows < 20:
974
+ return {"error": f"Insufficient data after removing missing values: {final_rows} rows remaining (minimum 20 required)"}
975
+
976
+ # Validate dependent variable is numeric
977
+ try:
978
+ analysis_df[dependent_var] = pd.to_numeric(analysis_df[dependent_var])
979
+ except:
980
+ return {"error": f"Dependent variable '{dependent_var}' must be numeric"}
981
+
982
+ # Ensure factors are categorical and check levels
983
+ factor_level_counts = {}
984
+ for factor in factor_list:
985
+ analysis_df[factor] = analysis_df[factor].astype('category')
986
+ unique_levels = len(analysis_df[factor].cat.categories)
987
+ factor_level_counts[factor] = unique_levels
988
+
989
+ if unique_levels < 2:
990
+ return {"error": f"Factor '{factor}' must have at least 2 levels. Found {unique_levels} level(s)"}
991
+
992
+ if unique_levels > 20:
993
+ return {"error": f"Factor '{factor}' has too many levels ({unique_levels}). Consider combining levels or using a different analysis method"}
994
+
995
+ # Check for sufficient observations per factor combination
996
+ try:
997
+ cell_counts = analysis_df.groupby(factor_list).size()
998
+ min_cell_size = cell_counts.min()
999
+ empty_cells = (cell_counts == 0).sum()
1000
+
1001
+ if min_cell_size < 2:
1002
+ return {"error": f"Some factor combinations have fewer than 2 observations. Minimum cell size: {min_cell_size}"}
1003
+
1004
+ if empty_cells > 0:
1005
+ return {"error": f"Missing data: {empty_cells} factor combinations have no observations"}
1006
+
1007
+ except Exception as e:
1008
+ return {"error": f"Error checking experimental design: {str(e)}"}
1009
+
1010
+ # Build formula components
1011
+ formula_terms = []
1012
+
1013
+ # Add main effects (always included)
1014
+ for factor in factor_list:
1015
+ formula_terms.append(f"C({factor})")
1016
+
1017
+ # Add interaction terms if requested
1018
+ if include_interactions and len(factor_list) > 1:
1019
+ max_order = max_interaction_order if max_interaction_order is not None else len(factor_list)
1020
+ max_order = min(max_order, len(factor_list)) # Don't exceed number of factors
1021
+
1022
+ # Generate all interaction combinations
1023
+ for order in range(2, max_order + 1):
1024
+ for combination in itertools.combinations(factor_list, order):
1025
+ interaction_term = ":".join([f"C({factor})" for factor in combination])
1026
+ formula_terms.append(interaction_term)
1027
+
1028
+ # Build the complete formula
1029
+ formula = f"{dependent_var} ~ " + " + ".join(formula_terms)
1030
+
1031
+ # Fit the model
1032
+ try:
1033
+ model = ols(formula, data=analysis_df).fit()
1034
+ except Exception as e:
1035
+ return {"error": f"Model fitting failed: {str(e)}. This may indicate perfect multicollinearity or insufficient data variation"}
1036
+
1037
+ # Generate ANOVA table
1038
+ try:
1039
+ anova_table = sm.stats.anova_lm(model, typ=sum_squares_type)
1040
+ except Exception as e:
1041
+ return {"error": f"ANOVA table generation failed: {str(e)}"}
1042
+
1043
+ # Calculate effect sizes (eta-squared)
1044
+ effect_sizes = {}
1045
+ effect_interpretations = {}
1046
+ total_ss = anova_table['sum_sq'].sum()
1047
+
1048
+ for index, row in anova_table.iterrows():
1049
+ if index != 'Residual':
1050
+ eta_squared = row['sum_sq'] / total_ss
1051
+ effect_sizes[index] = eta_squared
1052
+
1053
+ # Interpret effect size
1054
+ small_threshold, medium_threshold, large_threshold = thresholds
1055
+ if eta_squared < small_threshold:
1056
+ effect_interpretations[index] = "negligible"
1057
+ elif eta_squared < medium_threshold:
1058
+ effect_interpretations[index] = "small"
1059
+ elif eta_squared < large_threshold:
1060
+ effect_interpretations[index] = "medium"
1061
+ else:
1062
+ effect_interpretations[index] = "large"
1063
+
1064
+ # Identify significant effects
1065
+ significant_effects = []
1066
+ for index, row in anova_table.iterrows():
1067
+ if index != 'Residual' and row['PR(>F)'] < alpha:
1068
+ significant_effects.append(index)
1069
+
1070
+ # Calculate factor summaries
1071
+ factor_summaries = {}
1072
+ for factor in factor_list:
1073
+ factor_stats = analysis_df.groupby(factor)[dependent_var].agg(['mean', 'std', 'count']).round(4)
1074
+ factor_summaries[factor] = factor_stats.to_dict('index')
1075
+
1076
+ # Model summary statistics
1077
+ model_summary = {
1078
+ "r_squared": model.rsquared,
1079
+ "adj_r_squared": model.rsquared_adj,
1080
+ "f_statistic": model.fvalue,
1081
+ "f_pvalue": model.f_pvalue,
1082
+ "aic": model.aic,
1083
+ "bic": model.bic,
1084
+ "df_model": model.df_model,
1085
+ "df_resid": model.df_resid,
1086
+ "n_observations": int(model.nobs),
1087
+ "mse_resid": model.mse_resid
1088
+ }
1089
+
1090
+ # Design summary
1091
+ total_combinations = np.prod(list(factor_level_counts.values()))
1092
+ observed_combinations = len(cell_counts)
1093
+ balanced = len(cell_counts.unique()) == 1 # All cells have same count
1094
+
1095
+ design_summary = {
1096
+ "n_factors": len(factor_list),
1097
+ "factor_levels": factor_level_counts,
1098
+ "total_possible_combinations": total_combinations,
1099
+ "observed_combinations": observed_combinations,
1100
+ "is_balanced": balanced,
1101
+ "min_cell_size": int(min_cell_size),
1102
+ "max_cell_size": int(cell_counts.max()),
1103
+ "mean_cell_size": round(cell_counts.mean(), 2)
1104
+ }
1105
+
1106
+ # Determine test description
1107
+ n_factors = len(factor_list)
1108
+ test_description = f"{n_factors}-way ANOVA"
1109
+
1110
+ if include_interactions:
1111
+ max_order_desc = max_interaction_order if max_interaction_order else n_factors
1112
+ test_description += f" with interactions (up to {max_order_desc}-way)"
1113
+ else:
1114
+ test_description += " (main effects only)"
1115
+
1116
+ return {
1117
+ "test_type": test_description,
1118
+ "anova_table": anova_table,
1119
+ "significant_effects": significant_effects,
1120
+ "effect_sizes": effect_sizes,
1121
+ "effect_interpretations": effect_interpretations,
1122
+ "factor_summaries": factor_summaries,
1123
+ "model_summary": model_summary,
1124
+ "formula_used": formula,
1125
+ "design_summary": design_summary,
1126
+ "alpha": alpha,
1127
+ "factors_analyzed": factor_list,
1128
+ "sum_squares_type": sum_squares_type,
1129
+ "effect_thresholds": thresholds
1130
+ }
1131
+
1132
+ except Exception as e:
1133
+ return {"error": f"Unexpected error in multi-way ANOVA: {str(e)}"}
1134
+
1135
  def chi_square_test(
1136
  dataframe: Optional[pd.DataFrame] = None,
1137
  observed_str: Optional[str] = None,
 
2046
  show_api=False
2047
  )
2048
 
2049
def create_multi_way_anova_tab():
    """Create a complete multi-way ANOVA tab with all components and handlers.

    Builds the "Multi-Way ANOVA" tab: file upload with preview, variable and
    option inputs, run/clear/example buttons, and a JSON output panel. The run
    button calls ``multi_way_anova`` directly; the other handlers are hidden
    from the API via ``show_api=False``. Must be called inside an active
    Gradio Blocks/Tabs context.
    """

    with gr.TabItem("Multi-Way ANOVA"):
        gr.Markdown("**Compare means across multiple categorical factors simultaneously**")

        # Input method selector
        input_method = gr.Radio(
            choices=["File Upload"],
            value="File Upload",
            label="Input Method",
            info="Multi-way ANOVA requires structured data - file upload recommended"
        )

        # File upload input section
        with gr.Group(visible=True) as file_section:
            gr.Markdown("### File Upload")
            gr.Markdown("*Upload CSV or Excel file with dependent variable and multiple categorical factors*")

            with gr.Row():
                file_upload = gr.File(
                    label="Upload CSV/Excel File",
                    file_types=[".csv", ".xlsx", ".xls"],
                    type="filepath"
                )
                has_header = gr.Checkbox(
                    label="File has header row",
                    value=True,
                    info="Check if first row contains column names"
                )

            # Display loaded data preview
            data_preview = gr.Dataframe(
                label="Data Preview",
                interactive=False,
                row_count=10
            )

            # Variable specification
            gr.Markdown("### Variable Specification")
            with gr.Row():
                dependent_var = gr.Dropdown(
                    label="Dependent Variable",
                    info="Select the continuous outcome variable",
                    interactive=True
                )
                factors = gr.Textbox(
                    label="Factors (comma-separated)",
                    placeholder="treatment,gender,age_group",
                    info="Enter factor column names separated by commas",
                    lines=2
                )

        # Advanced options
        gr.Markdown("### Analysis Options")
        with gr.Row():
            include_interactions = gr.Checkbox(
                label="Include Interactions",
                value=True,
                info="Test for interaction effects between factors"
            )
            max_interaction_order = gr.Number(
                label="Max Interaction Order",
                value=None,
                minimum=2,
                step=1,
                precision=0,  # deliver an int, not a float, to multi_way_anova
                info="Maximum interaction order (leave empty for all interactions)"
            )

        with gr.Row():
            sum_squares_type = gr.Dropdown(
                choices=[1, 2, 3],
                value=2,
                label="Sum of Squares Type",
                info="Type 2 for balanced, Type 3 for unbalanced designs"
            )
            alpha = gr.Number(
                value=0.05,
                minimum=0,
                maximum=1,
                step=0.01,
                label="Significance Level (α)",
                info="Probability threshold for statistical significance"
            )

        with gr.Row():
            effect_thresholds = gr.Textbox(
                value="0.01,0.06,0.14",
                label="Effect Size Thresholds",
                info="Eta-squared boundaries: small,medium,large"
            )

        # Action buttons
        with gr.Row():
            run_button = gr.Button("Run Multi-Way ANOVA", variant="primary", scale=1)
            clear_button = gr.Button("Clear All", variant="secondary", scale=1)

        # Output display
        output = gr.JSON(label="Multi-Way ANOVA Results")

        # Information section
        with gr.Accordion("Multi-Way ANOVA Information", open=False):
            gr.Markdown("""
            ### What is Multi-Way ANOVA?

            Multi-way ANOVA extends one-way ANOVA to handle multiple categorical factors simultaneously:

            **Main Effects**: How each factor independently affects the outcome
            **Interaction Effects**: How factors work together (non-additively)

            ### Example Designs:
            - **2-way**: Treatment (A,B,C) × Gender (Male,Female) → 6 combinations
            - **3-way**: Drug (A,B) × Dose (Low,High) × Age (Young,Old) → 8 combinations
            - **4-way**: Method (A,B) × School (Public,Private) × Gender (M,F) × Grade (1st,2nd) → 16 combinations

            ### Requirements:
            - All factors must be categorical (not continuous)
            - Dependent variable must be continuous
            - At least 2 observations per factor combination
            - Independence, normality, and equal variances assumptions
            """)

        # Example data section
        with gr.Row():
            gr.Markdown("### Quick Examples")
            example_button = gr.Button("Load Example Data", variant="outline")

        # State management: holds the full DataFrame between events
        loaded_dataframe = gr.State(value=None)

        # Helper function to load and preview file data
        def load_multi_way_file(file_path, has_header_flag):
            """Read the uploaded file; return (dataframe, preview, dropdown update)."""
            # A Dropdown's choices can only be changed through an update object;
            # returning a bare list would be interpreted as its *value*.
            no_columns = gr.update(choices=[], value=None)

            if file_path is None:
                return None, None, no_columns

            try:
                # Determine header parameter
                header_param = 0 if has_header_flag else None

                # Compare case-insensitively so e.g. "DATA.CSV" is accepted too
                lowered_path = file_path.lower()
                if lowered_path.endswith('.csv'):
                    df = pd.read_csv(file_path, header=header_param)
                elif lowered_path.endswith(('.xlsx', '.xls')):
                    df = pd.read_excel(file_path, header=header_param)
                else:
                    return None, pd.DataFrame({'Error': ['Unsupported file format']}), no_columns

                # Set column names if no header
                if not has_header_flag:
                    df.columns = [f'Column_{i+1}' for i in range(len(df.columns))]

                # Offer every column as a candidate dependent variable
                column_options = list(df.columns)

                # Return dataframe, preview, and column options
                preview = df.head(15)
                return df, preview, gr.update(choices=column_options, value=None)

            except Exception as e:
                error_df = pd.DataFrame({'Error': [f"Failed to load file: {str(e)}"]})
                return None, error_df, no_columns

        # Clear form function
        def clear_multi_way():
            """Reset every input and the output panel to its initial state."""
            return (
                None,  # loaded_dataframe
                None,  # data_preview
                gr.update(choices=[], value=None),  # dependent_var (choices + value)
                "",  # factors
                True,  # include_interactions
                None,  # max_interaction_order
                2,  # sum_squares_type
                0.05,  # alpha
                "0.01,0.06,0.14",  # effect_thresholds
                {}  # output
            )

        # Example data function
        def load_multi_way_example():
            """Generate a balanced 3x2x2 example data set and pre-fill the form."""
            # Fixed seed so the example is reproducible
            np.random.seed(42)

            treatments = ['Control', 'Treatment_A', 'Treatment_B']
            genders = ['Male', 'Female']
            ages = ['Young', 'Old']

            data = []
            for treatment in treatments:
                for gender in genders:
                    for age in ages:
                        # Generate scores with different effects
                        base_score = 50
                        treatment_effect = {'Control': 0, 'Treatment_A': 8, 'Treatment_B': 12}[treatment]
                        gender_effect = {'Male': 3, 'Female': -3}[gender]
                        age_effect = {'Young': 5, 'Old': -5}[age]

                        # Add interaction: Treatment_B works better for older patients
                        interaction_effect = 0
                        if treatment == 'Treatment_B' and age == 'Old':
                            interaction_effect = 6

                        n_per_cell = 15
                        mean_score = base_score + treatment_effect + gender_effect + age_effect + interaction_effect
                        scores = np.random.normal(mean_score, 6, n_per_cell)

                        for score in scores:
                            data.append({
                                'test_score': round(score, 2),
                                'treatment': treatment,
                                'gender': gender,
                                'age_group': age
                            })

            df = pd.DataFrame(data)
            preview = df.head(15)
            column_options = list(df.columns)

            return (
                df,
                preview,
                gr.update(choices=column_options, value='test_score'),
                'treatment,gender,age_group'
            )

        # EVENT HANDLERS

        # File upload handlers
        file_upload.change(
            fn=load_multi_way_file,
            inputs=[file_upload, has_header],
            outputs=[loaded_dataframe, data_preview, dependent_var],
            show_api=False
        )

        # Re-read the file when the header flag toggles so column names refresh
        has_header.change(
            fn=load_multi_way_file,
            inputs=[file_upload, has_header],
            outputs=[loaded_dataframe, data_preview, dependent_var],
            show_api=False
        )

        # MAIN STATISTICAL FUNCTION CALL - Exposed to MCP!
        run_button.click(
            fn=multi_way_anova,
            inputs=[
                loaded_dataframe,  # dataframe
                dependent_var,  # dependent_var
                factors,  # factors
                alpha,  # alpha
                effect_thresholds,  # effect_thresholds
                include_interactions,  # include_interactions
                max_interaction_order,  # max_interaction_order
                sum_squares_type  # sum_squares_type
            ],
            outputs=output
        )

        # Clear form handler (each component appears exactly once)
        clear_button.click(
            fn=clear_multi_way,
            outputs=[
                loaded_dataframe, data_preview, dependent_var,
                factors, include_interactions, max_interaction_order,
                sum_squares_type, alpha, effect_thresholds, output
            ],
            show_api=False
        )

        # Example data handler
        example_button.click(
            fn=load_multi_way_example,
            outputs=[loaded_dataframe, data_preview, dependent_var, factors],
            show_api=False
        )
2318
+
2319
  def create_chi_square_tab():
2320
  """Create a complete chi-square goodness of fit test tab with all components and handlers."""
2321
 
 
2676
  description="**t-test between paired groups**"
2677
  )
2678
 
 
 
2679
  one_sample_components = create_one_sample_t_test_tab()
2680
+ anova_components = create_anova_tab()
2681
+ manova_components = create_multi_way_anova_tab()
2682
  chi_square_components = create_chi_square_tab()
2683
  corr_components = create_correlation_tab()
2684