import gradio as gr import numpy as np from scipy import stats from typing import List, Dict, Any, Union, Tuple import json def independent_t_test(group1: str, group2: str, equal_var: bool = True, alternative: str = "two-sided") -> Dict[str, Any]: """ Perform an independent samples t-test between two groups. Args: group1 (str): Comma-separated values for group 1 (e.g., "1.2,2.3,3.4,2.1") group2 (str): Comma-separated values for group 2 (e.g., "2.1,3.2,4.1,3.5") equal_var (bool): If True, perform standard t-test assuming equal variances. If False, perform Welch's t-test alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater' Returns: dict: Test results including t-statistic, p-value, degrees of freedom, and interpretation """ try: # Parse input data data1 = [float(x.strip()) for x in group1.split(',') if x.strip()] data2 = [float(x.strip()) for x in group2.split(',') if x.strip()] if len(data1) < 2 or len(data2) < 2: return {"error": "Each group must have at least 2 observations"} # Perform t-test t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=equal_var, alternative=alternative) # Calculate descriptive statistics desc1 = {"mean": np.mean(data1), "std": np.std(data1, ddof=1), "n": len(data1)} desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)} # Degrees of freedom if equal_var: df = len(data1) + len(data2) - 2 else: # Welch's formula for unequal variances s1_sq, s2_sq = desc1["std"]**2, desc2["std"]**2 n1, n2 = desc1["n"], desc2["n"] df = (s1_sq/n1 + s2_sq/n2)**2 / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1)) # Effect size (Cohen's d) pooled_std = np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2) / (len(data1)+len(data2)-2)) cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std # Interpretation significance = "significant" if p_value < 0.05 else "not significant" effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large" return { "test_type": f"Independent t-test ({'equal variances' if equal_var else 'unequal variances'})", "t_statistic": round(t_stat, 4), "p_value": round(p_value, 6), "degrees_of_freedom": round(df, 2), "cohens_d": round(cohens_d, 4), "group1_stats": desc1, "group2_stats": desc2, "result": f"The difference between groups is {significance} (p = {p_value:.6f})", "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}", "alternative_hypothesis": alternative } except Exception as e: return {"error": f"Error performing t-test: {str(e)}"} def paired_t_test(before: str, after: str, alternative: str = "two-sided") -> Dict[str, Any]: """ Perform a paired samples t-test. Args: before (str): Comma-separated values for before condition after (str): Comma-separated values for after condition alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater' Returns: dict: Test results including t-statistic, p-value, and interpretation """ try: # Parse input data data_before = [float(x.strip()) for x in before.split(',') if x.strip()] data_after = [float(x.strip()) for x in after.split(',') if x.strip()] if len(data_before) != len(data_after): return {"error": "Before and after groups must have the same number of observations"} if len(data_before) < 2: return {"error": "Need at least 2 paired observations"} # Perform paired t-test t_stat, p_value = stats.ttest_rel(data_before, data_after, alternative=alternative) # Calculate differences and descriptive statistics differences = np.array(data_after) - np.array(data_before) mean_diff = np.mean(differences) std_diff = np.std(differences, ddof=1) # Effect size (Cohen's d for paired samples) cohens_d = mean_diff / std_diff # Degrees of freedom df = len(data_before) - 1 # Interpretation significance = "significant" if p_value < 0.05 else "not significant" effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large" return { "test_type": "Paired t-test", "t_statistic": round(t_stat, 4), "p_value": round(p_value, 6), "degrees_of_freedom": df, "mean_difference": round(mean_diff, 4), "std_difference": round(std_diff, 4), "cohens_d": round(cohens_d, 4), "before_mean": round(np.mean(data_before), 4), "after_mean": round(np.mean(data_after), 4), "result": f"The paired difference is {significance} (p = {p_value:.6f})", "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}", "alternative_hypothesis": alternative } except Exception as e: return {"error": f"Error performing paired t-test: {str(e)}"} def one_sample_t_test(sample: str, population_mean: float, alternative: str = "two-sided") -> Dict[str, Any]: """ Perform a one-sample t-test against a population mean. Args: sample (str): Comma-separated sample values population_mean (float): Hypothesized population mean alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater' Returns: dict: Test results including t-statistic, p-value, and interpretation """ try: # Parse input data data = [float(x.strip()) for x in sample.split(',') if x.strip()] if len(data) < 2: return {"error": "Sample must have at least 2 observations"} # Perform one-sample t-test t_stat, p_value = stats.ttest_1samp(data, population_mean, alternative=alternative) # Calculate descriptive statistics sample_mean = np.mean(data) sample_std = np.std(data, ddof=1) sample_size = len(data) # Effect size (Cohen's d) cohens_d = (sample_mean - population_mean) / sample_std # Degrees of freedom df = sample_size - 1 # Interpretation significance = "significant" if p_value < 0.05 else "not significant" effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large" return { "test_type": "One-sample t-test", "t_statistic": round(t_stat, 4), "p_value": round(p_value, 6), "degrees_of_freedom": df, "sample_mean": round(sample_mean, 4), "population_mean": population_mean, "sample_std": round(sample_std, 4), "sample_size": sample_size, "cohens_d": round(cohens_d, 4), "result": f"Sample mean differs {significance}ly from population mean (p = {p_value:.6f})", "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}", "alternative_hypothesis": alternative } except Exception as e: return {"error": f"Error performing one-sample t-test: {str(e)}"} def one_way_anova(*groups: str) -> Dict[str, Any]: """ Perform a one-way ANOVA test. Args: *groups: Variable number of comma-separated group values (minimum 2 groups) Returns: dict: ANOVA results including F-statistic, p-value, and interpretation """ try: # Parse input data parsed_groups = [] for i, group in enumerate(groups): if not group.strip(): continue data = [float(x.strip()) for x in group.split(',') if x.strip()] if len(data) < 2: return {"error": f"Group {i+1} must have at least 2 observations"} parsed_groups.append(data) if len(parsed_groups) < 2: return {"error": "Need at least 2 groups for ANOVA"} # Perform one-way ANOVA f_stat, p_value = stats.f_oneway(*parsed_groups) # Calculate descriptive statistics for each group group_stats = [] overall_data = [] for i, group in enumerate(parsed_groups): group_stats.append({ "group": i+1, "n": len(group), "mean": round(np.mean(group), 4), "std": round(np.std(group, ddof=1), 4) }) overall_data.extend(group) # Calculate effect size (eta-squared) # SS_between / SS_total overall_mean = np.mean(overall_data) ss_total = sum((x - overall_mean)**2 for x in overall_data) ss_between = sum(len(group) * (np.mean(group) - overall_mean)**2 for group in parsed_groups) eta_squared = ss_between / ss_total if ss_total > 0 else 0 # Degrees of freedom df_between = len(parsed_groups) - 1 df_within = len(overall_data) - len(parsed_groups) # Interpretation significance = "significant" if p_value < 0.05 else "not significant" effect_size_interp = "small" if eta_squared < 0.06 else "medium" if eta_squared < 0.14 else "large" return { "test_type": "One-way ANOVA", "f_statistic": round(f_stat, 4), "p_value": round(p_value, 6), "df_between": df_between, "df_within": df_within, "eta_squared": round(eta_squared, 4), "group_statistics": group_stats, "result": f"Group differences are {significance} (p = {p_value:.6f})", "effect_size": f"Effect size (η² = {eta_squared:.4f}) is {effect_size_interp}", "note": "If significant, consider post-hoc tests to identify specific group differences" } except Exception as e: return {"error": f"Error performing ANOVA: {str(e)}"} def chi_square_test(observed: str, expected: str = None) -> Dict[str, Any]: """ Perform a chi-square goodness of fit test. Args: observed (str): Comma-separated observed frequencies expected (str): Comma-separated expected frequencies (optional, defaults to equal distribution) Returns: dict: Chi-square test results """ try: # Parse observed frequencies obs_data = [float(x.strip()) for x in observed.split(',') if x.strip()] # Parse expected frequencies or create equal distribution if expected and expected.strip(): exp_data = [float(x.strip()) for x in expected.split(',') if x.strip()] if len(obs_data) != len(exp_data): return {"error": "Observed and expected must have the same number of categories"} else: # Equal distribution total = sum(obs_data) exp_data = [total / len(obs_data)] * len(obs_data) # Perform chi-square test chi2_stat, p_value = stats.chisquare(obs_data, exp_data) # Degrees of freedom df = len(obs_data) - 1 # Effect size (Cramér's V for goodness of fit) n = sum(obs_data) cramers_v = np.sqrt(chi2_stat / (n * (len(obs_data) - 1))) # Interpretation significance = "significant" if p_value < 0.05 else "not significant" effect_size_interp = "small" if cramers_v < 0.3 else "medium" if cramers_v < 0.5 else "large" return { "test_type": "Chi-square goodness of fit test", "chi_square_statistic": round(chi2_stat, 4), "p_value": round(p_value, 6), "degrees_of_freedom": df, "cramers_v": round(cramers_v, 4), "observed_frequencies": obs_data, "expected_frequencies": [round(x, 2) for x in exp_data], "result": f"Observed frequencies differ {significance}ly from expected (p = {p_value:.6f})", "effect_size": f"Effect size (Cramér's V = {cramers_v:.4f}) is {effect_size_interp}" } except Exception as e: return {"error": f"Error performing chi-square test: {str(e)}"} def correlation_test(x_values: str, y_values: str, method: str = "pearson") -> Dict[str, Any]: """ Perform correlation analysis between two variables. Args: x_values (str): Comma-separated X variable values y_values (str): Comma-separated Y variable values method (str): Correlation method - 'pearson', 'spearman', or 'kendall' Returns: dict: Correlation results including coefficient and p-value """ try: # Parse input data x_data = [float(x.strip()) for x in x_values.split(',') if x.strip()] y_data = [float(y.strip()) for y in y_values.split(',') if y.strip()] if len(x_data) != len(y_data): return {"error": "X and Y variables must have the same number of observations"} if len(x_data) < 3: return {"error": "Need at least 3 observations for correlation"} # Perform correlation test if method.lower() == "pearson": corr_coef, p_value = stats.pearsonr(x_data, y_data) test_name = "Pearson correlation" elif method.lower() == "spearman": corr_coef, p_value = stats.spearmanr(x_data, y_data) test_name = "Spearman rank correlation" elif method.lower() == "kendall": corr_coef, p_value = stats.kendalltau(x_data, y_data) test_name = "Kendall's tau correlation" else: return {"error": "Method must be 'pearson', 'spearman', or 'kendall'"} # Interpretation significance = "significant" if p_value < 0.05 else "not significant" # Correlation strength interpretation abs_corr = abs(corr_coef) if abs_corr < 0.3: strength = "weak" elif abs_corr < 0.7: strength = "moderate" else: strength = "strong" direction = "positive" if corr_coef > 0 else "negative" return { "test_type": test_name, "correlation_coefficient": round(corr_coef, 4), "p_value": round(p_value, 6), "sample_size": len(x_data), "result": f"The correlation is {significance} (p = {p_value:.6f})", "interpretation": f"{strength.title()} {direction} correlation (r = {corr_coef:.4f})", "method": method.lower() } except Exception as e: return {"error": f"Error performing correlation test: {str(e)}"} # Create Gradio interfaces for each function demo = gr.TabbedInterface( [ gr.Interface( fn=independent_t_test, inputs=[ gr.Textbox(placeholder="1.2,2.3,3.4,2.1", label="Group 1 (comma-separated)"), gr.Textbox(placeholder="2.1,3.2,4.1,3.5", label="Group 2 (comma-separated)"), gr.Checkbox(value=True, label="Equal variances"), gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis") ], outputs=gr.JSON(), title="Independent T-Test", description="Compare means between two independent groups" ), gr.Interface( fn=paired_t_test, inputs=[ gr.Textbox(placeholder="10,12,11,13", label="Before (comma-separated)"), gr.Textbox(placeholder="12,14,13,15", label="After (comma-separated)"), gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis") ], outputs=gr.JSON(), title="Paired T-Test", description="Compare paired/matched samples" ), gr.Interface( fn=one_sample_t_test, inputs=[ gr.Textbox(placeholder="10,12,11,13,9", label="Sample (comma-separated)"), gr.Number(value=10, label="Population mean"), gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis") ], outputs=gr.JSON(), title="One-Sample T-Test", description="Test sample mean against population mean" ), gr.Interface( fn=one_way_anova, inputs=[ gr.Textbox(placeholder="1,2,3,2", label="Group 1 (comma-separated)"), gr.Textbox(placeholder="4,5,6,5", label="Group 2 (comma-separated)"), gr.Textbox(placeholder="7,8,9,8", label="Group 3 (comma-separated)", info="Optional"), gr.Textbox(placeholder="", label="Group 4 (comma-separated)", info="Optional"), gr.Textbox(placeholder="", label="Group 5 (comma-separated)", info="Optional") ], outputs=gr.JSON(), title="One-Way ANOVA", description="Compare means across multiple groups" ), gr.Interface( fn=chi_square_test, inputs=[ gr.Textbox(placeholder="10,20,15,25", label="Observed frequencies (comma-separated)"), gr.Textbox(placeholder="", label="Expected frequencies (optional, comma-separated)") ], outputs=gr.JSON(), title="Chi-Square Test", description="Test goodness of fit for categorical data" ), gr.Interface( fn=correlation_test, inputs=[ gr.Textbox(placeholder="1,2,3,4,5", label="X values (comma-separated)"), gr.Textbox(placeholder="2,4,6,8,10", label="Y values (comma-separated)"), gr.Dropdown(["pearson", "spearman", "kendall"], value="pearson", label="Correlation method") ], outputs=gr.JSON(), title="Correlation Analysis", description="Test correlation between two variables" ) ], tab_names=["Independent T-Test", "Paired T-Test", "One-Sample T-Test", "ANOVA", "Chi-Square", "Correlation"] ) if __name__ == "__main__": print(f"Gradio version: {gr.__version__}") demo.launch(mcp_server=True)