import gradio as gr
import numpy as np
from scipy import stats
from typing import Any, Dict, Optional

def independent_t_test(group1: str, group2: str, equal_var: bool = True, alternative: str = "two-sided") -> Dict[str, Any]:
    """
    Perform an independent samples t-test between two groups.

    Args:
        group1 (str): Comma-separated values for group 1 (e.g., "1.2,2.3,3.4,2.1")
        group2 (str): Comma-separated values for group 2 (e.g., "2.1,3.2,4.1,3.5")
        equal_var (bool): If True, perform the standard t-test assuming equal variances; if False, perform Welch's t-test
        alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'

    Returns:
        dict: Test results including t-statistic, p-value, degrees of freedom, and interpretation
    """
    try:
        # Parse the comma-separated inputs into lists of floats
        data1 = [float(x.strip()) for x in group1.split(',') if x.strip()]
        data2 = [float(x.strip()) for x in group2.split(',') if x.strip()]

        if len(data1) < 2 or len(data2) < 2:
            return {"error": "Each group must have at least 2 observations"}

        # Run the t-test (Welch's variant when equal_var is False)
        t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=equal_var, alternative=alternative)

        # Descriptive statistics per group (sample standard deviation, ddof=1)
        desc1 = {"mean": np.mean(data1), "std": np.std(data1, ddof=1), "n": len(data1)}
        desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}

        # Degrees of freedom: pooled for equal variances, Welch-Satterthwaite otherwise
        if equal_var:
            df = len(data1) + len(data2) - 2
        else:
            s1_sq, s2_sq = desc1["std"]**2, desc2["std"]**2
            n1, n2 = desc1["n"], desc2["n"]
            df = (s1_sq/n1 + s2_sq/n2)**2 / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1))

        # Cohen's d from the pooled standard deviation (reported for both variants)
        pooled_std = np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2) / (len(data1)+len(data2)-2))
        cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std

        # Interpret at alpha = 0.05; effect-size cutoffs follow Cohen's conventions
        significance = "significant" if p_value < 0.05 else "not significant"
        effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"

        return {
            "test_type": f"Independent t-test ({'equal variances' if equal_var else 'unequal variances'})",
            "t_statistic": round(t_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": round(df, 2),
            "cohens_d": round(cohens_d, 4),
            "group1_stats": desc1,
            "group2_stats": desc2,
            "result": f"The difference between groups is {significance} (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
            "alternative_hypothesis": alternative
        }
    except Exception as e:
        return {"error": f"Error performing t-test: {str(e)}"}


def paired_t_test(before: str, after: str, alternative: str = "two-sided") -> Dict[str, Any]:
    """
    Perform a paired samples t-test.

    Args:
        before (str): Comma-separated values for the before condition
        after (str): Comma-separated values for the after condition
        alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'

    Returns:
        dict: Test results including t-statistic, p-value, and interpretation
    """
    try:
        # Parse the comma-separated inputs into lists of floats
        data_before = [float(x.strip()) for x in before.split(',') if x.strip()]
        data_after = [float(x.strip()) for x in after.split(',') if x.strip()]

        if len(data_before) != len(data_after):
            return {"error": "Before and after groups must have the same number of observations"}

        if len(data_before) < 2:
            return {"error": "Need at least 2 paired observations"}

        # Summarize the paired differences (after - before)
        differences = np.array(data_after) - np.array(data_before)
        mean_diff = np.mean(differences)
        std_diff = np.std(differences, ddof=1)

        if std_diff == 0:
            return {"error": "All paired differences are identical, so the t-test is undefined"}

        # Test on (after - before) so the t-statistic's sign matches the
        # mean_difference and Cohen's d reported below; `alternative` is
        # interpreted relative to this after - before difference
        t_stat, p_value = stats.ttest_rel(data_after, data_before, alternative=alternative)

        # Cohen's d for paired data: mean difference over the SD of the differences
        cohens_d = mean_diff / std_diff

        df = len(data_before) - 1

        # Interpret at alpha = 0.05; effect-size cutoffs follow Cohen's conventions
        significance = "significant" if p_value < 0.05 else "not significant"
        effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"

        return {
            "test_type": "Paired t-test",
            "t_statistic": round(t_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "mean_difference": round(mean_diff, 4),
            "std_difference": round(std_diff, 4),
            "cohens_d": round(cohens_d, 4),
            "before_mean": round(np.mean(data_before), 4),
            "after_mean": round(np.mean(data_after), 4),
            "result": f"The paired difference is {significance} (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
            "alternative_hypothesis": alternative
        }
    except Exception as e:
        return {"error": f"Error performing paired t-test: {str(e)}"}


def one_sample_t_test(sample: str, population_mean: float, alternative: str = "two-sided") -> Dict[str, Any]:
    """
    Perform a one-sample t-test against a population mean.

    Args:
        sample (str): Comma-separated sample values
        population_mean (float): Hypothesized population mean
        alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'

    Returns:
        dict: Test results including t-statistic, p-value, and interpretation
    """
    try:
        # Parse the comma-separated input into a list of floats
        data = [float(x.strip()) for x in sample.split(',') if x.strip()]

        if len(data) < 2:
            return {"error": "Sample must have at least 2 observations"}

        # Run the one-sample t-test
        t_stat, p_value = stats.ttest_1samp(data, population_mean, alternative=alternative)

        # Sample descriptives (sample standard deviation, ddof=1)
        sample_mean = np.mean(data)
        sample_std = np.std(data, ddof=1)
        sample_size = len(data)

        # Cohen's d: standardized distance from the hypothesized mean
        cohens_d = (sample_mean - population_mean) / sample_std

        df = sample_size - 1

        # Interpret at alpha = 0.05; effect-size cutoffs follow Cohen's conventions
        significance = "differs significantly" if p_value < 0.05 else "does not differ significantly"
        effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"

        return {
            "test_type": "One-sample t-test",
            "t_statistic": round(t_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "sample_mean": round(sample_mean, 4),
            "population_mean": population_mean,
            "sample_std": round(sample_std, 4),
            "sample_size": sample_size,
            "cohens_d": round(cohens_d, 4),
            "result": f"Sample mean {significance} from the population mean (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
            "alternative_hypothesis": alternative
        }
    except Exception as e:
        return {"error": f"Error performing one-sample t-test: {str(e)}"}


def one_way_anova(*groups: str) -> Dict[str, Any]:
    """
    Perform a one-way ANOVA test.

    Args:
        *groups: Variable number of comma-separated group values (minimum 2 groups)

    Returns:
        dict: ANOVA results including F-statistic, p-value, and interpretation
    """
    try:
        # Parse each group, skipping textboxes left empty in the UI
        parsed_groups = []
        for i, group in enumerate(groups):
            if not group.strip():
                continue
            data = [float(x.strip()) for x in group.split(',') if x.strip()]
            if len(data) < 2:
                return {"error": f"Group {i+1} must have at least 2 observations"}
            parsed_groups.append(data)

        if len(parsed_groups) < 2:
            return {"error": "Need at least 2 groups for ANOVA"}

        # Run the one-way ANOVA
        f_stat, p_value = stats.f_oneway(*parsed_groups)

        # Per-group descriptive statistics, plus pooled data for eta-squared
        group_stats = []
        overall_data = []
        for i, group in enumerate(parsed_groups):
            group_stats.append({
                "group": i+1,
                "n": len(group),
                "mean": round(np.mean(group), 4),
                "std": round(np.std(group, ddof=1), 4)
            })
            overall_data.extend(group)

        # Eta-squared: between-group sum of squares over total sum of squares
        overall_mean = np.mean(overall_data)
        ss_total = sum((x - overall_mean)**2 for x in overall_data)
        ss_between = sum(len(group) * (np.mean(group) - overall_mean)**2 for group in parsed_groups)
        eta_squared = ss_between / ss_total if ss_total > 0 else 0

        df_between = len(parsed_groups) - 1
        df_within = len(overall_data) - len(parsed_groups)

        # Interpret at alpha = 0.05; eta-squared cutoffs follow Cohen's conventions
        significance = "significant" if p_value < 0.05 else "not significant"
        effect_size_interp = "small" if eta_squared < 0.06 else "medium" if eta_squared < 0.14 else "large"

        return {
            "test_type": "One-way ANOVA",
            "f_statistic": round(f_stat, 4),
            "p_value": round(p_value, 6),
            "df_between": df_between,
            "df_within": df_within,
            "eta_squared": round(eta_squared, 4),
            "group_statistics": group_stats,
            "result": f"Group differences are {significance} (p = {p_value:.6f})",
            "effect_size": f"Effect size (η² = {eta_squared:.4f}) is {effect_size_interp}",
            "note": "If significant, consider post-hoc tests to identify specific group differences"
        }
    except Exception as e:
        return {"error": f"Error performing ANOVA: {str(e)}"}


def chi_square_test(observed: str, expected: Optional[str] = None) -> Dict[str, Any]:
    """
    Perform a chi-square goodness of fit test.

    Args:
        observed (str): Comma-separated observed frequencies
        expected (str): Comma-separated expected frequencies (optional, defaults to an equal distribution)

    Returns:
        dict: Chi-square test results
    """
    try:
        # Parse the observed frequencies
        obs_data = [float(x.strip()) for x in observed.split(',') if x.strip()]

        if len(obs_data) < 2:
            return {"error": "Need at least 2 categories"}

        # Use the provided expected frequencies, or default to a uniform distribution
        if expected and expected.strip():
            exp_data = [float(x.strip()) for x in expected.split(',') if x.strip()]
            if len(obs_data) != len(exp_data):
                return {"error": "Observed and expected must have the same number of categories"}
            # SciPy's chisquare requires the two totals to agree (up to a small tolerance)
            if not np.isclose(sum(obs_data), sum(exp_data)):
                return {"error": "Observed and expected frequencies must sum to the same total"}
        else:
            total = sum(obs_data)
            exp_data = [total / len(obs_data)] * len(obs_data)

        # Run the goodness-of-fit test
        chi2_stat, p_value = stats.chisquare(obs_data, exp_data)

        df = len(obs_data) - 1

        # Cramér's V adapted to the goodness-of-fit setting
        n = sum(obs_data)
        cramers_v = np.sqrt(chi2_stat / (n * (len(obs_data) - 1)))

        # Interpret at alpha = 0.05
        significance = "differ significantly" if p_value < 0.05 else "do not differ significantly"
        effect_size_interp = "small" if cramers_v < 0.3 else "medium" if cramers_v < 0.5 else "large"

        return {
            "test_type": "Chi-square goodness of fit test",
            "chi_square_statistic": round(chi2_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "cramers_v": round(cramers_v, 4),
            "observed_frequencies": obs_data,
            "expected_frequencies": [round(x, 2) for x in exp_data],
            "result": f"Observed frequencies {significance} from expected (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cramér's V = {cramers_v:.4f}) is {effect_size_interp}"
        }
    except Exception as e:
        return {"error": f"Error performing chi-square test: {str(e)}"}


def correlation_test(x_values: str, y_values: str, method: str = "pearson") -> Dict[str, Any]:
    """
    Perform correlation analysis between two variables.

    Args:
        x_values (str): Comma-separated X variable values
        y_values (str): Comma-separated Y variable values
        method (str): Correlation method - 'pearson', 'spearman', or 'kendall'

    Returns:
        dict: Correlation results including coefficient and p-value
    """
    try:
        # Parse the comma-separated inputs into lists of floats
        x_data = [float(x.strip()) for x in x_values.split(',') if x.strip()]
        y_data = [float(y.strip()) for y in y_values.split(',') if y.strip()]

        if len(x_data) != len(y_data):
            return {"error": "X and Y variables must have the same number of observations"}

        if len(x_data) < 3:
            return {"error": "Need at least 3 observations for correlation"}

        # Dispatch to the requested correlation method
        if method.lower() == "pearson":
            corr_coef, p_value = stats.pearsonr(x_data, y_data)
            test_name = "Pearson correlation"
        elif method.lower() == "spearman":
            corr_coef, p_value = stats.spearmanr(x_data, y_data)
            test_name = "Spearman rank correlation"
        elif method.lower() == "kendall":
            corr_coef, p_value = stats.kendalltau(x_data, y_data)
            test_name = "Kendall's tau correlation"
        else:
            return {"error": "Method must be 'pearson', 'spearman', or 'kendall'"}

        significance = "significant" if p_value < 0.05 else "not significant"

        # Describe strength (|r| < 0.3 weak, < 0.7 moderate, else strong) and direction
        abs_corr = abs(corr_coef)
        if abs_corr < 0.3:
            strength = "weak"
        elif abs_corr < 0.7:
            strength = "moderate"
        else:
            strength = "strong"

        direction = "positive" if corr_coef > 0 else "negative"

        return {
            "test_type": test_name,
            "correlation_coefficient": round(corr_coef, 4),
            "p_value": round(p_value, 6),
            "sample_size": len(x_data),
            "result": f"The correlation is {significance} (p = {p_value:.6f})",
            "interpretation": f"{strength.title()} {direction} correlation (r = {corr_coef:.4f})",
            "method": method.lower()
        }
    except Exception as e:
        return {"error": f"Error performing correlation test: {str(e)}"}


demo = gr.TabbedInterface(
    [
        gr.Interface(
            fn=independent_t_test,
            inputs=[
                gr.Textbox(placeholder="1.2,2.3,3.4,2.1", label="Group 1 (comma-separated)"),
                gr.Textbox(placeholder="2.1,3.2,4.1,3.5", label="Group 2 (comma-separated)"),
                gr.Checkbox(value=True, label="Equal variances"),
                gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
            ],
            outputs=gr.JSON(),
            title="Independent T-Test",
            description="Compare means between two independent groups"
        ),
        gr.Interface(
            fn=paired_t_test,
            inputs=[
                gr.Textbox(placeholder="10,12,11,13", label="Before (comma-separated)"),
                gr.Textbox(placeholder="12,14,13,15", label="After (comma-separated)"),
                gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
            ],
            outputs=gr.JSON(),
            title="Paired T-Test",
            description="Compare paired/matched samples"
        ),
        gr.Interface(
            fn=one_sample_t_test,
            inputs=[
                gr.Textbox(placeholder="10,12,11,13,9", label="Sample (comma-separated)"),
                gr.Number(value=10, label="Population mean"),
                gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
            ],
            outputs=gr.JSON(),
            title="One-Sample T-Test",
            description="Test sample mean against population mean"
        ),
        gr.Interface(
            fn=one_way_anova,
            inputs=[
                gr.Textbox(placeholder="1,2,3,2", label="Group 1 (comma-separated)"),
                gr.Textbox(placeholder="4,5,6,5", label="Group 2 (comma-separated)"),
                gr.Textbox(placeholder="7,8,9,8", label="Group 3 (comma-separated)", info="Optional"),
                gr.Textbox(placeholder="", label="Group 4 (comma-separated)", info="Optional"),
                gr.Textbox(placeholder="", label="Group 5 (comma-separated)", info="Optional")
            ],
            outputs=gr.JSON(),
            title="One-Way ANOVA",
            description="Compare means across multiple groups"
        ),
        gr.Interface(
            fn=chi_square_test,
            inputs=[
                gr.Textbox(placeholder="10,20,15,25", label="Observed frequencies (comma-separated)"),
                gr.Textbox(placeholder="", label="Expected frequencies (optional, comma-separated)")
            ],
            outputs=gr.JSON(),
            title="Chi-Square Test",
            description="Test goodness of fit for categorical data"
        ),
        gr.Interface(
            fn=correlation_test,
            inputs=[
                gr.Textbox(placeholder="1,2,3,4,5", label="X values (comma-separated)"),
                gr.Textbox(placeholder="2,4,6,8,10", label="Y values (comma-separated)"),
                gr.Dropdown(["pearson", "spearman", "kendall"], value="pearson", label="Correlation method")
            ],
            outputs=gr.JSON(),
            title="Correlation Analysis",
            description="Test correlation between two variables"
        )
    ],
    tab_names=["Independent T-Test", "Paired T-Test", "One-Sample T-Test", "ANOVA", "Chi-Square", "Correlation"]
)


if __name__ == "__main__":
    print(f"Gradio version: {gr.__version__}")
    # mcp_server=True also exposes the functions above as MCP tools; this
    # assumes a Gradio build with MCP support (e.g., pip install "gradio[mcp]")
    demo.launch(mcp_server=True)