import gradio as gr
import numpy as np
from scipy import stats
from typing import Any, Dict, Optional
def independent_t_test(group1: str, group2: str, equal_var: bool = True, alternative: str = "two-sided") -> Dict[str, Any]:
"""
Perform an independent samples t-test between two groups.
Args:
group1 (str): Comma-separated values for group 1 (e.g., "1.2,2.3,3.4,2.1")
group2 (str): Comma-separated values for group 2 (e.g., "2.1,3.2,4.1,3.5")
equal_var (bool): If True, perform standard t-test assuming equal variances. If False, perform Welch's t-test
alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'
Returns:
dict: Test results including t-statistic, p-value, degrees of freedom, and interpretation
"""
try:
# Parse input data
data1 = [float(x.strip()) for x in group1.split(',') if x.strip()]
data2 = [float(x.strip()) for x in group2.split(',') if x.strip()]
if len(data1) < 2 or len(data2) < 2:
return {"error": "Each group must have at least 2 observations"}
# Perform t-test
t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=equal_var, alternative=alternative)
# Calculate descriptive statistics
desc1 = {"mean": np.mean(data1), "std": np.std(data1, ddof=1), "n": len(data1)}
desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}
# Degrees of freedom
if equal_var:
df = len(data1) + len(data2) - 2
else:
            # Welch-Satterthwaite approximation to the degrees of freedom
s1_sq, s2_sq = desc1["std"]**2, desc2["std"]**2
n1, n2 = desc1["n"], desc2["n"]
df = (s1_sq/n1 + s2_sq/n2)**2 / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1))
# Effect size (Cohen's d)
pooled_std = np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2) / (len(data1)+len(data2)-2))
cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
return {
"test_type": f"Independent t-test ({'equal variances' if equal_var else 'unequal variances'})",
"t_statistic": round(t_stat, 4),
"p_value": round(p_value, 6),
"degrees_of_freedom": round(df, 2),
"cohens_d": round(cohens_d, 4),
"group1_stats": desc1,
"group2_stats": desc2,
"result": f"The difference between groups is {significance} (p = {p_value:.6f})",
"effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
"alternative_hypothesis": alternative
}
except Exception as e:
return {"error": f"Error performing t-test: {str(e)}"}
def paired_t_test(before: str, after: str, alternative: str = "two-sided") -> Dict[str, Any]:
"""
Perform a paired samples t-test.
Args:
before (str): Comma-separated values for before condition
after (str): Comma-separated values for after condition
        alternative (str): Alternative hypothesis for the (after - before) difference - 'two-sided', 'less', or 'greater'
Returns:
dict: Test results including t-statistic, p-value, and interpretation
"""
try:
# Parse input data
data_before = [float(x.strip()) for x in before.split(',') if x.strip()]
data_after = [float(x.strip()) for x in after.split(',') if x.strip()]
if len(data_before) != len(data_after):
return {"error": "Before and after groups must have the same number of observations"}
if len(data_before) < 2:
return {"error": "Need at least 2 paired observations"}
        # Perform the paired t-test on (after - before) so the sign of the
        # t-statistic agrees with mean_difference and Cohen's d computed below
        t_stat, p_value = stats.ttest_rel(data_after, data_before, alternative=alternative)
# Calculate differences and descriptive statistics
differences = np.array(data_after) - np.array(data_before)
mean_diff = np.mean(differences)
std_diff = np.std(differences, ddof=1)
# Effect size (Cohen's d for paired samples)
cohens_d = mean_diff / std_diff
# Degrees of freedom
df = len(data_before) - 1
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
return {
"test_type": "Paired t-test",
"t_statistic": round(t_stat, 4),
"p_value": round(p_value, 6),
"degrees_of_freedom": df,
"mean_difference": round(mean_diff, 4),
"std_difference": round(std_diff, 4),
"cohens_d": round(cohens_d, 4),
"before_mean": round(np.mean(data_before), 4),
"after_mean": round(np.mean(data_after), 4),
"result": f"The paired difference is {significance} (p = {p_value:.6f})",
"effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
"alternative_hypothesis": alternative
}
except Exception as e:
return {"error": f"Error performing paired t-test: {str(e)}"}
def one_sample_t_test(sample: str, population_mean: float, alternative: str = "two-sided") -> Dict[str, Any]:
"""
Perform a one-sample t-test against a population mean.
Args:
sample (str): Comma-separated sample values
population_mean (float): Hypothesized population mean
alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'
Returns:
dict: Test results including t-statistic, p-value, and interpretation
"""
try:
# Parse input data
data = [float(x.strip()) for x in sample.split(',') if x.strip()]
if len(data) < 2:
return {"error": "Sample must have at least 2 observations"}
# Perform one-sample t-test
t_stat, p_value = stats.ttest_1samp(data, population_mean, alternative=alternative)
# Calculate descriptive statistics
sample_mean = np.mean(data)
sample_std = np.std(data, ddof=1)
sample_size = len(data)
# Effect size (Cohen's d)
cohens_d = (sample_mean - population_mean) / sample_std
# Degrees of freedom
df = sample_size - 1
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
return {
"test_type": "One-sample t-test",
"t_statistic": round(t_stat, 4),
"p_value": round(p_value, 6),
"degrees_of_freedom": df,
"sample_mean": round(sample_mean, 4),
"population_mean": population_mean,
"sample_std": round(sample_std, 4),
"sample_size": sample_size,
"cohens_d": round(cohens_d, 4),
"result": f"Sample mean differs {significance}ly from population mean (p = {p_value:.6f})",
"effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
"alternative_hypothesis": alternative
}
except Exception as e:
return {"error": f"Error performing one-sample t-test: {str(e)}"}
def one_way_anova(*groups: str) -> Dict[str, Any]:
"""
Perform a one-way ANOVA test.
Args:
*groups: Variable number of comma-separated group values (minimum 2 groups)
Returns:
dict: ANOVA results including F-statistic, p-value, and interpretation
"""
try:
# Parse input data
parsed_groups = []
for i, group in enumerate(groups):
if not group.strip():
continue
data = [float(x.strip()) for x in group.split(',') if x.strip()]
if len(data) < 2:
return {"error": f"Group {i+1} must have at least 2 observations"}
parsed_groups.append(data)
if len(parsed_groups) < 2:
return {"error": "Need at least 2 groups for ANOVA"}
# Perform one-way ANOVA
f_stat, p_value = stats.f_oneway(*parsed_groups)
# Calculate descriptive statistics for each group
group_stats = []
overall_data = []
for i, group in enumerate(parsed_groups):
group_stats.append({
"group": i+1,
"n": len(group),
"mean": round(np.mean(group), 4),
"std": round(np.std(group, ddof=1), 4)
})
overall_data.extend(group)
# Calculate effect size (eta-squared)
# SS_between / SS_total
overall_mean = np.mean(overall_data)
ss_total = sum((x - overall_mean)**2 for x in overall_data)
ss_between = sum(len(group) * (np.mean(group) - overall_mean)**2 for group in parsed_groups)
eta_squared = ss_between / ss_total if ss_total > 0 else 0
# Degrees of freedom
df_between = len(parsed_groups) - 1
df_within = len(overall_data) - len(parsed_groups)
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
effect_size_interp = "small" if eta_squared < 0.06 else "medium" if eta_squared < 0.14 else "large"
return {
"test_type": "One-way ANOVA",
"f_statistic": round(f_stat, 4),
"p_value": round(p_value, 6),
"df_between": df_between,
"df_within": df_within,
"eta_squared": round(eta_squared, 4),
"group_statistics": group_stats,
"result": f"Group differences are {significance} (p = {p_value:.6f})",
"effect_size": f"Effect size (η² = {eta_squared:.4f}) is {effect_size_interp}",
"note": "If significant, consider post-hoc tests to identify specific group differences"
}
except Exception as e:
return {"error": f"Error performing ANOVA: {str(e)}"}
def chi_square_test(observed: str, expected: Optional[str] = None) -> Dict[str, Any]:
"""
Perform a chi-square goodness of fit test.
Args:
observed (str): Comma-separated observed frequencies
expected (str): Comma-separated expected frequencies (optional, defaults to equal distribution)
Returns:
dict: Chi-square test results
"""
try:
        # Parse observed frequencies
        obs_data = [float(x.strip()) for x in observed.split(',') if x.strip()]
        if len(obs_data) < 2:
            return {"error": "Need at least 2 categories for a chi-square test"}
        # Parse expected frequencies, or default to an equal distribution
        if expected and expected.strip():
            exp_data = [float(x.strip()) for x in expected.split(',') if x.strip()]
            if len(obs_data) != len(exp_data):
                return {"error": "Observed and expected must have the same number of categories"}
            # scipy.stats.chisquare raises if the observed and expected totals disagree
            if not np.isclose(sum(obs_data), sum(exp_data)):
                return {"error": "Observed and expected frequencies must sum to the same total"}
        else:
            # Equal distribution across all categories
            total = sum(obs_data)
            exp_data = [total / len(obs_data)] * len(obs_data)
# Perform chi-square test
chi2_stat, p_value = stats.chisquare(obs_data, exp_data)
# Degrees of freedom
df = len(obs_data) - 1
# Effect size (Cramér's V for goodness of fit)
n = sum(obs_data)
cramers_v = np.sqrt(chi2_stat / (n * (len(obs_data) - 1)))
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
effect_size_interp = "small" if cramers_v < 0.3 else "medium" if cramers_v < 0.5 else "large"
return {
"test_type": "Chi-square goodness of fit test",
"chi_square_statistic": round(chi2_stat, 4),
"p_value": round(p_value, 6),
"degrees_of_freedom": df,
"cramers_v": round(cramers_v, 4),
"observed_frequencies": obs_data,
"expected_frequencies": [round(x, 2) for x in exp_data],
"result": f"Observed frequencies differ {significance}ly from expected (p = {p_value:.6f})",
"effect_size": f"Effect size (Cramér's V = {cramers_v:.4f}) is {effect_size_interp}"
}
except Exception as e:
return {"error": f"Error performing chi-square test: {str(e)}"}
def correlation_test(x_values: str, y_values: str, method: str = "pearson") -> Dict[str, Any]:
"""
Perform correlation analysis between two variables.
Args:
x_values (str): Comma-separated X variable values
y_values (str): Comma-separated Y variable values
method (str): Correlation method - 'pearson', 'spearman', or 'kendall'
Returns:
dict: Correlation results including coefficient and p-value
"""
try:
# Parse input data
x_data = [float(x.strip()) for x in x_values.split(',') if x.strip()]
y_data = [float(y.strip()) for y in y_values.split(',') if y.strip()]
if len(x_data) != len(y_data):
return {"error": "X and Y variables must have the same number of observations"}
if len(x_data) < 3:
return {"error": "Need at least 3 observations for correlation"}
# Perform correlation test
if method.lower() == "pearson":
corr_coef, p_value = stats.pearsonr(x_data, y_data)
test_name = "Pearson correlation"
elif method.lower() == "spearman":
corr_coef, p_value = stats.spearmanr(x_data, y_data)
test_name = "Spearman rank correlation"
elif method.lower() == "kendall":
corr_coef, p_value = stats.kendalltau(x_data, y_data)
test_name = "Kendall's tau correlation"
else:
return {"error": "Method must be 'pearson', 'spearman', or 'kendall'"}
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
# Correlation strength interpretation
abs_corr = abs(corr_coef)
if abs_corr < 0.3:
strength = "weak"
elif abs_corr < 0.7:
strength = "moderate"
else:
strength = "strong"
direction = "positive" if corr_coef > 0 else "negative"
return {
"test_type": test_name,
"correlation_coefficient": round(corr_coef, 4),
"p_value": round(p_value, 6),
"sample_size": len(x_data),
"result": f"The correlation is {significance} (p = {p_value:.6f})",
"interpretation": f"{strength.title()} {direction} correlation (r = {corr_coef:.4f})",
"method": method.lower()
}
except Exception as e:
return {"error": f"Error performing correlation test: {str(e)}"}
# Create Gradio interfaces for each function
demo = gr.TabbedInterface(
[
gr.Interface(
fn=independent_t_test,
inputs=[
gr.Textbox(placeholder="1.2,2.3,3.4,2.1", label="Group 1 (comma-separated)"),
gr.Textbox(placeholder="2.1,3.2,4.1,3.5", label="Group 2 (comma-separated)"),
gr.Checkbox(value=True, label="Equal variances"),
gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
],
outputs=gr.JSON(),
title="Independent T-Test",
description="Compare means between two independent groups"
),
gr.Interface(
fn=paired_t_test,
inputs=[
gr.Textbox(placeholder="10,12,11,13", label="Before (comma-separated)"),
gr.Textbox(placeholder="12,14,13,15", label="After (comma-separated)"),
gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
],
outputs=gr.JSON(),
title="Paired T-Test",
description="Compare paired/matched samples"
),
gr.Interface(
fn=one_sample_t_test,
inputs=[
gr.Textbox(placeholder="10,12,11,13,9", label="Sample (comma-separated)"),
gr.Number(value=10, label="Population mean"),
gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
],
outputs=gr.JSON(),
title="One-Sample T-Test",
description="Test sample mean against population mean"
),
gr.Interface(
fn=one_way_anova,
inputs=[
gr.Textbox(placeholder="1,2,3,2", label="Group 1 (comma-separated)"),
gr.Textbox(placeholder="4,5,6,5", label="Group 2 (comma-separated)"),
gr.Textbox(placeholder="7,8,9,8", label="Group 3 (comma-separated)", info="Optional"),
gr.Textbox(placeholder="", label="Group 4 (comma-separated)", info="Optional"),
gr.Textbox(placeholder="", label="Group 5 (comma-separated)", info="Optional")
],
outputs=gr.JSON(),
title="One-Way ANOVA",
description="Compare means across multiple groups"
),
gr.Interface(
fn=chi_square_test,
inputs=[
gr.Textbox(placeholder="10,20,15,25", label="Observed frequencies (comma-separated)"),
gr.Textbox(placeholder="", label="Expected frequencies (optional, comma-separated)")
],
outputs=gr.JSON(),
title="Chi-Square Test",
description="Test goodness of fit for categorical data"
),
gr.Interface(
fn=correlation_test,
inputs=[
gr.Textbox(placeholder="1,2,3,4,5", label="X values (comma-separated)"),
gr.Textbox(placeholder="2,4,6,8,10", label="Y values (comma-separated)"),
gr.Dropdown(["pearson", "spearman", "kendall"], value="pearson", label="Correlation method")
],
outputs=gr.JSON(),
title="Correlation Analysis",
description="Test correlation between two variables"
)
],
tab_names=["Independent T-Test", "Paired T-Test", "One-Sample T-Test", "ANOVA", "Chi-Square", "Correlation"]
)
if __name__ == "__main__":
print(f"Gradio version: {gr.__version__}")
demo.launch(mcp_server=True)