import gradio as gr
import numpy as np
from scipy import stats
from typing import Any, Dict, Optional
def independent_t_test(group1: str, group2: str, equal_var: bool = True, alternative: str = "two-sided") -> Dict[str, Any]:
"""
Perform an independent samples t-test between two groups.
Args:
group1 (str): Comma-separated values for group 1 (e.g., "1.2,2.3,3.4,2.1")
group2 (str): Comma-separated values for group 2 (e.g., "2.1,3.2,4.1,3.5")
equal_var (bool): If True, perform standard t-test assuming equal variances. If False, perform Welch's t-test
alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'
Returns:
dict: Test results including t-statistic, p-value, degrees of freedom, and interpretation
"""
try:
# Parse input data
data1 = [float(x.strip()) for x in group1.split(',') if x.strip()]
data2 = [float(x.strip()) for x in group2.split(',') if x.strip()]
if len(data1) < 2 or len(data2) < 2:
return {"error": "Each group must have at least 2 observations"}
# Perform t-test
t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=equal_var, alternative=alternative)
# Calculate descriptive statistics
desc1 = {"mean": np.mean(data1), "std": np.std(data1, ddof=1), "n": len(data1)}
desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}
# Degrees of freedom
if equal_var:
df = len(data1) + len(data2) - 2
else:
            # Welch-Satterthwaite approximation to the degrees of freedom
s1_sq, s2_sq = desc1["std"]**2, desc2["std"]**2
n1, n2 = desc1["n"], desc2["n"]
df = (s1_sq/n1 + s2_sq/n2)**2 / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1))
# Effect size (Cohen's d)
pooled_std = np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2) / (len(data1)+len(data2)-2))
cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
return {
"test_type": f"Independent t-test ({'equal variances' if equal_var else 'unequal variances'})",
"t_statistic": round(t_stat, 4),
"p_value": round(p_value, 6),
"degrees_of_freedom": round(df, 2),
"cohens_d": round(cohens_d, 4),
"group1_stats": desc1,
"group2_stats": desc2,
"result": f"The difference between groups is {significance} (p = {p_value:.6f})",
"effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
"alternative_hypothesis": alternative
}
except Exception as e:
return {"error": f"Error performing t-test: {str(e)}"}
def paired_t_test(before: str, after: str, alternative: str = "two-sided") -> Dict[str, Any]:
"""
Perform a paired samples t-test.
Args:
before (str): Comma-separated values for before condition
after (str): Comma-separated values for after condition
        alternative (str): Alternative hypothesis for the (after - before) difference - 'two-sided', 'less', or 'greater'
Returns:
dict: Test results including t-statistic, p-value, and interpretation
"""
try:
# Parse input data
data_before = [float(x.strip()) for x in before.split(',') if x.strip()]
data_after = [float(x.strip()) for x in after.split(',') if x.strip()]
if len(data_before) != len(data_after):
return {"error": "Before and after groups must have the same number of observations"}
if len(data_before) < 2:
return {"error": "Need at least 2 paired observations"}
        # Perform the paired t-test on (after - before) so the sign of the
        # t-statistic agrees with mean_difference and Cohen's d computed below
        t_stat, p_value = stats.ttest_rel(data_after, data_before, alternative=alternative)
# Calculate differences and descriptive statistics
differences = np.array(data_after) - np.array(data_before)
mean_diff = np.mean(differences)
std_diff = np.std(differences, ddof=1)
# Effect size (Cohen's d for paired samples)
cohens_d = mean_diff / std_diff
# Degrees of freedom
df = len(data_before) - 1
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
return {
"test_type": "Paired t-test",
"t_statistic": round(t_stat, 4),
"p_value": round(p_value, 6),
"degrees_of_freedom": df,
"mean_difference": round(mean_diff, 4),
"std_difference": round(std_diff, 4),
"cohens_d": round(cohens_d, 4),
"before_mean": round(np.mean(data_before), 4),
"after_mean": round(np.mean(data_after), 4),
"result": f"The paired difference is {significance} (p = {p_value:.6f})",
"effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
"alternative_hypothesis": alternative
}
except Exception as e:
return {"error": f"Error performing paired t-test: {str(e)}"}
def one_sample_t_test(sample: str, population_mean: float, alternative: str = "two-sided") -> Dict[str, Any]:
"""
Perform a one-sample t-test against a population mean.
Args:
sample (str): Comma-separated sample values
population_mean (float): Hypothesized population mean
alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'
Returns:
dict: Test results including t-statistic, p-value, and interpretation
"""
try:
# Parse input data
data = [float(x.strip()) for x in sample.split(',') if x.strip()]
if len(data) < 2:
return {"error": "Sample must have at least 2 observations"}
# Perform one-sample t-test
t_stat, p_value = stats.ttest_1samp(data, population_mean, alternative=alternative)
# Calculate descriptive statistics
sample_mean = np.mean(data)
sample_std = np.std(data, ddof=1)
sample_size = len(data)
# Effect size (Cohen's d)
cohens_d = (sample_mean - population_mean) / sample_std
# Degrees of freedom
df = sample_size - 1
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
return {
"test_type": "One-sample t-test",
"t_statistic": round(t_stat, 4),
"p_value": round(p_value, 6),
"degrees_of_freedom": df,
"sample_mean": round(sample_mean, 4),
"population_mean": population_mean,
"sample_std": round(sample_std, 4),
"sample_size": sample_size,
"cohens_d": round(cohens_d, 4),
"result": f"Sample mean differs {significance}ly from population mean (p = {p_value:.6f})",
"effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
"alternative_hypothesis": alternative
}
except Exception as e:
return {"error": f"Error performing one-sample t-test: {str(e)}"}
def one_way_anova(*groups: str) -> Dict[str, Any]:
"""
Perform a one-way ANOVA test.
Args:
*groups: Variable number of comma-separated group values (minimum 2 groups)
Returns:
dict: ANOVA results including F-statistic, p-value, and interpretation
"""
try:
# Parse input data
parsed_groups = []
for i, group in enumerate(groups):
if not group.strip():
continue
data = [float(x.strip()) for x in group.split(',') if x.strip()]
if len(data) < 2:
return {"error": f"Group {i+1} must have at least 2 observations"}
parsed_groups.append(data)
if len(parsed_groups) < 2:
return {"error": "Need at least 2 groups for ANOVA"}
# Perform one-way ANOVA
f_stat, p_value = stats.f_oneway(*parsed_groups)
# Calculate descriptive statistics for each group
group_stats = []
overall_data = []
for i, group in enumerate(parsed_groups):
group_stats.append({
"group": i+1,
"n": len(group),
"mean": round(np.mean(group), 4),
"std": round(np.std(group, ddof=1), 4)
})
overall_data.extend(group)
# Calculate effect size (eta-squared)
# SS_between / SS_total
overall_mean = np.mean(overall_data)
ss_total = sum((x - overall_mean)**2 for x in overall_data)
ss_between = sum(len(group) * (np.mean(group) - overall_mean)**2 for group in parsed_groups)
eta_squared = ss_between / ss_total if ss_total > 0 else 0
# Degrees of freedom
df_between = len(parsed_groups) - 1
df_within = len(overall_data) - len(parsed_groups)
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
effect_size_interp = "small" if eta_squared < 0.06 else "medium" if eta_squared < 0.14 else "large"
return {
"test_type": "One-way ANOVA",
"f_statistic": round(f_stat, 4),
"p_value": round(p_value, 6),
"df_between": df_between,
"df_within": df_within,
"eta_squared": round(eta_squared, 4),
"group_statistics": group_stats,
"result": f"Group differences are {significance} (p = {p_value:.6f})",
"effect_size": f"Effect size (η² = {eta_squared:.4f}) is {effect_size_interp}",
"note": "If significant, consider post-hoc tests to identify specific group differences"
}
except Exception as e:
return {"error": f"Error performing ANOVA: {str(e)}"}
def chi_square_test(observed: str, expected: Optional[str] = None) -> Dict[str, Any]:
"""
Perform a chi-square goodness of fit test.
Args:
observed (str): Comma-separated observed frequencies
expected (str): Comma-separated expected frequencies (optional, defaults to equal distribution)
Returns:
dict: Chi-square test results
"""
try:
        # Parse observed frequencies
        obs_data = [float(x.strip()) for x in observed.split(',') if x.strip()]
        if len(obs_data) < 2:
            return {"error": "Need at least 2 categories for a chi-square test"}
        # Parse expected frequencies, or default to an equal distribution
        if expected and expected.strip():
            exp_data = [float(x.strip()) for x in expected.split(',') if x.strip()]
            if len(obs_data) != len(exp_data):
                return {"error": "Observed and expected must have the same number of categories"}
            # scipy.stats.chisquare raises if the observed and expected totals disagree
            if not np.isclose(sum(obs_data), sum(exp_data)):
                return {"error": "Observed and expected frequencies must sum to the same total"}
        else:
            # Equal distribution across all categories
            total = sum(obs_data)
            exp_data = [total / len(obs_data)] * len(obs_data)
# Perform chi-square test
chi2_stat, p_value = stats.chisquare(obs_data, exp_data)
# Degrees of freedom
df = len(obs_data) - 1
# Effect size (Cramér's V for goodness of fit)
n = sum(obs_data)
cramers_v = np.sqrt(chi2_stat / (n * (len(obs_data) - 1)))
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
effect_size_interp = "small" if cramers_v < 0.3 else "medium" if cramers_v < 0.5 else "large"
return {
"test_type": "Chi-square goodness of fit test",
"chi_square_statistic": round(chi2_stat, 4),
"p_value": round(p_value, 6),
"degrees_of_freedom": df,
"cramers_v": round(cramers_v, 4),
"observed_frequencies": obs_data,
"expected_frequencies": [round(x, 2) for x in exp_data],
"result": f"Observed frequencies differ {significance}ly from expected (p = {p_value:.6f})",
"effect_size": f"Effect size (Cramér's V = {cramers_v:.4f}) is {effect_size_interp}"
}
except Exception as e:
return {"error": f"Error performing chi-square test: {str(e)}"}
def correlation_test(x_values: str, y_values: str, method: str = "pearson") -> Dict[str, Any]:
"""
Perform correlation analysis between two variables.
Args:
x_values (str): Comma-separated X variable values
y_values (str): Comma-separated Y variable values
method (str): Correlation method - 'pearson', 'spearman', or 'kendall'
Returns:
dict: Correlation results including coefficient and p-value
"""
try:
# Parse input data
x_data = [float(x.strip()) for x in x_values.split(',') if x.strip()]
y_data = [float(y.strip()) for y in y_values.split(',') if y.strip()]
if len(x_data) != len(y_data):
return {"error": "X and Y variables must have the same number of observations"}
if len(x_data) < 3:
return {"error": "Need at least 3 observations for correlation"}
# Perform correlation test
if method.lower() == "pearson":
corr_coef, p_value = stats.pearsonr(x_data, y_data)
test_name = "Pearson correlation"
elif method.lower() == "spearman":
corr_coef, p_value = stats.spearmanr(x_data, y_data)
test_name = "Spearman rank correlation"
elif method.lower() == "kendall":
corr_coef, p_value = stats.kendalltau(x_data, y_data)
test_name = "Kendall's tau correlation"
else:
return {"error": "Method must be 'pearson', 'spearman', or 'kendall'"}
# Interpretation
significance = "significant" if p_value < 0.05 else "not significant"
# Correlation strength interpretation
abs_corr = abs(corr_coef)
if abs_corr < 0.3:
strength = "weak"
elif abs_corr < 0.7:
strength = "moderate"
else:
strength = "strong"
direction = "positive" if corr_coef > 0 else "negative"
return {
"test_type": test_name,
"correlation_coefficient": round(corr_coef, 4),
"p_value": round(p_value, 6),
"sample_size": len(x_data),
"result": f"The correlation is {significance} (p = {p_value:.6f})",
"interpretation": f"{strength.title()} {direction} correlation (r = {corr_coef:.4f})",
"method": method.lower()
}
except Exception as e:
return {"error": f"Error performing correlation test: {str(e)}"}
# Create Gradio interfaces for each function
demo = gr.TabbedInterface(
[
gr.Interface(
fn=independent_t_test,
inputs=[
gr.Textbox(placeholder="1.2,2.3,3.4,2.1", label="Group 1 (comma-separated)"),
gr.Textbox(placeholder="2.1,3.2,4.1,3.5", label="Group 2 (comma-separated)"),
gr.Checkbox(value=True, label="Equal variances"),
gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
],
outputs=gr.JSON(),
title="Independent T-Test",
description="Compare means between two independent groups"
),
gr.Interface(
fn=paired_t_test,
inputs=[
gr.Textbox(placeholder="10,12,11,13", label="Before (comma-separated)"),
gr.Textbox(placeholder="12,14,13,15", label="After (comma-separated)"),
gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
],
outputs=gr.JSON(),
title="Paired T-Test",
description="Compare paired/matched samples"
),
gr.Interface(
fn=one_sample_t_test,
inputs=[
gr.Textbox(placeholder="10,12,11,13,9", label="Sample (comma-separated)"),
gr.Number(value=10, label="Population mean"),
gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
],
outputs=gr.JSON(),
title="One-Sample T-Test",
description="Test sample mean against population mean"
),
gr.Interface(
fn=one_way_anova,
inputs=[
gr.Textbox(placeholder="1,2,3,2", label="Group 1 (comma-separated)"),
gr.Textbox(placeholder="4,5,6,5", label="Group 2 (comma-separated)"),
gr.Textbox(placeholder="7,8,9,8", label="Group 3 (comma-separated)", info="Optional"),
gr.Textbox(placeholder="", label="Group 4 (comma-separated)", info="Optional"),
gr.Textbox(placeholder="", label="Group 5 (comma-separated)", info="Optional")
],
outputs=gr.JSON(),
title="One-Way ANOVA",
description="Compare means across multiple groups"
),
gr.Interface(
fn=chi_square_test,
inputs=[
gr.Textbox(placeholder="10,20,15,25", label="Observed frequencies (comma-separated)"),
gr.Textbox(placeholder="", label="Expected frequencies (optional, comma-separated)")
],
outputs=gr.JSON(),
title="Chi-Square Test",
description="Test goodness of fit for categorical data"
),
gr.Interface(
fn=correlation_test,
inputs=[
gr.Textbox(placeholder="1,2,3,4,5", label="X values (comma-separated)"),
gr.Textbox(placeholder="2,4,6,8,10", label="Y values (comma-separated)"),
gr.Dropdown(["pearson", "spearman", "kendall"], value="pearson", label="Correlation method")
],
outputs=gr.JSON(),
title="Correlation Analysis",
description="Test correlation between two variables"
)
],
tab_names=["Independent T-Test", "Paired T-Test", "One-Sample T-Test", "ANOVA", "Chi-Square", "Correlation"]
)
if __name__ == "__main__":
print(f"Gradio version: {gr.__version__}")
demo.launch(mcp_server=True)