Update app.py
Browse files
app.py
CHANGED
@@ -1,749 +1,439 @@
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
from scipy import stats
|
5 |
-
from typing import List, Dict, Any,
|
|
|
6 |
|
7 |
-
def
|
8 |
"""
|
9 |
-
|
10 |
|
11 |
Args:
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
Raises:
|
18 |
-
ValueError: If data cannot be parsed as numeric values
|
19 |
-
|
20 |
-
Example:
|
21 |
-
>>> parse_numeric_input("85.2,90.1,78.5,92.3")
|
22 |
-
[85.2, 90.1, 78.5, 92.3]
|
23 |
-
"""
|
24 |
-
try:
|
25 |
-
parsed = [float(x.strip()) for x in data.split(',') if x.strip()]
|
26 |
-
if not parsed:
|
27 |
-
raise ValueError("No valid numbers found in input string")
|
28 |
-
return parsed
|
29 |
-
except ValueError as e:
|
30 |
-
if "could not convert" in str(e):
|
31 |
-
raise ValueError(f"Cannot parse '{data}' as comma-separated numbers")
|
32 |
-
raise e
|
33 |
-
|
34 |
-
def welch_t_test(
|
35 |
-
dataframe: Optional[pd.DataFrame] = None,
|
36 |
-
group1_str: Optional[str] = None,
|
37 |
-
group2_str: Optional[str] = None,
|
38 |
-
alternative: str = "two-sided",
|
39 |
-
alpha: float = 0.05,
|
40 |
-
effect_thresholds: str = "0.2,0.5,0.8"
|
41 |
-
) -> Dict[str, Any]:
|
42 |
-
"""
|
43 |
-
Welch's t-test supporting both DataFrame and string inputs for maximum compatibility.
|
44 |
-
|
45 |
-
Welch's t-test determines if there is a statistically significant difference between
|
46 |
-
the means of group1 and group2. Unlike Student's t-test, this does NOT assume equal
|
47 |
-
variances between groups, making it more robust and generally recommended for most situations.
|
48 |
-
|
49 |
-
WHEN TO USE: Compare average scores between two independent groups when you cannot assume
|
50 |
-
equal variances, or as a safer default choice. Preferred over Student's t-test in most cases.
|
51 |
-
|
52 |
-
Args:
|
53 |
-
dataframe (Optional[pd.DataFrame]): DataFrame containing group data in first two columns.
|
54 |
-
If provided, group1_str and group2_str will be ignored.
|
55 |
-
group1_str (Optional[str]): Comma-separated string of numeric values for the first group.
|
56 |
-
Example: "12.1,15.3,18.7,14.2,16.8" (reaction times for Group A)
|
57 |
-
Only used if dataframe is None or empty.
|
58 |
-
group2_str (Optional[str]): Comma-separated string of numeric values for the second group.
|
59 |
-
Example: "22.4,19.8,25.1,21.3" (reaction times for Group B)
|
60 |
-
Only used if dataframe is None or empty.
|
61 |
-
alternative (str): Direction of the alternative hypothesis:
|
62 |
-
- "two-sided": group1 mean ≠ group2 mean (different in either direction)
|
63 |
-
- "less": group1 mean < group2 mean (group1 is smaller)
|
64 |
-
- "greater": group1 mean > group2 mean (group1 is larger)
|
65 |
-
alpha (float): Significance level for the test (probability of Type I error).
|
66 |
-
Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
|
67 |
-
effect_thresholds (str): Three comma-separated values defining Cohen's d effect size boundaries.
|
68 |
-
Format: "small_threshold,medium_threshold,large_threshold"
|
69 |
-
Default "0.2,0.5,0.8" means: <0.2=negligible, 0.2-0.5=small, 0.5-0.8=medium, >0.8=large
|
70 |
|
71 |
Returns:
|
72 |
-
dict:
|
73 |
-
- test_type (str): Always "Welch's t-test (unequal variances)"
|
74 |
-
- t_statistic (float): The calculated t-value using Welch's formula
|
75 |
-
- p_value (float): Probability of observing this result if null hypothesis is true
|
76 |
-
- degrees_of_freedom (float): Welch's adjusted df (usually non-integer), accounts for unequal variances
|
77 |
-
- cohens_d (float): Standardized effect size. Positive means group1 > group2, negative means group1 < group2
|
78 |
-
- pooled_std (float): Pooled standard deviation used in effect size calculation
|
79 |
-
- group1_stats (dict): Descriptive statistics for group1 (mean, std, n)
|
80 |
-
- group2_stats (dict): Descriptive statistics for group2 (mean, std, n)
|
81 |
-
- significant (bool): True if p_value < alpha
|
82 |
-
- effect_size (str): Categorical interpretation of Cohen's d magnitude
|
83 |
-
- alternative_hypothesis (str): Echo of alternative parameter
|
84 |
-
- alpha (float): Echo of significance level used
|
85 |
-
- effect_thresholds (List[float]): Echo of effect size thresholds used
|
86 |
-
- input_method (str): "dataframe" or "strings" - indicates which input method was used
|
87 |
"""
|
88 |
try:
|
89 |
-
# Parse
|
90 |
-
|
91 |
-
|
92 |
-
if len(thresholds) != 3:
|
93 |
-
return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}
|
94 |
-
except:
|
95 |
-
return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.2,0.5,0.8')"}
|
96 |
-
|
97 |
-
# Method 1: DataFrame input (preferred for LLMs and data pipelines)
|
98 |
-
if dataframe is not None and not dataframe.empty:
|
99 |
-
# Use first two columns automatically
|
100 |
-
if len(dataframe.columns) < 2:
|
101 |
-
return {"error": f"DataFrame must have at least 2 columns. Found {len(dataframe.columns)} columns."}
|
102 |
-
|
103 |
-
# Extract and validate data from first two columns
|
104 |
-
try:
|
105 |
-
# Convert to numeric, coercing errors to NaN
|
106 |
-
col1_numeric = pd.to_numeric(dataframe.iloc[:, 0], errors='coerce')
|
107 |
-
col2_numeric = pd.to_numeric(dataframe.iloc[:, 1], errors='coerce')
|
108 |
-
|
109 |
-
# Remove NaN values and convert to list
|
110 |
-
group1 = col1_numeric.dropna().tolist()
|
111 |
-
group2 = col2_numeric.dropna().tolist()
|
112 |
-
|
113 |
-
# Check if we lost too much data due to non-numeric values
|
114 |
-
original_count1 = len(dataframe.iloc[:, 0].dropna())
|
115 |
-
original_count2 = len(dataframe.iloc[:, 1].dropna())
|
116 |
-
|
117 |
-
if len(group1) < original_count1 * 0.5: # Lost more than 50% of data
|
118 |
-
return {"error": f"Column 1 contains too many non-numeric values. Only {len(group1)} out of {original_count1} values could be converted to numbers."}
|
119 |
-
|
120 |
-
if len(group2) < original_count2 * 0.5: # Lost more than 50% of data
|
121 |
-
return {"error": f"Column 2 contains too many non-numeric values. Only {len(group2)} out of {original_count2} values could be converted to numbers."}
|
122 |
-
|
123 |
-
input_method = "dataframe"
|
124 |
-
|
125 |
-
except Exception as e:
|
126 |
-
return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}
|
127 |
-
|
128 |
-
# Method 2: String input (preferred for humans and simple use cases)
|
129 |
-
elif group1_str and group2_str and group1_str.strip() and group2_str.strip():
|
130 |
-
try:
|
131 |
-
group1 = parse_numeric_input(group1_str)
|
132 |
-
group2 = parse_numeric_input(group2_str)
|
133 |
-
input_method = "strings"
|
134 |
-
except ValueError as e:
|
135 |
-
return {"error": f"String parsing error: {str(e)}"}
|
136 |
-
|
137 |
-
else:
|
138 |
-
return {"error": "Please provide either a DataFrame with data OR comma-separated strings for both groups. Do not leave inputs empty."}
|
139 |
-
|
140 |
-
# Validate extracted data
|
141 |
-
if len(group1) < 2:
|
142 |
-
return {"error": f"Group 1 must have at least 2 observations. Found {len(group1)} values."}
|
143 |
|
144 |
-
if len(
|
145 |
-
return {"error":
|
146 |
|
147 |
-
# Perform
|
148 |
-
|
149 |
-
data1 = np.array(group1)
|
150 |
-
data2 = np.array(group2)
|
151 |
-
|
152 |
-
# Perform Welch's t-test (unequal variances)
|
153 |
-
t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=False, alternative=alternative)
|
154 |
|
155 |
# Calculate descriptive statistics
|
156 |
desc1 = {"mean": np.mean(data1), "std": np.std(data1, ddof=1), "n": len(data1)}
|
157 |
desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}
|
158 |
|
159 |
-
#
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
163 |
|
164 |
-
# Effect size (Cohen's d
|
165 |
-
|
166 |
-
pooled_std = np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2) / (len(data1) + len(data2) - 2))
|
167 |
cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std
|
168 |
|
169 |
-
# Interpretation
|
170 |
-
|
171 |
-
|
172 |
-
small_threshold, medium_threshold, large_threshold = thresholds
|
173 |
-
if abs_d < small_threshold:
|
174 |
-
effect_size_interp = "negligible"
|
175 |
-
elif abs_d < medium_threshold:
|
176 |
-
effect_size_interp = "small"
|
177 |
-
elif abs_d < large_threshold:
|
178 |
-
effect_size_interp = "medium"
|
179 |
-
else:
|
180 |
-
effect_size_interp = "large"
|
181 |
|
182 |
return {
|
183 |
-
"test_type": "
|
184 |
-
"t_statistic": t_stat,
|
185 |
-
"p_value": p_value,
|
186 |
-
"degrees_of_freedom": df,
|
187 |
-
"cohens_d": cohens_d,
|
188 |
-
"pooled_std": pooled_std,
|
189 |
"group1_stats": desc1,
|
190 |
"group2_stats": desc2,
|
191 |
-
"
|
192 |
-
"effect_size": effect_size_interp,
|
193 |
-
"alternative_hypothesis": alternative
|
194 |
-
"alpha": alpha,
|
195 |
-
"effect_thresholds": thresholds,
|
196 |
-
"input_method": input_method
|
197 |
}
|
198 |
-
|
199 |
except Exception as e:
|
200 |
-
return {"error": f"
|
201 |
|
202 |
-
def
|
203 |
-
dataframe: Optional[pd.DataFrame] = None,
|
204 |
-
group1_str: Optional[str] = None,
|
205 |
-
group2_str: Optional[str] = None,
|
206 |
-
alternative: str = "two-sided",
|
207 |
-
alpha: float = 0.05,
|
208 |
-
effect_thresholds: str = "0.2,0.5,0.8"
|
209 |
-
) -> Dict[str, Any]:
|
210 |
"""
|
211 |
-
|
212 |
-
|
213 |
-
Student's t-test is used to determine if there is a statistically significant difference between the means of two sets of sampled numbers, group1 and group2.
|
214 |
-
This test produces a key statistic known as the t_statistic. Depending on the 'alternative hypothesis' considered (e.g. group1 mean < group2 mean or simply
|
215 |
-
group1 mean ≠ group2 mean), the test quantifies the probability of observing the result (or more extreme) given the 'null hypothesis' is true (i.e. no difference exists)
|
216 |
-
as p_value. If the p_value falls below the threshold alpha, then the result is considered statistically significant, meaning we reject the null hypothesis in
|
217 |
-
favor of the alternative. cohens_d measures effect size, the practical magnitude of the difference between the means of group1 and group2, standardized by pooled standard
|
218 |
-
deviation. It can be interpreted with the help of effect_thresholds. This test assumes both groups have equal variances and normal distributions. Use Welch's t-test if variances are unequal.
|
219 |
-
|
220 |
-
You should supply either a dataframe with the first 2 columns containing sample data (ideal for large datasets or data pipelines), or strings (group1 and group2) containing
|
221 |
-
comma-delimited lists of sampled data (ideal for small, simple data sets).
|
222 |
-
|
223 |
-
WHEN TO USE: Compare average scores between two independent groups (e.g., treatment vs control,
|
224 |
-
before vs after with different participants, male vs female performance)
|
225 |
|
226 |
Args:
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
Example: "85.2,90.1,78.5,92.3" (test scores for Group A)
|
231 |
-
Only used if dataframe is None or empty.
|
232 |
-
group2_str (Optional[str]): Comma-separated string of numeric values for the second group.
|
233 |
-
Example: "88.1,85.7,91.2,87.4" (test scores for Group B)
|
234 |
-
Only used if dataframe is None or empty.
|
235 |
-
alternative (str): Direction of the alternative hypothesis:
|
236 |
-
- "two-sided": group1 mean ≠ group2 mean (different in either direction)
|
237 |
-
- "less": group1 mean < group2 mean (group1 is smaller)
|
238 |
-
- "greater": group1 mean > group2 mean (group1 is larger)
|
239 |
-
alpha (float): Significance level for the test (probability of Type I error).
|
240 |
-
Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
|
241 |
-
effect_thresholds (str): Three comma-separated values defining Cohen's d effect size boundaries.
|
242 |
-
Format: "small_threshold,medium_threshold,large_threshold"
|
243 |
-
Default "0.2,0.5,0.8" means: <0.2=negligible, 0.2-0.5=small, 0.5-0.8=medium, >0.8=large
|
244 |
-
These are Cohen's canonical benchmarks for effect size interpretation.
|
245 |
|
246 |
Returns:
|
247 |
-
dict:
|
248 |
-
- test_type (str): Always "Student's t-test"
|
249 |
-
- t_statistic (float): The calculated t-value, which measures how many standard errors the difference
|
250 |
-
between group means is away from zero (assuming the null hypothesis is true).
|
251 |
-
Larger absolute values indicate the observed difference is less likely under the null hypothesis.
|
252 |
-
- p_value (float): Probability of observing this result (or more extreme) if null hypothesis is true.
|
253 |
-
Values < alpha indicate statistical significance.
|
254 |
-
- degrees_of_freedom (int): df = n1 + n2 - 2, degrees of freedom for the pooled variance estimate, used for determining critical t-values.
|
255 |
-
- cohens_d (float): Effect size measure. Positive means group1 > group2, negative means group1 < group2.
|
256 |
-
Interpreted using Cohen's canonical benchmarks: negligible (<0.2), small (0.2), medium (0.5), large (0.8).
|
257 |
-
- pooled_std (float): Combined standard deviation used in Cohen's d calculation.
|
258 |
-
- group1_stats (dict): Descriptive statistics for group1 (mean, std, n)
|
259 |
-
- group2_stats (dict): Descriptive statistics for group2 (mean, std, n)
|
260 |
-
- significant (bool): True if p_value < alpha, False otherwise
|
261 |
-
- effect_size (str): Categorical interpretation ("negligible", "small", "medium", "large") based on |cohens_d| and effect_thresholds
|
262 |
-
- alternative_hypothesis (str): Echo of the alternative parameter used
|
263 |
-
- alpha (float): Echo of the significance level used
|
264 |
-
- effect_thresholds (List[float]): Echo of the thresholds used
|
265 |
-
- input_method (str): "dataframe" or "strings" - indicates which input method was used
|
266 |
"""
|
267 |
try:
|
268 |
-
# Parse
|
269 |
-
|
270 |
-
|
271 |
-
if len(thresholds) != 3:
|
272 |
-
return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}
|
273 |
-
except:
|
274 |
-
return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.2,0.5,0.8')"}
|
275 |
-
|
276 |
-
# Method 1: DataFrame input (preferred for LLMs and data pipelines)
|
277 |
-
if dataframe is not None and not dataframe.empty:
|
278 |
-
# Use first two columns automatically
|
279 |
-
if len(dataframe.columns) < 2:
|
280 |
-
return {"error": f"DataFrame must have at least 2 columns. Found {len(dataframe.columns)} columns."}
|
281 |
-
|
282 |
-
# Extract and validate data from first two columns
|
283 |
-
try:
|
284 |
-
# Convert to numeric, coercing errors to NaN
|
285 |
-
col1_numeric = pd.to_numeric(dataframe.iloc[:, 0], errors='coerce')
|
286 |
-
col2_numeric = pd.to_numeric(dataframe.iloc[:, 1], errors='coerce')
|
287 |
-
|
288 |
-
# Remove NaN values and convert to list
|
289 |
-
group1 = col1_numeric.dropna().tolist()
|
290 |
-
group2 = col2_numeric.dropna().tolist()
|
291 |
-
|
292 |
-
# Check if we lost too much data due to non-numeric values
|
293 |
-
original_count1 = len(dataframe.iloc[:, 0].dropna())
|
294 |
-
original_count2 = len(dataframe.iloc[:, 1].dropna())
|
295 |
-
|
296 |
-
if len(group1) < original_count1 * 0.5: # Lost more than 50% of data
|
297 |
-
return {"error": f"Column 1 contains too many non-numeric values. Only {len(group1)} out of {original_count1} values could be converted to numbers."}
|
298 |
-
|
299 |
-
if len(group2) < original_count2 * 0.5: # Lost more than 50% of data
|
300 |
-
return {"error": f"Column 2 contains too many non-numeric values. Only {len(group2)} out of {original_count2} values could be converted to numbers."}
|
301 |
-
|
302 |
-
input_method = "dataframe"
|
303 |
-
|
304 |
-
except Exception as e:
|
305 |
-
return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}
|
306 |
-
|
307 |
-
# Method 2: String input (preferred for humans and simple use cases)
|
308 |
-
elif group1_str and group2_str and group1_str.strip() and group2_str.strip():
|
309 |
-
try:
|
310 |
-
group1 = parse_numeric_input(group1_str)
|
311 |
-
group2 = parse_numeric_input(group2_str)
|
312 |
-
input_method = "strings"
|
313 |
-
except ValueError as e:
|
314 |
-
return {"error": f"String parsing error: {str(e)}"}
|
315 |
-
|
316 |
-
else:
|
317 |
-
return {"error": "Please provide either a DataFrame with data OR comma-separated strings for both groups. Do not leave inputs empty."}
|
318 |
|
319 |
-
|
320 |
-
|
321 |
-
return {"error": f"Group 1 must have at least 2 observations. Found {len(group1)} values."}
|
322 |
|
323 |
-
if len(
|
324 |
-
return {"error":
|
325 |
|
326 |
-
# Perform
|
327 |
-
|
328 |
-
data1 = np.array(group1)
|
329 |
-
data2 = np.array(group2)
|
330 |
|
331 |
-
#
|
332 |
-
|
|
|
|
|
333 |
|
334 |
-
#
|
335 |
-
|
336 |
-
desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}
|
337 |
|
338 |
-
# Degrees of freedom
|
339 |
-
df = len(
|
340 |
|
341 |
-
#
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
# Interpretation using Cohen's canonical benchmarks
|
346 |
-
significant = p_value < alpha
|
347 |
-
abs_d = abs(cohens_d)
|
348 |
-
small_threshold, medium_threshold, large_threshold = thresholds
|
349 |
-
if abs_d < small_threshold:
|
350 |
-
effect_size_interp = "negligible"
|
351 |
-
elif abs_d < medium_threshold:
|
352 |
-
effect_size_interp = "small"
|
353 |
-
elif abs_d < large_threshold:
|
354 |
-
effect_size_interp = "medium"
|
355 |
-
else:
|
356 |
-
effect_size_interp = "large"
|
357 |
|
358 |
return {
|
359 |
-
"test_type": "
|
360 |
-
"t_statistic": t_stat,
|
361 |
-
"p_value": p_value,
|
362 |
"degrees_of_freedom": df,
|
363 |
-
"
|
364 |
-
"
|
365 |
-
"
|
366 |
-
"
|
367 |
-
"
|
368 |
-
"
|
369 |
-
"
|
370 |
-
"
|
371 |
-
"effect_thresholds": thresholds,
|
372 |
-
"input_method": input_method
|
373 |
}
|
374 |
-
|
375 |
except Exception as e:
|
376 |
-
return {"error": f"
|
377 |
|
378 |
-
def
|
379 |
-
"""
|
380 |
-
|
381 |
-
|
|
|
|
|
|
|
|
|
382 |
|
|
|
|
|
|
|
383 |
try:
|
384 |
-
#
|
385 |
-
|
386 |
|
387 |
-
if
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
|
|
|
|
|
|
419 |
except Exception as e:
|
420 |
-
|
421 |
-
return None, error_df
|
422 |
|
423 |
-
def
|
424 |
-
"""
|
425 |
-
|
426 |
-
input_method = gr.Radio(
|
427 |
-
choices=["File Upload", "Text Input"],
|
428 |
-
value="File Upload",
|
429 |
-
label="Choose Input Method",
|
430 |
-
info="Select how you want to provide your data"
|
431 |
-
)
|
432 |
-
|
433 |
-
# File upload input section
|
434 |
-
with gr.Group(visible=True) as file_section:
|
435 |
-
gr.Markdown("### File Upload")
|
436 |
-
gr.Markdown("*Upload CSV or Excel file - first two columns will be used as Group 1 and Group 2*")
|
437 |
-
|
438 |
-
with gr.Row():
|
439 |
-
file_upload = gr.File(
|
440 |
-
label="Upload CSV/Excel File",
|
441 |
-
file_types=[".csv", ".xlsx", ".xls"],
|
442 |
-
type="filepath"
|
443 |
-
)
|
444 |
-
has_header = gr.Checkbox(
|
445 |
-
label="File has header row",
|
446 |
-
value=True,
|
447 |
-
info="Check if first row contains column names"
|
448 |
-
)
|
449 |
-
|
450 |
-
# Display loaded data preview
|
451 |
-
data_preview = gr.Dataframe(
|
452 |
-
label="Data Preview (first two columns)",
|
453 |
-
interactive=False,
|
454 |
-
row_count=5
|
455 |
-
)
|
456 |
|
457 |
-
|
458 |
-
|
459 |
-
gr.Markdown("### Text Input")
|
460 |
-
gr.Markdown("*Enter comma-separated numbers for each group*")
|
461 |
-
|
462 |
-
group1_str = gr.Textbox(
|
463 |
-
placeholder="85.2,90.1,78.5,92.3,88.7",
|
464 |
-
label="Group 1 Data",
|
465 |
-
info="Comma-separated numbers (e.g., test scores for condition A)"
|
466 |
-
)
|
467 |
-
group2_str = gr.Textbox(
|
468 |
-
placeholder="88.1,85.7,91.2,87.4,89.3",
|
469 |
-
label="Group 2 Data",
|
470 |
-
info="Comma-separated numbers (e.g., test scores for condition B)"
|
471 |
-
)
|
472 |
|
473 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
|
475 |
-
def
|
476 |
-
"""
|
477 |
-
|
478 |
-
with gr.Row():
|
479 |
-
alternative = gr.Dropdown(
|
480 |
-
choices=["two-sided", "less", "greater"],
|
481 |
-
value="two-sided",
|
482 |
-
label="Alternative Hypothesis",
|
483 |
-
info="two-sided: groups differ; less: group1 < group2; greater: group1 > group2"
|
484 |
-
)
|
485 |
-
alpha = gr.Number(
|
486 |
-
value=0.05,
|
487 |
-
minimum=0,
|
488 |
-
maximum=1,
|
489 |
-
step=0.01,
|
490 |
-
label="Significance Level (α)",
|
491 |
-
info="Probability threshold for statistical significance (typically 0.05)"
|
492 |
-
)
|
493 |
-
effect_thresholds = gr.Textbox(
|
494 |
-
value="0.2,0.5,0.8",
|
495 |
-
label="Effect Size Thresholds",
|
496 |
-
info="Cohen's d boundaries: small,medium,large (Cohen's canonical values)"
|
497 |
-
)
|
498 |
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
"""Enhanced Gradio interface for both Student's and Welch's t-tests."""
|
503 |
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
# Create input components
|
521 |
-
(student_input_method, student_file_section, student_text_section,
|
522 |
-
student_file_upload, student_has_header, student_data_preview,
|
523 |
-
student_group1_str, student_group2_str) = create_input_components()
|
524 |
-
|
525 |
-
# Create parameter components
|
526 |
-
student_alternative, student_alpha, student_effect_thresholds = create_parameter_components()
|
527 |
-
|
528 |
-
with gr.Row():
|
529 |
-
student_run_button = gr.Button("Run Student's T-Test", variant="primary", scale=1)
|
530 |
-
student_clear_button = gr.Button("Clear All", variant="secondary", scale=1)
|
531 |
-
|
532 |
-
student_output = gr.JSON(label="Statistical Test Results")
|
533 |
-
|
534 |
-
# Example data button
|
535 |
-
with gr.Row():
|
536 |
-
gr.Markdown("### Quick Examples")
|
537 |
-
student_example_button = gr.Button("Load Example Data", variant="outline")
|
538 |
-
|
539 |
-
# Welch's t-test tab
|
540 |
-
with gr.TabItem("Welch's T-Test"):
|
541 |
-
gr.Markdown("**Does not assume equal variances (more robust)**")
|
542 |
-
|
543 |
-
# Create input components
|
544 |
-
(welch_input_method, welch_file_section, welch_text_section,
|
545 |
-
welch_file_upload, welch_has_header, welch_data_preview,
|
546 |
-
welch_group1_str, welch_group2_str) = create_input_components()
|
547 |
-
|
548 |
-
# Create parameter components
|
549 |
-
welch_alternative, welch_alpha, welch_effect_thresholds = create_parameter_components()
|
550 |
-
|
551 |
-
with gr.Row():
|
552 |
-
welch_run_button = gr.Button("Run Welch's T-Test", variant="primary", scale=1)
|
553 |
-
welch_clear_button = gr.Button("Clear All", variant="secondary", scale=1)
|
554 |
-
|
555 |
-
welch_output = gr.JSON(label="Statistical Test Results")
|
556 |
-
|
557 |
-
# Example data button
|
558 |
-
with gr.Row():
|
559 |
-
gr.Markdown("### Quick Examples")
|
560 |
-
welch_example_button = gr.Button("Load Example Data", variant="outline")
|
561 |
-
|
562 |
-
# Shared state for loaded dataframes
|
563 |
-
student_loaded_dataframe = gr.State(value=None)
|
564 |
-
welch_loaded_dataframe = gr.State(value=None)
|
565 |
-
|
566 |
-
# Common functions for both tabs
|
567 |
-
def toggle_input_method(method):
|
568 |
-
if method == "File Upload":
|
569 |
-
return gr.update(visible=True), gr.update(visible=False)
|
570 |
-
else:
|
571 |
-
return gr.update(visible=False), gr.update(visible=True)
|
572 |
-
|
573 |
-
def run_student_test(method, loaded_df, g1_str, g2_str, alt, alph, thresh):
|
574 |
-
# Pass appropriate inputs based on selected method
|
575 |
-
if method == "File Upload":
|
576 |
-
return student_t_test(
|
577 |
-
dataframe=loaded_df,
|
578 |
-
group1_str=None,
|
579 |
-
group2_str=None,
|
580 |
-
alternative=alt,
|
581 |
-
alpha=alph,
|
582 |
-
effect_thresholds=thresh
|
583 |
-
)
|
584 |
-
else:
|
585 |
-
return student_t_test(
|
586 |
-
dataframe=None,
|
587 |
-
group1_str=g1_str,
|
588 |
-
group2_str=g2_str,
|
589 |
-
alternative=alt,
|
590 |
-
alpha=alph,
|
591 |
-
effect_thresholds=thresh
|
592 |
-
)
|
593 |
-
|
594 |
-
def run_welch_test(method, loaded_df, g1_str, g2_str, alt, alph, thresh):
|
595 |
-
# Pass appropriate inputs based on selected method
|
596 |
-
if method == "File Upload":
|
597 |
-
return welch_t_test(
|
598 |
-
dataframe=loaded_df,
|
599 |
-
group1_str=None,
|
600 |
-
group2_str=None,
|
601 |
-
alternative=alt,
|
602 |
-
alpha=alph,
|
603 |
-
effect_thresholds=thresh
|
604 |
-
)
|
605 |
-
else:
|
606 |
-
return welch_t_test(
|
607 |
-
dataframe=None,
|
608 |
-
group1_str=g1_str,
|
609 |
-
group2_str=g2_str,
|
610 |
-
alternative=alt,
|
611 |
-
alpha=alph,
|
612 |
-
effect_thresholds=thresh
|
613 |
-
)
|
614 |
-
|
615 |
-
def clear_all():
|
616 |
-
return (
|
617 |
-
"File Upload", # input_method
|
618 |
-
None, # loaded_dataframe
|
619 |
-
None, # data_preview
|
620 |
-
"", # group1_str
|
621 |
-
"", # group2_str
|
622 |
-
"two-sided", # alternative
|
623 |
-
0.05, # alpha
|
624 |
-
"0.2,0.5,0.8", # effect_thresholds
|
625 |
-
{} # output
|
626 |
-
)
|
627 |
-
|
628 |
-
def load_example():
|
629 |
-
example_df = pd.DataFrame({
|
630 |
-
'Treatment': [85.2, 90.1, 78.5, 92.3, 88.7, 86.4, 89.2],
|
631 |
-
'Control': [88.1, 85.7, 91.2, 87.4, 89.3, 90.8, 86.9]
|
632 |
-
})
|
633 |
-
preview = example_df.head(10)
|
634 |
-
return "File Upload", example_df, preview, "", ""
|
635 |
-
|
636 |
-
# Student's t-test event handlers
|
637 |
-
student_input_method.change(
|
638 |
-
fn=toggle_input_method,
|
639 |
-
inputs=student_input_method,
|
640 |
-
outputs=[student_file_section, student_text_section]
|
641 |
-
)
|
642 |
|
643 |
-
|
644 |
-
|
645 |
-
inputs=[student_file_upload, student_has_header],
|
646 |
-
outputs=[student_loaded_dataframe, student_data_preview]
|
647 |
-
)
|
648 |
|
649 |
-
|
650 |
-
|
651 |
-
inputs=[student_file_upload, student_has_header],
|
652 |
-
outputs=[student_loaded_dataframe, student_data_preview]
|
653 |
-
)
|
654 |
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
student_input_method,
|
659 |
-
student_loaded_dataframe,
|
660 |
-
student_group1_str,
|
661 |
-
student_group2_str,
|
662 |
-
student_alternative,
|
663 |
-
student_alpha,
|
664 |
-
student_effect_thresholds
|
665 |
-
],
|
666 |
-
outputs=student_output
|
667 |
-
)
|
668 |
|
669 |
-
|
670 |
-
|
671 |
-
|
672 |
-
student_input_method, student_loaded_dataframe, student_data_preview,
|
673 |
-
student_group1_str, student_group2_str, student_alternative,
|
674 |
-
student_alpha, student_effect_thresholds, student_output
|
675 |
-
]
|
676 |
-
)
|
677 |
|
678 |
-
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
683 |
|
684 |
-
#
|
685 |
-
|
686 |
-
fn=toggle_input_method,
|
687 |
-
inputs=welch_input_method,
|
688 |
-
outputs=[welch_file_section, welch_text_section]
|
689 |
-
)
|
690 |
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
|
|
|
|
|
|
696 |
|
697 |
-
|
698 |
-
fn=load_uploaded_file,
|
699 |
-
inputs=[welch_file_upload, welch_has_header],
|
700 |
-
outputs=[welch_loaded_dataframe, welch_data_preview]
|
701 |
-
)
|
702 |
|
703 |
-
|
704 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
705 |
inputs=[
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
-
|
710 |
-
welch_alternative,
|
711 |
-
welch_alpha,
|
712 |
-
welch_effect_thresholds
|
713 |
],
|
714 |
-
outputs=
|
715 |
-
|
716 |
-
|
717 |
-
|
718 |
-
|
719 |
-
|
720 |
-
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
730 |
)
|
731 |
-
|
732 |
-
|
733 |
-
|
734 |
-
- **p-value**: Likelihood of result given the null hypothesis (default significance threshold is 0.05).
|
735 |
-
- **Cohen's d**: Measure of effect size (default effect thresholds are 0.2, 0.5 and 0.8 for small, medium and large effect sizes).
|
736 |
-
- **t-statistic**: Quantifies how many standard errors the mean difference is from zero.
|
737 |
-
- **Degrees of freedom**: Student's uses pooled df, Welch's uses adjusted df for unequal variances.
|
738 |
-
|
739 |
-
### When to Use Which Test
|
740 |
-
- **Student's t-test**: Use when you can confidently assume equal variances between groups.
|
741 |
-
- **Welch's t-test**: Use when variances might be unequal, or as a safer default choice.
|
742 |
-
""")
|
743 |
-
|
744 |
-
return demo
|
745 |
|
746 |
-
# Main execution
|
747 |
if __name__ == "__main__":
|
748 |
-
demo = create_t_test_interface()
|
749 |
demo.launch(mcp_server=True)
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
|
|
3 |
from scipy import stats
|
4 |
+
from typing import List, Dict, Any, Union, Tuple
|
5 |
+
import json
|
6 |
|
7 |
+
def independent_t_test(group1: str, group2: str, equal_var: bool = True, alternative: str = "two-sided") -> Dict[str, Any]:
|
8 |
"""
|
9 |
+
Perform an independent samples t-test between two groups.
|
10 |
|
11 |
Args:
|
12 |
+
group1 (str): Comma-separated values for group 1 (e.g., "1.2,2.3,3.4,2.1")
|
13 |
+
group2 (str): Comma-separated values for group 2 (e.g., "2.1,3.2,4.1,3.5")
|
14 |
+
equal_var (bool): If True, perform standard t-test assuming equal variances. If False, perform Welch's t-test
|
15 |
+
alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
Returns:
|
18 |
+
dict: Test results including t-statistic, p-value, degrees of freedom, and interpretation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
"""
|
20 |
try:
|
21 |
+
# Parse input data
|
22 |
+
data1 = [float(x.strip()) for x in group1.split(',') if x.strip()]
|
23 |
+
data2 = [float(x.strip()) for x in group2.split(',') if x.strip()]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
+
if len(data1) < 2 or len(data2) < 2:
|
26 |
+
return {"error": "Each group must have at least 2 observations"}
|
27 |
|
28 |
+
# Perform t-test
|
29 |
+
t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=equal_var, alternative=alternative)
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# Calculate descriptive statistics
|
32 |
desc1 = {"mean": np.mean(data1), "std": np.std(data1, ddof=1), "n": len(data1)}
|
33 |
desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}
|
34 |
|
35 |
+
# Degrees of freedom
|
36 |
+
if equal_var:
|
37 |
+
df = len(data1) + len(data2) - 2
|
38 |
+
else:
|
39 |
+
# Welch's formula for unequal variances
|
40 |
+
s1_sq, s2_sq = desc1["std"]**2, desc2["std"]**2
|
41 |
+
n1, n2 = desc1["n"], desc2["n"]
|
42 |
+
df = (s1_sq/n1 + s2_sq/n2)**2 / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1))
|
43 |
|
44 |
+
# Effect size (Cohen's d)
|
45 |
+
pooled_std = np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2) / (len(data1)+len(data2)-2))
|
|
|
46 |
cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std
|
47 |
|
48 |
+
# Interpretation
|
49 |
+
significance = "significant" if p_value < 0.05 else "not significant"
|
50 |
+
effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
return {
|
53 |
+
"test_type": f"Independent t-test ({'equal variances' if equal_var else 'unequal variances'})",
|
54 |
+
"t_statistic": round(t_stat, 4),
|
55 |
+
"p_value": round(p_value, 6),
|
56 |
+
"degrees_of_freedom": round(df, 2),
|
57 |
+
"cohens_d": round(cohens_d, 4),
|
|
|
58 |
"group1_stats": desc1,
|
59 |
"group2_stats": desc2,
|
60 |
+
"result": f"The difference between groups is {significance} (p = {p_value:.6f})",
|
61 |
+
"effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
|
62 |
+
"alternative_hypothesis": alternative
|
|
|
|
|
|
|
63 |
}
|
|
|
64 |
except Exception as e:
|
65 |
+
return {"error": f"Error performing t-test: {str(e)}"}
|
66 |
|
67 |
+
def paired_t_test(before: str, after: str, alternative: str = "two-sided") -> Dict[str, Any]:
    """
    Perform a paired samples t-test on before/after measurements.

    Args:
        before (str): Comma-separated values for the "before" condition
        after (str): Comma-separated values for the "after" condition
        alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'

    Returns:
        dict: Test results including t-statistic, p-value, mean difference,
              Cohen's d, and a plain-language interpretation; on invalid
              input an {"error": ...} dict is returned instead.
    """
    try:
        # Parse comma-separated input, skipping empty tokens
        data_before = [float(x.strip()) for x in before.split(',') if x.strip()]
        data_after = [float(x.strip()) for x in after.split(',') if x.strip()]

        if len(data_before) != len(data_after):
            return {"error": "Before and after groups must have the same number of observations"}

        if len(data_before) < 2:
            return {"error": "Need at least 2 paired observations"}

        # Perform paired t-test
        # NOTE(review): scipy's ttest_rel(a, b) tests a - b, i.e. before - after,
        # while the reported mean_difference below is after - before, so the
        # t-statistic sign is opposite to mean_difference — kept for backward
        # compatibility; confirm with downstream consumers before changing.
        t_stat, p_value = stats.ttest_rel(data_before, data_after, alternative=alternative)

        # Differences are defined as after - before
        differences = np.array(data_after) - np.array(data_before)
        mean_diff = np.mean(differences)
        std_diff = np.std(differences, ddof=1)

        # Guard against zero variance in the differences: Cohen's d would be
        # a division by zero (numpy would silently produce inf/nan here).
        if std_diff == 0:
            return {"error": "All paired differences are identical; Cohen's d is undefined (zero variance)"}

        # Effect size (Cohen's d for paired samples)
        cohens_d = mean_diff / std_diff

        # Degrees of freedom
        df = len(data_before) - 1

        # Interpretation (alpha fixed at 0.05; size labels follow this app's convention)
        significance = "significant" if p_value < 0.05 else "not significant"
        effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"

        return {
            "test_type": "Paired t-test",
            "t_statistic": round(t_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "mean_difference": round(mean_diff, 4),
            "std_difference": round(std_diff, 4),
            "cohens_d": round(cohens_d, 4),
            "before_mean": round(np.mean(data_before), 4),
            "after_mean": round(np.mean(data_after), 4),
            "result": f"The paired difference is {significance} (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
            "alternative_hypothesis": alternative
        }
    except Exception as e:
        return {"error": f"Error performing paired t-test: {str(e)}"}
|
124 |
|
125 |
+
def one_sample_t_test(sample: str, population_mean: float, alternative: str = "two-sided") -> Dict[str, Any]:
    """
    Perform a one-sample t-test against a hypothesized population mean.

    Args:
        sample (str): Comma-separated sample values
        population_mean (float): Hypothesized population mean
        alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'

    Returns:
        dict: Test results including t-statistic, p-value, Cohen's d, and a
              plain-language interpretation; on invalid input an
              {"error": ...} dict is returned instead.
    """
    try:
        # Parse comma-separated input, skipping empty tokens
        data = [float(x.strip()) for x in sample.split(',') if x.strip()]

        if len(data) < 2:
            return {"error": "Sample must have at least 2 observations"}

        # Calculate descriptive statistics
        sample_mean = np.mean(data)
        sample_std = np.std(data, ddof=1)
        sample_size = len(data)

        # Guard against a constant sample: both the t-statistic and Cohen's d
        # are undefined when the sample standard deviation is zero (numpy
        # would otherwise silently produce inf/nan).
        if sample_std == 0:
            return {"error": "All sample values are identical; the t-test is undefined (zero variance)"}

        # Perform one-sample t-test
        t_stat, p_value = stats.ttest_1samp(data, population_mean, alternative=alternative)

        # Effect size (Cohen's d)
        cohens_d = (sample_mean - population_mean) / sample_std

        # Degrees of freedom
        df = sample_size - 1

        # Interpretation (alpha fixed at 0.05; size labels follow this app's convention)
        significance = "significant" if p_value < 0.05 else "not significant"
        effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"

        return {
            "test_type": "One-sample t-test",
            "t_statistic": round(t_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "sample_mean": round(sample_mean, 4),
            "population_mean": population_mean,
            "sample_std": round(sample_std, 4),
            "sample_size": sample_size,
            "cohens_d": round(cohens_d, 4),
            "result": f"Sample mean differs {significance}ly from population mean (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
            "alternative_hypothesis": alternative
        }
    except Exception as e:
        return {"error": f"Error performing one-sample t-test: {str(e)}"}
|
|
|
178 |
|
179 |
+
def one_way_anova(*groups: str) -> Dict[str, Any]:
    """
    Perform a one-way ANOVA across two or more independent groups.

    Args:
        *groups: Variable number of comma-separated group values; blank
            entries are ignored (minimum 2 non-empty groups)

    Returns:
        dict: ANOVA results including F-statistic, p-value, eta-squared,
              per-group descriptives, and interpretation; on invalid input
              an {"error": ...} dict is returned instead.
    """
    try:
        # Parse each non-blank group into a list of floats
        cleaned = []
        for idx, raw in enumerate(groups):
            if not raw.strip():
                continue
            values = [float(tok.strip()) for tok in raw.split(',') if tok.strip()]
            if len(values) < 2:
                return {"error": f"Group {idx+1} must have at least 2 observations"}
            cleaned.append(values)

        if len(cleaned) < 2:
            return {"error": "Need at least 2 groups for ANOVA"}

        # Run the omnibus F-test
        f_stat, p_value = stats.f_oneway(*cleaned)

        # Per-group descriptives plus a pooled list for the effect size
        group_stats = []
        pooled = []
        for idx, values in enumerate(cleaned):
            group_stats.append({
                "group": idx + 1,
                "n": len(values),
                "mean": round(np.mean(values), 4),
                "std": round(np.std(values, ddof=1), 4)
            })
            pooled.extend(values)

        # Effect size: eta-squared = SS_between / SS_total
        grand_mean = np.mean(pooled)
        ss_total = sum((x - grand_mean)**2 for x in pooled)
        ss_between = sum(len(values) * (np.mean(values) - grand_mean)**2 for values in cleaned)
        eta_squared = ss_between / ss_total if ss_total > 0 else 0

        # Degrees of freedom for the F distribution
        df_between = len(cleaned) - 1
        df_within = len(pooled) - len(cleaned)

        # Interpretation (alpha fixed at 0.05)
        significance = "significant" if p_value < 0.05 else "not significant"
        if eta_squared < 0.06:
            effect_size_interp = "small"
        elif eta_squared < 0.14:
            effect_size_interp = "medium"
        else:
            effect_size_interp = "large"

        return {
            "test_type": "One-way ANOVA",
            "f_statistic": round(f_stat, 4),
            "p_value": round(p_value, 6),
            "df_between": df_between,
            "df_within": df_within,
            "eta_squared": round(eta_squared, 4),
            "group_statistics": group_stats,
            "result": f"Group differences are {significance} (p = {p_value:.6f})",
            "effect_size": f"Effect size (η² = {eta_squared:.4f}) is {effect_size_interp}",
            "note": "If significant, consider post-hoc tests to identify specific group differences"
        }
    except Exception as e:
        return {"error": f"Error performing ANOVA: {str(e)}"}
|
247 |
|
248 |
+
def chi_square_test(observed: str, expected: str = None) -> Dict[str, Any]:
    """
    Perform a chi-square goodness of fit test.

    Args:
        observed (str): Comma-separated observed frequencies
        expected (str): Comma-separated expected frequencies (optional;
            defaults to an equal distribution across all categories)

    Returns:
        dict: Chi-square test results including the statistic, p-value,
              degrees of freedom, and Cramér's V; on invalid input an
              {"error": ...} dict is returned instead.
    """
    try:
        # Parse observed frequencies
        obs_data = [float(x.strip()) for x in observed.split(',') if x.strip()]

        # Need at least 2 categories: with one category df would be 0 and
        # Cramér's V below would divide by zero.
        if len(obs_data) < 2:
            return {"error": "Need at least 2 categories for a chi-square test"}

        # Parse expected frequencies or build an equal distribution
        if expected and expected.strip():
            exp_data = [float(x.strip()) for x in expected.split(',') if x.strip()]
            if len(obs_data) != len(exp_data):
                return {"error": "Observed and expected must have the same number of categories"}
        else:
            # Equal distribution: total count split evenly across categories
            total = sum(obs_data)
            exp_data = [total / len(obs_data)] * len(obs_data)

        # Guard the Cramér's V denominator (n must be positive)
        n = sum(obs_data)
        if n <= 0:
            return {"error": "Total observed frequency must be positive"}

        # Perform chi-square test
        chi2_stat, p_value = stats.chisquare(obs_data, exp_data)

        # Degrees of freedom
        df = len(obs_data) - 1

        # Effect size (Cramér's V for goodness of fit)
        cramers_v = np.sqrt(chi2_stat / (n * (len(obs_data) - 1)))

        # Interpretation (alpha fixed at 0.05)
        significance = "significant" if p_value < 0.05 else "not significant"
        effect_size_interp = "small" if cramers_v < 0.3 else "medium" if cramers_v < 0.5 else "large"

        return {
            "test_type": "Chi-square goodness of fit test",
            "chi_square_statistic": round(chi2_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "cramers_v": round(cramers_v, 4),
            "observed_frequencies": obs_data,
            "expected_frequencies": [round(x, 2) for x in exp_data],
            "result": f"Observed frequencies differ {significance}ly from expected (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cramér's V = {cramers_v:.4f}) is {effect_size_interp}"
        }
    except Exception as e:
        return {"error": f"Error performing chi-square test: {str(e)}"}
|
300 |
+
|
301 |
+
def correlation_test(x_values: str, y_values: str, method: str = "pearson") -> Dict[str, Any]:
    """
    Perform correlation analysis between two variables.

    Args:
        x_values (str): Comma-separated X variable values
        y_values (str): Comma-separated Y variable values
        method (str): Correlation method - 'pearson', 'spearman', or 'kendall'

    Returns:
        dict: Correlation results including the coefficient, p-value, and a
              strength/direction interpretation; on invalid input an
              {"error": ...} dict is returned instead.
    """
    try:
        # Parse comma-separated input, skipping empty tokens
        xs = [float(tok.strip()) for tok in x_values.split(',') if tok.strip()]
        ys = [float(tok.strip()) for tok in y_values.split(',') if tok.strip()]

        if len(xs) != len(ys):
            return {"error": "X and Y variables must have the same number of observations"}

        if len(xs) < 3:
            return {"error": "Need at least 3 observations for correlation"}

        # Dispatch table: method name -> (scipy function, display name)
        dispatch = {
            "pearson": (stats.pearsonr, "Pearson correlation"),
            "spearman": (stats.spearmanr, "Spearman rank correlation"),
            "kendall": (stats.kendalltau, "Kendall's tau correlation"),
        }
        key = method.lower()
        if key not in dispatch:
            return {"error": "Method must be 'pearson', 'spearman', or 'kendall'"}
        corr_func, test_name = dispatch[key]

        # Run the selected correlation test
        corr_coef, p_value = corr_func(xs, ys)

        # Interpretation (alpha fixed at 0.05)
        significance = "significant" if p_value < 0.05 else "not significant"

        # Strength and direction of the relationship
        abs_corr = abs(corr_coef)
        strength = "weak" if abs_corr < 0.3 else ("moderate" if abs_corr < 0.7 else "strong")
        direction = "positive" if corr_coef > 0 else "negative"

        return {
            "test_type": test_name,
            "correlation_coefficient": round(corr_coef, 4),
            "p_value": round(p_value, 6),
            "sample_size": len(xs),
            "result": f"The correlation is {significance} (p = {p_value:.6f})",
            "interpretation": f"{strength.title()} {direction} correlation (r = {corr_coef:.4f})",
            "method": key
        }
    except Exception as e:
        return {"error": f"Error performing correlation test: {str(e)}"}
|
362 |
+
|
363 |
+
# Create Gradio interfaces for each function
|
364 |
+
# Top-level UI wiring: one Gradio Interface per statistical test, grouped
# into tabs. Each interface maps text inputs straight onto the function's
# parameters and renders the returned dict as JSON.
demo = gr.TabbedInterface(
    [
        # Independent t-test tab (fn defined earlier in this file)
        gr.Interface(
            fn=independent_t_test,
            inputs=[
                gr.Textbox(placeholder="1.2,2.3,3.4,2.1", label="Group 1 (comma-separated)"),
                gr.Textbox(placeholder="2.1,3.2,4.1,3.5", label="Group 2 (comma-separated)"),
                gr.Checkbox(value=True, label="Equal variances"),
                gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
            ],
            outputs=gr.JSON(),
            title="Independent T-Test",
            description="Compare means between two independent groups"
        ),
        # Paired t-test tab
        gr.Interface(
            fn=paired_t_test,
            inputs=[
                gr.Textbox(placeholder="10,12,11,13", label="Before (comma-separated)"),
                gr.Textbox(placeholder="12,14,13,15", label="After (comma-separated)"),
                gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
            ],
            outputs=gr.JSON(),
            title="Paired T-Test",
            description="Compare paired/matched samples"
        ),
        # One-sample t-test tab
        gr.Interface(
            fn=one_sample_t_test,
            inputs=[
                gr.Textbox(placeholder="10,12,11,13,9", label="Sample (comma-separated)"),
                gr.Number(value=10, label="Population mean"),
                gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
            ],
            outputs=gr.JSON(),
            title="One-Sample T-Test",
            description="Test sample mean against population mean"
        ),
        # One-way ANOVA tab: groups 3-5 are optional (blank boxes are
        # skipped by one_way_anova)
        gr.Interface(
            fn=one_way_anova,
            inputs=[
                gr.Textbox(placeholder="1,2,3,2", label="Group 1 (comma-separated)"),
                gr.Textbox(placeholder="4,5,6,5", label="Group 2 (comma-separated)"),
                gr.Textbox(placeholder="7,8,9,8", label="Group 3 (comma-separated)", info="Optional"),
                gr.Textbox(placeholder="", label="Group 4 (comma-separated)", info="Optional"),
                gr.Textbox(placeholder="", label="Group 5 (comma-separated)", info="Optional")
            ],
            outputs=gr.JSON(),
            title="One-Way ANOVA",
            description="Compare means across multiple groups"
        ),
        # Chi-square goodness-of-fit tab: expected frequencies default to an
        # equal distribution when left blank
        gr.Interface(
            fn=chi_square_test,
            inputs=[
                gr.Textbox(placeholder="10,20,15,25", label="Observed frequencies (comma-separated)"),
                gr.Textbox(placeholder="", label="Expected frequencies (optional, comma-separated)")
            ],
            outputs=gr.JSON(),
            title="Chi-Square Test",
            description="Test goodness of fit for categorical data"
        ),
        # Correlation tab
        gr.Interface(
            fn=correlation_test,
            inputs=[
                gr.Textbox(placeholder="1,2,3,4,5", label="X values (comma-separated)"),
                gr.Textbox(placeholder="2,4,6,8,10", label="Y values (comma-separated)"),
                gr.Dropdown(["pearson", "spearman", "kendall"], value="pearson", label="Correlation method")
            ],
            outputs=gr.JSON(),
            title="Correlation Analysis",
            description="Test correlation between two variables"
        )
    ],
    tab_names=["Independent T-Test", "Paired T-Test", "One-Sample T-Test", "ANOVA", "Chi-Square", "Correlation"]
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
437 |
|
|
|
438 |
if __name__ == "__main__":
    # Launch the Gradio app; mcp_server=True presumably also exposes the
    # tab functions as MCP tools — confirm against the installed Gradio
    # version's launch() documentation.
    demo.launch(mcp_server=True)
|