JG1310 committed
Commit 18a47c7 · verified · 1 Parent(s): a5fd229

Update app.py

Files changed (1):
  1. app.py +366 -676
app.py CHANGED
@@ -1,749 +1,439 @@
  import gradio as gr
  import numpy as np
- import pandas as pd
  from scipy import stats
- from typing import List, Dict, Any, Optional, Union
 
- def parse_numeric_input(data: str) -> List[float]:
      """
-     Parse comma-separated string of numbers into a list of floats.
 
      Args:
-         data (str): Comma-separated string of numbers (e.g., "1.2,2.3,3.4,2.1")
-
-     Returns:
-         List[float]: Parsed numeric data
-
-     Raises:
-         ValueError: If data cannot be parsed as numeric values
-
-     Example:
-         >>> parse_numeric_input("85.2,90.1,78.5,92.3")
-         [85.2, 90.1, 78.5, 92.3]
-     """
-     try:
-         parsed = [float(x.strip()) for x in data.split(',') if x.strip()]
-         if not parsed:
-             raise ValueError("No valid numbers found in input string")
-         return parsed
-     except ValueError as e:
-         if "could not convert" in str(e):
-             raise ValueError(f"Cannot parse '{data}' as comma-separated numbers")
-         raise e
-
- def welch_t_test(
-     dataframe: Optional[pd.DataFrame] = None,
-     group1_str: Optional[str] = None,
-     group2_str: Optional[str] = None,
-     alternative: str = "two-sided",
-     alpha: float = 0.05,
-     effect_thresholds: str = "0.2,0.5,0.8"
- ) -> Dict[str, Any]:
-     """
-     Welch's t-test supporting both DataFrame and string inputs for maximum compatibility.
-
-     Welch's t-test determines if there is a statistically significant difference between
-     the means of group1 and group2. Unlike Student's t-test, this does NOT assume equal
-     variances between groups, making it more robust and generally recommended for most situations.
-
-     WHEN TO USE: Compare average scores between two independent groups when you cannot assume
-     equal variances, or as a safer default choice. Preferred over Student's t-test in most cases.
-
-     Args:
-         dataframe (Optional[pd.DataFrame]): DataFrame containing group data in first two columns.
-             If provided, group1_str and group2_str will be ignored.
-         group1_str (Optional[str]): Comma-separated string of numeric values for the first group.
-             Example: "12.1,15.3,18.7,14.2,16.8" (reaction times for Group A)
-             Only used if dataframe is None or empty.
-         group2_str (Optional[str]): Comma-separated string of numeric values for the second group.
-             Example: "22.4,19.8,25.1,21.3" (reaction times for Group B)
-             Only used if dataframe is None or empty.
-         alternative (str): Direction of the alternative hypothesis:
-             - "two-sided": group1 mean ≠ group2 mean (different in either direction)
-             - "less": group1 mean < group2 mean (group1 is smaller)
-             - "greater": group1 mean > group2 mean (group1 is larger)
-         alpha (float): Significance level for the test (probability of Type I error).
-             Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
-         effect_thresholds (str): Three comma-separated values defining Cohen's d effect size boundaries.
-             Format: "small_threshold,medium_threshold,large_threshold"
-             Default "0.2,0.5,0.8" means: <0.2=negligible, 0.2-0.5=small, 0.5-0.8=medium, >0.8=large
 
      Returns:
-         dict: Comprehensive test results with the following keys:
-             - test_type (str): Always "Welch's t-test (unequal variances)"
-             - t_statistic (float): The calculated t-value using Welch's formula
-             - p_value (float): Probability of observing this result if null hypothesis is true
-             - degrees_of_freedom (float): Welch's adjusted df (usually non-integer), accounts for unequal variances
-             - cohens_d (float): Standardized effect size. Positive means group1 > group2, negative means group1 < group2
-             - pooled_std (float): Pooled standard deviation used in effect size calculation
-             - group1_stats (dict): Descriptive statistics for group1 (mean, std, n)
-             - group2_stats (dict): Descriptive statistics for group2 (mean, std, n)
-             - significant (bool): True if p_value < alpha
-             - effect_size (str): Categorical interpretation of Cohen's d magnitude
-             - alternative_hypothesis (str): Echo of alternative parameter
-             - alpha (float): Echo of significance level used
-             - effect_thresholds (List[float]): Echo of effect size thresholds used
-             - input_method (str): "dataframe" or "strings" - indicates which input method was used
      """
      try:
-         # Parse effect size thresholds
-         try:
-             thresholds = [float(x.strip()) for x in effect_thresholds.split(',')]
-             if len(thresholds) != 3:
-                 return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}
-         except:
-             return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.2,0.5,0.8')"}
-
-         # Method 1: DataFrame input (preferred for LLMs and data pipelines)
-         if dataframe is not None and not dataframe.empty:
-             # Use first two columns automatically
-             if len(dataframe.columns) < 2:
-                 return {"error": f"DataFrame must have at least 2 columns. Found {len(dataframe.columns)} columns."}
-
-             # Extract and validate data from first two columns
-             try:
-                 # Convert to numeric, coercing errors to NaN
-                 col1_numeric = pd.to_numeric(dataframe.iloc[:, 0], errors='coerce')
-                 col2_numeric = pd.to_numeric(dataframe.iloc[:, 1], errors='coerce')
-
-                 # Remove NaN values and convert to list
-                 group1 = col1_numeric.dropna().tolist()
-                 group2 = col2_numeric.dropna().tolist()
-
-                 # Check if we lost too much data due to non-numeric values
-                 original_count1 = len(dataframe.iloc[:, 0].dropna())
-                 original_count2 = len(dataframe.iloc[:, 1].dropna())
-
-                 if len(group1) < original_count1 * 0.5:  # Lost more than 50% of data
-                     return {"error": f"Column 1 contains too many non-numeric values. Only {len(group1)} out of {original_count1} values could be converted to numbers."}
-
-                 if len(group2) < original_count2 * 0.5:  # Lost more than 50% of data
-                     return {"error": f"Column 2 contains too many non-numeric values. Only {len(group2)} out of {original_count2} values could be converted to numbers."}
-
-                 input_method = "dataframe"
-
-             except Exception as e:
-                 return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}
-
-         # Method 2: String input (preferred for humans and simple use cases)
-         elif group1_str and group2_str and group1_str.strip() and group2_str.strip():
-             try:
-                 group1 = parse_numeric_input(group1_str)
-                 group2 = parse_numeric_input(group2_str)
-                 input_method = "strings"
-             except ValueError as e:
-                 return {"error": f"String parsing error: {str(e)}"}
-
-         else:
-             return {"error": "Please provide either a DataFrame with data OR comma-separated strings for both groups. Do not leave inputs empty."}
-
-         # Validate extracted data
-         if len(group1) < 2:
-             return {"error": f"Group 1 must have at least 2 observations. Found {len(group1)} values."}
 
-         if len(group2) < 2:
-             return {"error": f"Group 2 must have at least 2 observations. Found {len(group2)} values."}
 
-         # Perform Welch's t-test analysis
-         # Convert to numpy arrays for calculations
-         data1 = np.array(group1)
-         data2 = np.array(group2)
-
-         # Perform Welch's t-test (unequal variances)
-         t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=False, alternative=alternative)
 
          # Calculate descriptive statistics
          desc1 = {"mean": np.mean(data1), "std": np.std(data1, ddof=1), "n": len(data1)}
          desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}
 
-         # Welch's degrees of freedom formula
-         s1_sq, s2_sq = desc1["std"]**2, desc2["std"]**2
-         n1, n2 = desc1["n"], desc2["n"]
-         df = (s1_sq/n1 + s2_sq/n2)**2 / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1))
 
-         # Effect size (Cohen's d using pooled standard deviation for consistency)
-         # For Welch's test, we still typically use pooled SD for Cohen's d calculation
-         pooled_std = np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2) / (len(data1) + len(data2) - 2))
          cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std
 
-         # Interpretation using Cohen's canonical benchmarks
-         significant = p_value < alpha
-         abs_d = abs(cohens_d)
-         small_threshold, medium_threshold, large_threshold = thresholds
-         if abs_d < small_threshold:
-             effect_size_interp = "negligible"
-         elif abs_d < medium_threshold:
-             effect_size_interp = "small"
-         elif abs_d < large_threshold:
-             effect_size_interp = "medium"
-         else:
-             effect_size_interp = "large"
 
          return {
-             "test_type": "Welch's t-test (unequal variances)",
-             "t_statistic": t_stat,
-             "p_value": p_value,
-             "degrees_of_freedom": df,
-             "cohens_d": cohens_d,
-             "pooled_std": pooled_std,
              "group1_stats": desc1,
              "group2_stats": desc2,
-             "significant": significant,
-             "effect_size": effect_size_interp,
-             "alternative_hypothesis": alternative,
-             "alpha": alpha,
-             "effect_thresholds": thresholds,
-             "input_method": input_method
          }
-
      except Exception as e:
-         return {"error": f"Unexpected error in Welch's t-test: {str(e)}"}
 
- def student_t_test(
-     dataframe: Optional[pd.DataFrame] = None,
-     group1_str: Optional[str] = None,
-     group2_str: Optional[str] = None,
-     alternative: str = "two-sided",
-     alpha: float = 0.05,
-     effect_thresholds: str = "0.2,0.5,0.8"
- ) -> Dict[str, Any]:
      """
-     Student's t-test supporting both DataFrame and string inputs for maximum compatibility.
-
-     Student's t-test is used to determine if there is a statistically significant difference between the means of two sets of sampled numbers, group1 and group2.
-     This test produces a key statistic known as the t_statistic. Depending on the 'alternative hypothesis' considered (e.g. group1 mean < group2 mean or simply
-     group1 mean ≠ group2 mean), the test quantifies the probability of observing the result (or more extreme) given the 'null hypothesis' is true (i.e. no difference exists)
-     as p_value. If the p_value falls below the threshold alpha, then the result is considered statistically significant, meaning we reject the null hypothesis in
-     favor of the alternative. cohens_d measures effect size, the practical magnitude of the difference between the means of group1 and group2, standardized by pooled standard
-     deviation. It can be interpreted with the help of effect_thresholds. This test assumes both groups have equal variances and normal distributions. Use Welch's t-test if variances are unequal.
-
-     You should supply either a dataframe with the first 2 columns containing sample data (ideal for large datasets or data pipelines), or strings (group1 and group2) containing
-     comma-delimited lists of sampled data (ideal for small, simple data sets).
-
-     WHEN TO USE: Compare average scores between two independent groups (e.g., treatment vs control,
-     before vs after with different participants, male vs female performance)
 
      Args:
-         dataframe (Optional[pd.DataFrame]): DataFrame containing group data in first two columns.
-             If provided, group1_str and group2_str will be ignored.
-         group1_str (Optional[str]): Comma-separated string of numeric values for the first group.
-             Example: "85.2,90.1,78.5,92.3" (test scores for Group A)
-             Only used if dataframe is None or empty.
-         group2_str (Optional[str]): Comma-separated string of numeric values for the second group.
-             Example: "88.1,85.7,91.2,87.4" (test scores for Group B)
-             Only used if dataframe is None or empty.
-         alternative (str): Direction of the alternative hypothesis:
-             - "two-sided": group1 mean ≠ group2 mean (different in either direction)
-             - "less": group1 mean < group2 mean (group1 is smaller)
-             - "greater": group1 mean > group2 mean (group1 is larger)
-         alpha (float): Significance level for the test (probability of Type I error).
-             Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
-         effect_thresholds (str): Three comma-separated values defining Cohen's d effect size boundaries.
-             Format: "small_threshold,medium_threshold,large_threshold"
-             Default "0.2,0.5,0.8" means: <0.2=negligible, 0.2-0.5=small, 0.5-0.8=medium, >0.8=large
-             These are Cohen's canonical benchmarks for effect size interpretation.
 
      Returns:
-         dict: Comprehensive test results with the following keys:
-             - test_type (str): Always "Student's t-test"
-             - t_statistic (float): The calculated t-value, which measures how many standard errors the difference
-               between group means is away from zero (assuming the null hypothesis is true).
-               Larger absolute values indicate the observed difference is less likely under the null hypothesis.
-             - p_value (float): Probability of observing this result (or more extreme) if null hypothesis is true.
-               Values < alpha indicate statistical significance.
-             - degrees_of_freedom (int): df = n1 + n2 - 2, degrees of freedom for the pooled variance estimate, used for determining critical t-values.
-             - cohens_d (float): Effect size measure. Positive means group1 > group2, negative means group1 < group2.
-               Interpreted using Cohen's canonical benchmarks: negligible (<0.2), small (0.2), medium (0.5), large (0.8).
-             - pooled_std (float): Combined standard deviation used in Cohen's d calculation.
-             - group1_stats (dict): Descriptive statistics for group1 (mean, std, n)
-             - group2_stats (dict): Descriptive statistics for group2 (mean, std, n)
-             - significant (bool): True if p_value < alpha, False otherwise
-             - effect_size (str): Categorical interpretation ("negligible", "small", "medium", "large") based on |cohens_d| and effect_thresholds
-             - alternative_hypothesis (str): Echo of the alternative parameter used
-             - alpha (float): Echo of the significance level used
-             - effect_thresholds (List[float]): Echo of the thresholds used
-             - input_method (str): "dataframe" or "strings" - indicates which input method was used
      """
      try:
-         # Parse effect size thresholds
-         try:
-             thresholds = [float(x.strip()) for x in effect_thresholds.split(',')]
-             if len(thresholds) != 3:
-                 return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}
-         except:
-             return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.2,0.5,0.8')"}
-
-         # Method 1: DataFrame input (preferred for LLMs and data pipelines)
-         if dataframe is not None and not dataframe.empty:
-             # Use first two columns automatically
-             if len(dataframe.columns) < 2:
-                 return {"error": f"DataFrame must have at least 2 columns. Found {len(dataframe.columns)} columns."}
-
-             # Extract and validate data from first two columns
-             try:
-                 # Convert to numeric, coercing errors to NaN
-                 col1_numeric = pd.to_numeric(dataframe.iloc[:, 0], errors='coerce')
-                 col2_numeric = pd.to_numeric(dataframe.iloc[:, 1], errors='coerce')
-
-                 # Remove NaN values and convert to list
-                 group1 = col1_numeric.dropna().tolist()
-                 group2 = col2_numeric.dropna().tolist()
-
-                 # Check if we lost too much data due to non-numeric values
-                 original_count1 = len(dataframe.iloc[:, 0].dropna())
-                 original_count2 = len(dataframe.iloc[:, 1].dropna())
-
-                 if len(group1) < original_count1 * 0.5:  # Lost more than 50% of data
-                     return {"error": f"Column 1 contains too many non-numeric values. Only {len(group1)} out of {original_count1} values could be converted to numbers."}
-
-                 if len(group2) < original_count2 * 0.5:  # Lost more than 50% of data
-                     return {"error": f"Column 2 contains too many non-numeric values. Only {len(group2)} out of {original_count2} values could be converted to numbers."}
-
-                 input_method = "dataframe"
-
-             except Exception as e:
-                 return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}
-
-         # Method 2: String input (preferred for humans and simple use cases)
-         elif group1_str and group2_str and group1_str.strip() and group2_str.strip():
-             try:
-                 group1 = parse_numeric_input(group1_str)
-                 group2 = parse_numeric_input(group2_str)
-                 input_method = "strings"
-             except ValueError as e:
-                 return {"error": f"String parsing error: {str(e)}"}
-
-         else:
-             return {"error": "Please provide either a DataFrame with data OR comma-separated strings for both groups. Do not leave inputs empty."}
 
-         # Validate extracted data
-         if len(group1) < 2:
-             return {"error": f"Group 1 must have at least 2 observations. Found {len(group1)} values."}
 
-         if len(group2) < 2:
-             return {"error": f"Group 2 must have at least 2 observations. Found {len(group2)} values."}
 
-         # Perform Student's t-test analysis directly
-         # Convert to numpy arrays for calculations
-         data1 = np.array(group1)
-         data2 = np.array(group2)
 
-         # Perform Student's t-test (equal variances)
-         t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=True, alternative=alternative)
 
-         # Calculate descriptive statistics
-         desc1 = {"mean": np.mean(data1), "std": np.std(data1, ddof=1), "n": len(data1)}
-         desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}
 
-         # Degrees of freedom (pooled)
-         df = len(data1) + len(data2) - 2
 
-         # Effect size (Cohen's d using pooled standard deviation)
-         pooled_std = np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2) / df)
-         cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std
-
-         # Interpretation using Cohen's canonical benchmarks
-         significant = p_value < alpha
-         abs_d = abs(cohens_d)
-         small_threshold, medium_threshold, large_threshold = thresholds
-         if abs_d < small_threshold:
-             effect_size_interp = "negligible"
-         elif abs_d < medium_threshold:
-             effect_size_interp = "small"
-         elif abs_d < large_threshold:
-             effect_size_interp = "medium"
-         else:
-             effect_size_interp = "large"
 
          return {
-             "test_type": "Student's t-test",
-             "t_statistic": t_stat,
-             "p_value": p_value,
              "degrees_of_freedom": df,
-             "cohens_d": cohens_d,
-             "pooled_std": pooled_std,
-             "group1_stats": desc1,
-             "group2_stats": desc2,
-             "significant": significant,
-             "effect_size": effect_size_interp,
-             "alternative_hypothesis": alternative,
-             "alpha": alpha,
-             "effect_thresholds": thresholds,
-             "input_method": input_method
          }
-
      except Exception as e:
-         return {"error": f"Unexpected error in flexible t-test: {str(e)}"}
 
- def load_uploaded_file(file_path, has_header_flag):
-     """Shared function to load uploaded files and return both the DataFrame and preview."""
-     if file_path is None:
-         return None, None
 
      try:
-         # Determine header parameter for pandas
-         header_param = 0 if has_header_flag else None
 
-         if file_path.endswith('.csv'):
-             df = pd.read_csv(file_path, header=header_param)
-         elif file_path.endswith(('.xlsx', '.xls')):
-             df = pd.read_excel(file_path, header=header_param)
-         else:
-             return None, pd.DataFrame({'Error': ['Unsupported file format']})
-
-         # Take only first two columns
-         if len(df.columns) >= 2:
-             df_subset = df.iloc[:, :2].copy()
-
-             # Set column names based on whether headers were detected
-             if has_header_flag and not str(df_subset.columns[0]).startswith('Unnamed'):
-                 # Keep original column names if they exist and aren't auto-generated
-                 df_subset.columns = [str(df_subset.columns[0]), str(df_subset.columns[1])]
-             else:
-                 # Use default names
-                 df_subset.columns = ['Group1', 'Group2']
-
-             # Convert columns to numeric, replacing non-numeric with NaN
-             df_subset.iloc[:, 0] = pd.to_numeric(df_subset.iloc[:, 0], errors='coerce')
-             df_subset.iloc[:, 1] = pd.to_numeric(df_subset.iloc[:, 1], errors='coerce')
-
-             # Remove rows where both values are NaN
-             df_subset = df_subset.dropna(how='all')
-
-             # Return full dataframe for processing and preview for display
-             preview = df_subset.head(10)  # Show first 10 rows
-             return df_subset, preview
-         else:
-             error_df = pd.DataFrame({'Error': ['File must have at least 2 columns']})
-             return None, error_df
      except Exception as e:
-         error_df = pd.DataFrame({'Error': [f"Failed to load file: {str(e)}"]})
-         return None, error_df
 
- def create_input_components():
-     """Create reusable input components for both test tabs."""
-     # Input method selector
-     input_method = gr.Radio(
-         choices=["File Upload", "Text Input"],
-         value="File Upload",
-         label="Choose Input Method",
-         info="Select how you want to provide your data"
-     )
-
-     # File upload input section
-     with gr.Group(visible=True) as file_section:
-         gr.Markdown("### File Upload")
-         gr.Markdown("*Upload CSV or Excel file - first two columns will be used as Group 1 and Group 2*")
-
-         with gr.Row():
-             file_upload = gr.File(
-                 label="Upload CSV/Excel File",
-                 file_types=[".csv", ".xlsx", ".xls"],
-                 type="filepath"
-             )
-             has_header = gr.Checkbox(
-                 label="File has header row",
-                 value=True,
-                 info="Check if first row contains column names"
-             )
-
-         # Display loaded data preview
-         data_preview = gr.Dataframe(
-             label="Data Preview (first two columns)",
-             interactive=False,
-             row_count=5
-         )
 
-     # Text input section
-     with gr.Group(visible=False) as text_section:
-         gr.Markdown("### Text Input")
-         gr.Markdown("*Enter comma-separated numbers for each group*")
-
-         group1_str = gr.Textbox(
-             placeholder="85.2,90.1,78.5,92.3,88.7",
-             label="Group 1 Data",
-             info="Comma-separated numbers (e.g., test scores for condition A)"
-         )
-         group2_str = gr.Textbox(
-             placeholder="88.1,85.7,91.2,87.4,89.3",
-             label="Group 2 Data",
-             info="Comma-separated numbers (e.g., test scores for condition B)"
-         )
 
-     return input_method, file_section, text_section, file_upload, has_header, data_preview, group1_str, group2_str
 
- def create_parameter_components():
-     """Create reusable parameter components for both test tabs."""
-     gr.Markdown("### Test Parameters")
-     with gr.Row():
-         alternative = gr.Dropdown(
-             choices=["two-sided", "less", "greater"],
-             value="two-sided",
-             label="Alternative Hypothesis",
-             info="two-sided: groups differ; less: group1 < group2; greater: group1 > group2"
-         )
-         alpha = gr.Number(
-             value=0.05,
-             minimum=0,
-             maximum=1,
-             step=0.01,
-             label="Significance Level (α)",
-             info="Probability threshold for statistical significance (typically 0.05)"
-         )
-         effect_thresholds = gr.Textbox(
-             value="0.2,0.5,0.8",
-             label="Effect Size Thresholds",
-             info="Cohen's d boundaries: small,medium,large (Cohen's canonical values)"
-         )
 
-     return alternative, alpha, effect_thresholds
-
- def create_t_test_interface():
-     """Enhanced Gradio interface for both Student's and Welch's t-tests."""
 
-     with gr.Blocks(title="T-Test Analysis", theme=gr.themes.Soft()) as demo:
-
-         gr.Markdown("""
-         # T-Test Statistical Analysis
-
-         **Purpose**: Compare the means of two independent groups to determine if they differ significantly.
-
-         - **Student's t-test**: Assumes equal variances between groups
-         - **Welch's t-test**: Does not assume equal variances (more robust, generally recommended)
-         """)
-
-         with gr.Tabs():
-             # Student's t-test tab
-             with gr.TabItem("Student's T-Test"):
-                 gr.Markdown("**Assumes equal variances between groups**")
-
-                 # Create input components
-                 (student_input_method, student_file_section, student_text_section,
-                  student_file_upload, student_has_header, student_data_preview,
-                  student_group1_str, student_group2_str) = create_input_components()
-
-                 # Create parameter components
-                 student_alternative, student_alpha, student_effect_thresholds = create_parameter_components()
-
-                 with gr.Row():
-                     student_run_button = gr.Button("Run Student's T-Test", variant="primary", scale=1)
-                     student_clear_button = gr.Button("Clear All", variant="secondary", scale=1)
-
-                 student_output = gr.JSON(label="Statistical Test Results")
-
-                 # Example data button
-                 with gr.Row():
-                     gr.Markdown("### Quick Examples")
-                     student_example_button = gr.Button("Load Example Data", variant="outline")
-
-             # Welch's t-test tab
-             with gr.TabItem("Welch's T-Test"):
-                 gr.Markdown("**Does not assume equal variances (more robust)**")
-
-                 # Create input components
-                 (welch_input_method, welch_file_section, welch_text_section,
-                  welch_file_upload, welch_has_header, welch_data_preview,
-                  welch_group1_str, welch_group2_str) = create_input_components()
-
-                 # Create parameter components
-                 welch_alternative, welch_alpha, welch_effect_thresholds = create_parameter_components()
-
-                 with gr.Row():
-                     welch_run_button = gr.Button("Run Welch's T-Test", variant="primary", scale=1)
-                     welch_clear_button = gr.Button("Clear All", variant="secondary", scale=1)
-
-                 welch_output = gr.JSON(label="Statistical Test Results")
-
-                 # Example data button
-                 with gr.Row():
-                     gr.Markdown("### Quick Examples")
-                     welch_example_button = gr.Button("Load Example Data", variant="outline")
-
-         # Shared state for loaded dataframes
-         student_loaded_dataframe = gr.State(value=None)
-         welch_loaded_dataframe = gr.State(value=None)
-
-         # Common functions for both tabs
-         def toggle_input_method(method):
-             if method == "File Upload":
-                 return gr.update(visible=True), gr.update(visible=False)
-             else:
-                 return gr.update(visible=False), gr.update(visible=True)
-
-         def run_student_test(method, loaded_df, g1_str, g2_str, alt, alph, thresh):
-             # Pass appropriate inputs based on selected method
-             if method == "File Upload":
-                 return student_t_test(
-                     dataframe=loaded_df,
-                     group1_str=None,
-                     group2_str=None,
-                     alternative=alt,
-                     alpha=alph,
-                     effect_thresholds=thresh
-                 )
-             else:
-                 return student_t_test(
-                     dataframe=None,
-                     group1_str=g1_str,
-                     group2_str=g2_str,
-                     alternative=alt,
-                     alpha=alph,
-                     effect_thresholds=thresh
-                 )
-
-         def run_welch_test(method, loaded_df, g1_str, g2_str, alt, alph, thresh):
-             # Pass appropriate inputs based on selected method
-             if method == "File Upload":
-                 return welch_t_test(
-                     dataframe=loaded_df,
-                     group1_str=None,
-                     group2_str=None,
-                     alternative=alt,
-                     alpha=alph,
-                     effect_thresholds=thresh
-                 )
-             else:
-                 return welch_t_test(
-                     dataframe=None,
-                     group1_str=g1_str,
-                     group2_str=g2_str,
-                     alternative=alt,
-                     alpha=alph,
-                     effect_thresholds=thresh
-                 )
-
-         def clear_all():
-             return (
-                 "File Upload",  # input_method
-                 None,  # loaded_dataframe
-                 None,  # data_preview
-                 "",  # group1_str
-                 "",  # group2_str
-                 "two-sided",  # alternative
-                 0.05,  # alpha
-                 "0.2,0.5,0.8",  # effect_thresholds
-                 {}  # output
-             )
-
-         def load_example():
-             example_df = pd.DataFrame({
-                 'Treatment': [85.2, 90.1, 78.5, 92.3, 88.7, 86.4, 89.2],
-                 'Control': [88.1, 85.7, 91.2, 87.4, 89.3, 90.8, 86.9]
-             })
-             preview = example_df.head(10)
-             return "File Upload", example_df, preview, "", ""
-
-         # Student's t-test event handlers
-         student_input_method.change(
-             fn=toggle_input_method,
-             inputs=student_input_method,
-             outputs=[student_file_section, student_text_section]
-         )
 
-         student_file_upload.change(
-             fn=load_uploaded_file,
-             inputs=[student_file_upload, student_has_header],
-             outputs=[student_loaded_dataframe, student_data_preview]
-         )
 
-         student_has_header.change(
-             fn=load_uploaded_file,
-             inputs=[student_file_upload, student_has_header],
-             outputs=[student_loaded_dataframe, student_data_preview]
-         )
 
-         student_run_button.click(
-             fn=run_student_test,
-             inputs=[
-                 student_input_method,
-                 student_loaded_dataframe,
-                 student_group1_str,
-                 student_group2_str,
-                 student_alternative,
-                 student_alpha,
-                 student_effect_thresholds
-             ],
-             outputs=student_output
-         )
 
-         student_clear_button.click(
-             fn=clear_all,
-             outputs=[
-                 student_input_method, student_loaded_dataframe, student_data_preview,
-                 student_group1_str, student_group2_str, student_alternative,
-                 student_alpha, student_effect_thresholds, student_output
-             ]
-         )
 
-         student_example_button.click(
-             fn=load_example,
-             outputs=[student_input_method, student_loaded_dataframe, student_data_preview,
-                      student_group1_str, student_group2_str]
-         )
 
-         # Welch's t-test event handlers
-         welch_input_method.change(
-             fn=toggle_input_method,
-             inputs=welch_input_method,
-             outputs=[welch_file_section, welch_text_section]
-         )
 
-         welch_file_upload.change(
-             fn=load_uploaded_file,
-             inputs=[welch_file_upload, welch_has_header],
-             outputs=[welch_loaded_dataframe, welch_data_preview]
-         )
 
-         welch_has_header.change(
-             fn=load_uploaded_file,
-             inputs=[welch_file_upload, welch_has_header],
-             outputs=[welch_loaded_dataframe, welch_data_preview]
-         )
 
-         welch_run_button.click(
-             fn=run_welch_test,
              inputs=[
-                 welch_input_method,
-                 welch_loaded_dataframe,
-                 welch_group1_str,
-                 welch_group2_str,
-                 welch_alternative,
-                 welch_alpha,
-                 welch_effect_thresholds
              ],
-             outputs=welch_output
-         )
-
-         welch_clear_button.click(
-             fn=clear_all,
-             outputs=[
-                 welch_input_method, welch_loaded_dataframe, welch_data_preview,
-                 welch_group1_str, welch_group2_str, welch_alternative,
-                 welch_alpha, welch_effect_thresholds, welch_output
-             ]
-         )
-
-         welch_example_button.click(
-             fn=load_example,
-             outputs=[welch_input_method, welch_loaded_dataframe, welch_data_preview,
-                      welch_group1_str, welch_group2_str]
          )
-
-         gr.Markdown("""
-         ### Interpretation Guide
-         - **p-value**: Likelihood of result given the null hypothesis (default significance threshold is 0.05).
-         - **Cohen's d**: Measure of effect size (default effect thresholds are 0.2, 0.5 and 0.8 for small, medium and large effect sizes).
-         - **t-statistic**: Quantifies how many standard errors the mean difference is from zero.
-         - **Degrees of freedom**: Student's uses pooled df, Welch's uses adjusted df for unequal variances.
-
-         ### When to Use Which Test
-         - **Student's t-test**: Use when you can confidently assume equal variances between groups.
-         - **Welch's t-test**: Use when variances might be unequal, or as a safer default choice.
-         """)
-
-     return demo
 
- # Main execution
  if __name__ == "__main__":
-     demo = create_t_test_interface()
      demo.launch(mcp_server=True)
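
Both the removed helpers above and the rewritten file below compute Welch's degrees of freedom and Cohen's d from the same expressions. As a minimal standalone sketch (not part of the commit), using the sample values from the removed docstring examples:

import numpy as np

# Illustration only: reaction times from the removed docstring examples
a = np.array([12.1, 15.3, 18.7, 14.2, 16.8])
b = np.array([22.4, 19.8, 25.1, 21.3])

s1_sq, s2_sq = a.var(ddof=1), b.var(ddof=1)  # unbiased sample variances
n1, n2 = len(a), len(b)

# Welch-Satterthwaite degrees of freedom, as in the code above and below
df = (s1_sq/n1 + s2_sq/n2)**2 / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1))

# Cohen's d using the pooled standard deviation, as in both versions
pooled_std = np.sqrt(((n1-1)*s1_sq + (n2-1)*s2_sq) / (n1 + n2 - 2))
cohens_d = (a.mean() - b.mean()) / pooled_std

print(round(df, 2), round(cohens_d, 4))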
 
  import gradio as gr
  import numpy as np
  from scipy import stats
+ from typing import List, Dict, Any, Union, Tuple
+ import json
 
+ def independent_t_test(group1: str, group2: str, equal_var: bool = True, alternative: str = "two-sided") -> Dict[str, Any]:
      """
+     Perform an independent samples t-test between two groups.
 
      Args:
+         group1 (str): Comma-separated values for group 1 (e.g., "1.2,2.3,3.4,2.1")
+         group2 (str): Comma-separated values for group 2 (e.g., "2.1,3.2,4.1,3.5")
+         equal_var (bool): If True, perform standard t-test assuming equal variances. If False, perform Welch's t-test
+         alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'
 
      Returns:
+         dict: Test results including t-statistic, p-value, degrees of freedom, and interpretation
      """
      try:
+         # Parse input data
+         data1 = [float(x.strip()) for x in group1.split(',') if x.strip()]
+         data2 = [float(x.strip()) for x in group2.split(',') if x.strip()]
 
+         if len(data1) < 2 or len(data2) < 2:
+             return {"error": "Each group must have at least 2 observations"}
 
+         # Perform t-test
+         t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=equal_var, alternative=alternative)
 
          # Calculate descriptive statistics
          desc1 = {"mean": np.mean(data1), "std": np.std(data1, ddof=1), "n": len(data1)}
          desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}
 
+         # Degrees of freedom
+         if equal_var:
+             df = len(data1) + len(data2) - 2
+         else:
+             # Welch's formula for unequal variances
+             s1_sq, s2_sq = desc1["std"]**2, desc2["std"]**2
+             n1, n2 = desc1["n"], desc2["n"]
+             df = (s1_sq/n1 + s2_sq/n2)**2 / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1))
 
+         # Effect size (Cohen's d)
+         pooled_std = np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2) / (len(data1)+len(data2)-2))
          cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std
 
+         # Interpretation
+         significance = "significant" if p_value < 0.05 else "not significant"
+         effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
 
          return {
+             "test_type": f"Independent t-test ({'equal variances' if equal_var else 'unequal variances'})",
+             "t_statistic": round(t_stat, 4),
+             "p_value": round(p_value, 6),
+             "degrees_of_freedom": round(df, 2),
+             "cohens_d": round(cohens_d, 4),
              "group1_stats": desc1,
              "group2_stats": desc2,
+             "result": f"The difference between groups is {significance} (p = {p_value:.6f})",
+             "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
+             "alternative_hypothesis": alternative
          }
      except Exception as e:
+         return {"error": f"Error performing t-test: {str(e)}"}
 
+ def paired_t_test(before: str, after: str, alternative: str = "two-sided") -> Dict[str, Any]:
      """
+     Perform a paired samples t-test.
 
      Args:
+         before (str): Comma-separated values for before condition
+         after (str): Comma-separated values for after condition
+         alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'
 
      Returns:
+         dict: Test results including t-statistic, p-value, and interpretation
      """
      try:
+         # Parse input data
+         data_before = [float(x.strip()) for x in before.split(',') if x.strip()]
+         data_after = [float(x.strip()) for x in after.split(',') if x.strip()]
 
+         if len(data_before) != len(data_after):
+             return {"error": "Before and after groups must have the same number of observations"}
 
+         if len(data_before) < 2:
+             return {"error": "Need at least 2 paired observations"}
 
+         # Perform paired t-test
+         t_stat, p_value = stats.ttest_rel(data_before, data_after, alternative=alternative)
 
+         # Calculate differences and descriptive statistics
+         differences = np.array(data_after) - np.array(data_before)
+         mean_diff = np.mean(differences)
+         std_diff = np.std(differences, ddof=1)
 
+         # Effect size (Cohen's d for paired samples)
+         cohens_d = mean_diff / std_diff
 
+         # Degrees of freedom
+         df = len(data_before) - 1
 
+         # Interpretation
+         significance = "significant" if p_value < 0.05 else "not significant"
+         effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
 
          return {
+             "test_type": "Paired t-test",
+             "t_statistic": round(t_stat, 4),
+             "p_value": round(p_value, 6),
              "degrees_of_freedom": df,
+             "mean_difference": round(mean_diff, 4),
+             "std_difference": round(std_diff, 4),
+             "cohens_d": round(cohens_d, 4),
+             "before_mean": round(np.mean(data_before), 4),
+             "after_mean": round(np.mean(data_after), 4),
+             "result": f"The paired difference is {significance} (p = {p_value:.6f})",
+             "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
+             "alternative_hypothesis": alternative
          }
      except Exception as e:
+         return {"error": f"Error performing paired t-test: {str(e)}"}
 
+ def one_sample_t_test(sample: str, population_mean: float, alternative: str = "two-sided") -> Dict[str, Any]:
+     """
+     Perform a one-sample t-test against a population mean.
+
+     Args:
+         sample (str): Comma-separated sample values
+         population_mean (float): Hypothesized population mean
+         alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'
 
+     Returns:
+         dict: Test results including t-statistic, p-value, and interpretation
+     """
      try:
+         # Parse input data
+         data = [float(x.strip()) for x in sample.split(',') if x.strip()]
 
+         if len(data) < 2:
+             return {"error": "Sample must have at least 2 observations"}
+
+         # Perform one-sample t-test
+         t_stat, p_value = stats.ttest_1samp(data, population_mean, alternative=alternative)
+
+         # Calculate descriptive statistics
+         sample_mean = np.mean(data)
+         sample_std = np.std(data, ddof=1)
+         sample_size = len(data)
+
+         # Effect size (Cohen's d)
+         cohens_d = (sample_mean - population_mean) / sample_std
+
+         # Degrees of freedom
+         df = sample_size - 1
+
+         # Interpretation
+         significance = "significant" if p_value < 0.05 else "not significant"
+         effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
+
+         return {
+             "test_type": "One-sample t-test",
+             "t_statistic": round(t_stat, 4),
+             "p_value": round(p_value, 6),
+             "degrees_of_freedom": df,
+             "sample_mean": round(sample_mean, 4),
+             "population_mean": population_mean,
+             "sample_std": round(sample_std, 4),
+             "sample_size": sample_size,
+             "cohens_d": round(cohens_d, 4),
+             "result": f"Sample mean differs {significance}ly from population mean (p = {p_value:.6f})",
+             "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
+             "alternative_hypothesis": alternative
+         }
      except Exception as e:
+         return {"error": f"Error performing one-sample t-test: {str(e)}"}
 
+ def one_way_anova(*groups: str) -> Dict[str, Any]:
+     """
+     Perform a one-way ANOVA test.
 
+     Args:
+         *groups: Variable number of comma-separated group values (minimum 2 groups)
 
+     Returns:
+         dict: ANOVA results including F-statistic, p-value, and interpretation
+     """
+     try:
+         # Parse input data
+         parsed_groups = []
+         for i, group in enumerate(groups):
+             if not group.strip():
+                 continue
+             data = [float(x.strip()) for x in group.split(',') if x.strip()]
+             if len(data) < 2:
+                 return {"error": f"Group {i+1} must have at least 2 observations"}
+             parsed_groups.append(data)
+
+         if len(parsed_groups) < 2:
+             return {"error": "Need at least 2 groups for ANOVA"}
+
+         # Perform one-way ANOVA
+         f_stat, p_value = stats.f_oneway(*parsed_groups)
+
+         # Calculate descriptive statistics for each group
+         group_stats = []
+         overall_data = []
+         for i, group in enumerate(parsed_groups):
+             group_stats.append({
+                 "group": i+1,
+                 "n": len(group),
+                 "mean": round(np.mean(group), 4),
+                 "std": round(np.std(group, ddof=1), 4)
+             })
+             overall_data.extend(group)
+
+         # Calculate effect size (eta-squared)
+         # SS_between / SS_total
+         overall_mean = np.mean(overall_data)
+         ss_total = sum((x - overall_mean)**2 for x in overall_data)
+         ss_between = sum(len(group) * (np.mean(group) - overall_mean)**2 for group in parsed_groups)
+         eta_squared = ss_between / ss_total if ss_total > 0 else 0
+
+         # Degrees of freedom
+         df_between = len(parsed_groups) - 1
+         df_within = len(overall_data) - len(parsed_groups)
+
+         # Interpretation
+         significance = "significant" if p_value < 0.05 else "not significant"
+         effect_size_interp = "small" if eta_squared < 0.06 else "medium" if eta_squared < 0.14 else "large"
+
+         return {
+             "test_type": "One-way ANOVA",
+             "f_statistic": round(f_stat, 4),
+             "p_value": round(p_value, 6),
+             "df_between": df_between,
+             "df_within": df_within,
+             "eta_squared": round(eta_squared, 4),
+             "group_statistics": group_stats,
+             "result": f"Group differences are {significance} (p = {p_value:.6f})",
+             "effect_size": f"Effect size (η² = {eta_squared:.4f}) is {effect_size_interp}",
+             "note": "If significant, consider post-hoc tests to identify specific group differences"
+         }
+     except Exception as e:
+         return {"error": f"Error performing ANOVA: {str(e)}"}
 
+ def chi_square_test(observed: str, expected: str = None) -> Dict[str, Any]:
+     """
+     Perform a chi-square goodness of fit test.
 
+     Args:
+         observed (str): Comma-separated observed frequencies
+         expected (str): Comma-separated expected frequencies (optional, defaults to equal distribution)
 
+     Returns:
+         dict: Chi-square test results
+     """
+     try:
+         # Parse observed frequencies
+         obs_data = [float(x.strip()) for x in observed.split(',') if x.strip()]
+
+         # Parse expected frequencies or create equal distribution
+         if expected and expected.strip():
+             exp_data = [float(x.strip()) for x in expected.split(',') if x.strip()]
+             if len(obs_data) != len(exp_data):
+                 return {"error": "Observed and expected must have the same number of categories"}
+         else:
+             # Equal distribution
+             total = sum(obs_data)
+             exp_data = [total / len(obs_data)] * len(obs_data)
 
+         # Perform chi-square test
+         chi2_stat, p_value = stats.chisquare(obs_data, exp_data)
 
+         # Degrees of freedom
+         df = len(obs_data) - 1
 
+         # Effect size (Cramér's V for goodness of fit)
+         n = sum(obs_data)
+         cramers_v = np.sqrt(chi2_stat / (n * (len(obs_data) - 1)))
 
+         # Interpretation
+         significance = "significant" if p_value < 0.05 else "not significant"
+         effect_size_interp = "small" if cramers_v < 0.3 else "medium" if cramers_v < 0.5 else "large"
 
+         return {
+             "test_type": "Chi-square goodness of fit test",
+             "chi_square_statistic": round(chi2_stat, 4),
+             "p_value": round(p_value, 6),
+             "degrees_of_freedom": df,
+             "cramers_v": round(cramers_v, 4),
+             "observed_frequencies": obs_data,
+             "expected_frequencies": [round(x, 2) for x in exp_data],
+             "result": f"Observed frequencies differ {significance}ly from expected (p = {p_value:.6f})",
+             "effect_size": f"Effect size (Cramér's V = {cramers_v:.4f}) is {effect_size_interp}"
+         }
+     except Exception as e:
+         return {"error": f"Error performing chi-square test: {str(e)}"}
+
+ def correlation_test(x_values: str, y_values: str, method: str = "pearson") -> Dict[str, Any]:
+     """
+     Perform correlation analysis between two variables.
+
+     Args:
+         x_values (str): Comma-separated X variable values
+         y_values (str): Comma-separated Y variable values
+         method (str): Correlation method - 'pearson', 'spearman', or 'kendall'
+
+     Returns:
+         dict: Correlation results including coefficient and p-value
+     """
+     try:
+         # Parse input data
+         x_data = [float(x.strip()) for x in x_values.split(',') if x.strip()]
+         y_data = [float(y.strip()) for y in y_values.split(',') if y.strip()]
+
+         if len(x_data) != len(y_data):
+             return {"error": "X and Y variables must have the same number of observations"}
+
+         if len(x_data) < 3:
+             return {"error": "Need at least 3 observations for correlation"}
+
+         # Perform correlation test
+         if method.lower() == "pearson":
+             corr_coef, p_value = stats.pearsonr(x_data, y_data)
+             test_name = "Pearson correlation"
+         elif method.lower() == "spearman":
+             corr_coef, p_value = stats.spearmanr(x_data, y_data)
+             test_name = "Spearman rank correlation"
+         elif method.lower() == "kendall":
+             corr_coef, p_value = stats.kendalltau(x_data, y_data)
+             test_name = "Kendall's tau correlation"
+         else:
+             return {"error": "Method must be 'pearson', 'spearman', or 'kendall'"}
 
+         # Interpretation
+         significance = "significant" if p_value < 0.05 else "not significant"
 
+         # Correlation strength interpretation
+         abs_corr = abs(corr_coef)
+         if abs_corr < 0.3:
+             strength = "weak"
+         elif abs_corr < 0.7:
+             strength = "moderate"
+         else:
+             strength = "strong"
 
+         direction = "positive" if corr_coef > 0 else "negative"
 
+         return {
+             "test_type": test_name,
+             "correlation_coefficient": round(corr_coef, 4),
+             "p_value": round(p_value, 6),
+             "sample_size": len(x_data),
+             "result": f"The correlation is {significance} (p = {p_value:.6f})",
+             "interpretation": f"{strength.title()} {direction} correlation (r = {corr_coef:.4f})",
+             "method": method.lower()
+         }
+     except Exception as e:
+         return {"error": f"Error performing correlation test: {str(e)}"}
+
+ # Create Gradio interfaces for each function
+ demo = gr.TabbedInterface(
+     [
+         gr.Interface(
+             fn=independent_t_test,
              inputs=[
+                 gr.Textbox(placeholder="1.2,2.3,3.4,2.1", label="Group 1 (comma-separated)"),
+                 gr.Textbox(placeholder="2.1,3.2,4.1,3.5", label="Group 2 (comma-separated)"),
+                 gr.Checkbox(value=True, label="Equal variances"),
+                 gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
              ],
+             outputs=gr.JSON(),
+             title="Independent T-Test",
+             description="Compare means between two independent groups"
+         ),
+         gr.Interface(
+             fn=paired_t_test,
+             inputs=[
+                 gr.Textbox(placeholder="10,12,11,13", label="Before (comma-separated)"),
+                 gr.Textbox(placeholder="12,14,13,15", label="After (comma-separated)"),
+                 gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
+             ],
+             outputs=gr.JSON(),
+             title="Paired T-Test",
+             description="Compare paired/matched samples"
+         ),
+         gr.Interface(
+             fn=one_sample_t_test,
+             inputs=[
+                 gr.Textbox(placeholder="10,12,11,13,9", label="Sample (comma-separated)"),
+                 gr.Number(value=10, label="Population mean"),
+                 gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
+             ],
+             outputs=gr.JSON(),
+             title="One-Sample T-Test",
+             description="Test sample mean against population mean"
+         ),
+         gr.Interface(
+             fn=one_way_anova,
+             inputs=[
+                 gr.Textbox(placeholder="1,2,3,2", label="Group 1 (comma-separated)"),
+                 gr.Textbox(placeholder="4,5,6,5", label="Group 2 (comma-separated)"),
+                 gr.Textbox(placeholder="7,8,9,8", label="Group 3 (comma-separated)", info="Optional"),
+                 gr.Textbox(placeholder="", label="Group 4 (comma-separated)", info="Optional"),
+                 gr.Textbox(placeholder="", label="Group 5 (comma-separated)", info="Optional")
+             ],
+             outputs=gr.JSON(),
+             title="One-Way ANOVA",
+             description="Compare means across multiple groups"
+         ),
+         gr.Interface(
+             fn=chi_square_test,
+             inputs=[
+                 gr.Textbox(placeholder="10,20,15,25", label="Observed frequencies (comma-separated)"),
+                 gr.Textbox(placeholder="", label="Expected frequencies (optional, comma-separated)")
+             ],
+             outputs=gr.JSON(),
+             title="Chi-Square Test",
+             description="Test goodness of fit for categorical data"
+         ),
+         gr.Interface(
+             fn=correlation_test,
+             inputs=[
+                 gr.Textbox(placeholder="1,2,3,4,5", label="X values (comma-separated)"),
+                 gr.Textbox(placeholder="2,4,6,8,10", label="Y values (comma-separated)"),
+                 gr.Dropdown(["pearson", "spearman", "kendall"], value="pearson", label="Correlation method")
+             ],
+             outputs=gr.JSON(),
+             title="Correlation Analysis",
+             description="Test correlation between two variables"
          )
+     ],
+     tab_names=["Independent T-Test", "Paired T-Test", "One-Sample T-Test", "ANOVA", "Chi-Square", "Correlation"]
+ )
 
  if __name__ == "__main__":
  demo.launch(mcp_server=True)
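
Since the rewritten functions return plain dicts, they can also be exercised without the UI. A minimal sketch (not part of the commit), assuming the file is saved as app.py and importable as `app`, with inputs taken from the interface placeholders:

from app import independent_t_test, paired_t_test, correlation_test

# Welch's variant of the independent test (equal_var=False)
print(independent_t_test("1.2,2.3,3.4,2.1", "2.1,3.2,4.1,3.5", equal_var=False))

# Paired before/after comparison
print(paired_t_test("10,12,11,13", "12,14,13,15"))

# Rank-based correlation
print(correlation_test("1,2,3,4,5", "2,4,6,8,10", method="spearman"))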