Update app.py
Browse files
app.py
CHANGED
@@ -1,749 +1,439 @@
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
from scipy import stats
|
5 |
-
from typing import List, Dict, Any,
|
|
|
6 |
|
7 |
-
def
|
8 |
"""
|
9 |
-
|
10 |
|
11 |
Args:
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
Raises:
|
18 |
-
ValueError: If data cannot be parsed as numeric values
|
19 |
-
|
20 |
-
Example:
|
21 |
-
>>> parse_numeric_input("85.2,90.1,78.5,92.3")
|
22 |
-
[85.2, 90.1, 78.5, 92.3]
|
23 |
-
"""
|
24 |
-
try:
|
25 |
-
parsed = [float(x.strip()) for x in data.split(',') if x.strip()]
|
26 |
-
if not parsed:
|
27 |
-
raise ValueError("No valid numbers found in input string")
|
28 |
-
return parsed
|
29 |
-
except ValueError as e:
|
30 |
-
if "could not convert" in str(e):
|
31 |
-
raise ValueError(f"Cannot parse '{data}' as comma-separated numbers")
|
32 |
-
raise e
|
33 |
-
|
34 |
-
def welch_t_test(
|
35 |
-
dataframe: Optional[pd.DataFrame] = None,
|
36 |
-
group1_str: Optional[str] = None,
|
37 |
-
group2_str: Optional[str] = None,
|
38 |
-
alternative: str = "two-sided",
|
39 |
-
alpha: float = 0.05,
|
40 |
-
effect_thresholds: str = "0.2,0.5,0.8"
|
41 |
-
) -> Dict[str, Any]:
|
42 |
-
"""
|
43 |
-
Welch's t-test supporting both DataFrame and string inputs for maximum compatibility.
|
44 |
-
|
45 |
-
Welch's t-test determines if there is a statistically significant difference between
|
46 |
-
the means of group1 and group2. Unlike Student's t-test, this does NOT assume equal
|
47 |
-
variances between groups, making it more robust and generally recommended for most situations.
|
48 |
-
|
49 |
-
WHEN TO USE: Compare average scores between two independent groups when you cannot assume
|
50 |
-
equal variances, or as a safer default choice. Preferred over Student's t-test in most cases.
|
51 |
-
|
52 |
-
Args:
|
53 |
-
dataframe (Optional[pd.DataFrame]): DataFrame containing group data in first two columns.
|
54 |
-
If provided, group1_str and group2_str will be ignored.
|
55 |
-
group1_str (Optional[str]): Comma-separated string of numeric values for the first group.
|
56 |
-
Example: "12.1,15.3,18.7,14.2,16.8" (reaction times for Group A)
|
57 |
-
Only used if dataframe is None or empty.
|
58 |
-
group2_str (Optional[str]): Comma-separated string of numeric values for the second group.
|
59 |
-
Example: "22.4,19.8,25.1,21.3" (reaction times for Group B)
|
60 |
-
Only used if dataframe is None or empty.
|
61 |
-
alternative (str): Direction of the alternative hypothesis:
|
62 |
-
- "two-sided": group1 mean ≠ group2 mean (different in either direction)
|
63 |
-
- "less": group1 mean < group2 mean (group1 is smaller)
|
64 |
-
- "greater": group1 mean > group2 mean (group1 is larger)
|
65 |
-
alpha (float): Significance level for the test (probability of Type I error).
|
66 |
-
Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
|
67 |
-
effect_thresholds (str): Three comma-separated values defining Cohen's d effect size boundaries.
|
68 |
-
Format: "small_threshold,medium_threshold,large_threshold"
|
69 |
-
Default "0.2,0.5,0.8" means: <0.2=negligible, 0.2-0.5=small, 0.5-0.8=medium, >0.8=large
|
70 |
|
71 |
Returns:
|
72 |
-
dict:
|
73 |
-
- test_type (str): Always "Welch's t-test (unequal variances)"
|
74 |
-
- t_statistic (float): The calculated t-value using Welch's formula
|
75 |
-
- p_value (float): Probability of observing this result if null hypothesis is true
|
76 |
-
- degrees_of_freedom (float): Welch's adjusted df (usually non-integer), accounts for unequal variances
|
77 |
-
- cohens_d (float): Standardized effect size. Positive means group1 > group2, negative means group1 < group2
|
78 |
-
- pooled_std (float): Pooled standard deviation used in effect size calculation
|
79 |
-
- group1_stats (dict): Descriptive statistics for group1 (mean, std, n)
|
80 |
-
- group2_stats (dict): Descriptive statistics for group2 (mean, std, n)
|
81 |
-
- significant (bool): True if p_value < alpha
|
82 |
-
- effect_size (str): Categorical interpretation of Cohen's d magnitude
|
83 |
-
- alternative_hypothesis (str): Echo of alternative parameter
|
84 |
-
- alpha (float): Echo of significance level used
|
85 |
-
- effect_thresholds (List[float]): Echo of effect size thresholds used
|
86 |
-
- input_method (str): "dataframe" or "strings" - indicates which input method was used
|
87 |
"""
|
88 |
try:
|
89 |
-
# Parse
|
90 |
-
|
91 |
-
|
92 |
-
if len(thresholds) != 3:
|
93 |
-
return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}
|
94 |
-
except:
|
95 |
-
return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.2,0.5,0.8')"}
|
96 |
-
|
97 |
-
# Method 1: DataFrame input (preferred for LLMs and data pipelines)
|
98 |
-
if dataframe is not None and not dataframe.empty:
|
99 |
-
# Use first two columns automatically
|
100 |
-
if len(dataframe.columns) < 2:
|
101 |
-
return {"error": f"DataFrame must have at least 2 columns. Found {len(dataframe.columns)} columns."}
|
102 |
-
|
103 |
-
# Extract and validate data from first two columns
|
104 |
-
try:
|
105 |
-
# Convert to numeric, coercing errors to NaN
|
106 |
-
col1_numeric = pd.to_numeric(dataframe.iloc[:, 0], errors='coerce')
|
107 |
-
col2_numeric = pd.to_numeric(dataframe.iloc[:, 1], errors='coerce')
|
108 |
-
|
109 |
-
# Remove NaN values and convert to list
|
110 |
-
group1 = col1_numeric.dropna().tolist()
|
111 |
-
group2 = col2_numeric.dropna().tolist()
|
112 |
-
|
113 |
-
# Check if we lost too much data due to non-numeric values
|
114 |
-
original_count1 = len(dataframe.iloc[:, 0].dropna())
|
115 |
-
original_count2 = len(dataframe.iloc[:, 1].dropna())
|
116 |
-
|
117 |
-
if len(group1) < original_count1 * 0.5: # Lost more than 50% of data
|
118 |
-
return {"error": f"Column 1 contains too many non-numeric values. Only {len(group1)} out of {original_count1} values could be converted to numbers."}
|
119 |
-
|
120 |
-
if len(group2) < original_count2 * 0.5: # Lost more than 50% of data
|
121 |
-
return {"error": f"Column 2 contains too many non-numeric values. Only {len(group2)} out of {original_count2} values could be converted to numbers."}
|
122 |
-
|
123 |
-
input_method = "dataframe"
|
124 |
-
|
125 |
-
except Exception as e:
|
126 |
-
return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}
|
127 |
-
|
128 |
-
# Method 2: String input (preferred for humans and simple use cases)
|
129 |
-
elif group1_str and group2_str and group1_str.strip() and group2_str.strip():
|
130 |
-
try:
|
131 |
-
group1 = parse_numeric_input(group1_str)
|
132 |
-
group2 = parse_numeric_input(group2_str)
|
133 |
-
input_method = "strings"
|
134 |
-
except ValueError as e:
|
135 |
-
return {"error": f"String parsing error: {str(e)}"}
|
136 |
-
|
137 |
-
else:
|
138 |
-
return {"error": "Please provide either a DataFrame with data OR comma-separated strings for both groups. Do not leave inputs empty."}
|
139 |
-
|
140 |
-
# Validate extracted data
|
141 |
-
if len(group1) < 2:
|
142 |
-
return {"error": f"Group 1 must have at least 2 observations. Found {len(group1)} values."}
|
143 |
|
144 |
-
if len(
|
145 |
-
return {"error":
|
146 |
|
147 |
-
# Perform
|
148 |
-
|
149 |
-
data1 = np.array(group1)
|
150 |
-
data2 = np.array(group2)
|
151 |
-
|
152 |
-
# Perform Welch's t-test (unequal variances)
|
153 |
-
t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=False, alternative=alternative)
|
154 |
|
155 |
# Calculate descriptive statistics
|
156 |
desc1 = {"mean": np.mean(data1), "std": np.std(data1, ddof=1), "n": len(data1)}
|
157 |
desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}
|
158 |
|
159 |
-
#
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
163 |
|
164 |
-
# Effect size (Cohen's d
|
165 |
-
|
166 |
-
pooled_std = np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2) / (len(data1) + len(data2) - 2))
|
167 |
cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std
|
168 |
|
169 |
-
# Interpretation
|
170 |
-
|
171 |
-
|
172 |
-
small_threshold, medium_threshold, large_threshold = thresholds
|
173 |
-
if abs_d < small_threshold:
|
174 |
-
effect_size_interp = "negligible"
|
175 |
-
elif abs_d < medium_threshold:
|
176 |
-
effect_size_interp = "small"
|
177 |
-
elif abs_d < large_threshold:
|
178 |
-
effect_size_interp = "medium"
|
179 |
-
else:
|
180 |
-
effect_size_interp = "large"
|
181 |
|
182 |
return {
|
183 |
-
"test_type": "
|
184 |
-
"t_statistic": t_stat,
|
185 |
-
"p_value": p_value,
|
186 |
-
"degrees_of_freedom": df,
|
187 |
-
"cohens_d": cohens_d,
|
188 |
-
"pooled_std": pooled_std,
|
189 |
"group1_stats": desc1,
|
190 |
"group2_stats": desc2,
|
191 |
-
"
|
192 |
-
"effect_size": effect_size_interp,
|
193 |
-
"alternative_hypothesis": alternative
|
194 |
-
"alpha": alpha,
|
195 |
-
"effect_thresholds": thresholds,
|
196 |
-
"input_method": input_method
|
197 |
}
|
198 |
-
|
199 |
except Exception as e:
|
200 |
-
return {"error": f"
|
201 |
|
202 |
-
def
|
203 |
-
dataframe: Optional[pd.DataFrame] = None,
|
204 |
-
group1_str: Optional[str] = None,
|
205 |
-
group2_str: Optional[str] = None,
|
206 |
-
alternative: str = "two-sided",
|
207 |
-
alpha: float = 0.05,
|
208 |
-
effect_thresholds: str = "0.2,0.5,0.8"
|
209 |
-
) -> Dict[str, Any]:
|
210 |
"""
|
211 |
-
|
212 |
-
|
213 |
-
Student's t-test is used to determine if there is a statistically significant difference between the means of two sets of sampled numbers, group1 and group2.
|
214 |
-
This test produces a key statistic known as the t_statistic. Depending on the 'alternative hypothesis' considered (e.g. group1 mean < group2 mean or simply
|
215 |
-
group1 mean ≠ group2 mean), the test quantifies the probability of observing the result (or more extreme) given the 'null hypothesis' is true (i.e. no difference exists)
|
216 |
-
as p_value. If the p_value falls below the threshold alpha, then the result is considered statistically significant, meaning we reject the null hypothesis in
|
217 |
-
favor of the alternative. cohens_d measures effect size, the practical magnitude of the difference between the means of group1 and group2, standardized by pooled standard
|
218 |
-
deviation. It can be interpreted with the help of effect_thresholds. This test assumes both groups have equal variances and normal distributions. Use Welch's t-test if variances are unequal.
|
219 |
-
|
220 |
-
You should supply either a dataframe with the first 2 columns containing sample data (ideal for large datasets or data pipelines), or strings (group1 and group2) containing
|
221 |
-
comma-delimited lists of sampled data (ideal for small, simple data sets).
|
222 |
-
|
223 |
-
WHEN TO USE: Compare average scores between two independent groups (e.g., treatment vs control,
|
224 |
-
before vs after with different participants, male vs female performance)
|
225 |
|
226 |
Args:
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
Example: "85.2,90.1,78.5,92.3" (test scores for Group A)
|
231 |
-
Only used if dataframe is None or empty.
|
232 |
-
group2_str (Optional[str]): Comma-separated string of numeric values for the second group.
|
233 |
-
Example: "88.1,85.7,91.2,87.4" (test scores for Group B)
|
234 |
-
Only used if dataframe is None or empty.
|
235 |
-
alternative (str): Direction of the alternative hypothesis:
|
236 |
-
- "two-sided": group1 mean ≠ group2 mean (different in either direction)
|
237 |
-
- "less": group1 mean < group2 mean (group1 is smaller)
|
238 |
-
- "greater": group1 mean > group2 mean (group1 is larger)
|
239 |
-
alpha (float): Significance level for the test (probability of Type I error).
|
240 |
-
Common values: 0.05 (5%), 0.01 (1%), 0.10 (10%)
|
241 |
-
effect_thresholds (str): Three comma-separated values defining Cohen's d effect size boundaries.
|
242 |
-
Format: "small_threshold,medium_threshold,large_threshold"
|
243 |
-
Default "0.2,0.5,0.8" means: <0.2=negligible, 0.2-0.5=small, 0.5-0.8=medium, >0.8=large
|
244 |
-
These are Cohen's canonical benchmarks for effect size interpretation.
|
245 |
|
246 |
Returns:
|
247 |
-
dict:
|
248 |
-
- test_type (str): Always "Student's t-test"
|
249 |
-
- t_statistic (float): The calculated t-value, which measures how many standard errors the difference
|
250 |
-
between group means is away from zero (assuming the null hypothesis is true).
|
251 |
-
Larger absolute values indicate the observed difference is less likely under the null hypothesis.
|
252 |
-
- p_value (float): Probability of observing this result (or more extreme) if null hypothesis is true.
|
253 |
-
Values < alpha indicate statistical significance.
|
254 |
-
- degrees_of_freedom (int): df = n1 + n2 - 2, degrees of freedom for the pooled variance estimate, used for determining critical t-values.
|
255 |
-
- cohens_d (float): Effect size measure. Positive means group1 > group2, negative means group1 < group2.
|
256 |
-
Interpreted using Cohen's canonical benchmarks: negligible (<0.2), small (0.2), medium (0.5), large (0.8).
|
257 |
-
- pooled_std (float): Combined standard deviation used in Cohen's d calculation.
|
258 |
-
- group1_stats (dict): Descriptive statistics for group1 (mean, std, n)
|
259 |
-
- group2_stats (dict): Descriptive statistics for group2 (mean, std, n)
|
260 |
-
- significant (bool): True if p_value < alpha, False otherwise
|
261 |
-
- effect_size (str): Categorical interpretation ("negligible", "small", "medium", "large") based on |cohens_d| and effect_thresholds
|
262 |
-
- alternative_hypothesis (str): Echo of the alternative parameter used
|
263 |
-
- alpha (float): Echo of the significance level used
|
264 |
-
- effect_thresholds (List[float]): Echo of the thresholds used
|
265 |
-
- input_method (str): "dataframe" or "strings" - indicates which input method was used
|
266 |
"""
|
267 |
try:
|
268 |
-
# Parse
|
269 |
-
|
270 |
-
|
271 |
-
if len(thresholds) != 3:
|
272 |
-
return {"error": "Effect thresholds must be three comma-separated numbers (small,medium,large)"}
|
273 |
-
except:
|
274 |
-
return {"error": "Invalid effect thresholds format. Use 'small,medium,large' (e.g., '0.2,0.5,0.8')"}
|
275 |
-
|
276 |
-
# Method 1: DataFrame input (preferred for LLMs and data pipelines)
|
277 |
-
if dataframe is not None and not dataframe.empty:
|
278 |
-
# Use first two columns automatically
|
279 |
-
if len(dataframe.columns) < 2:
|
280 |
-
return {"error": f"DataFrame must have at least 2 columns. Found {len(dataframe.columns)} columns."}
|
281 |
-
|
282 |
-
# Extract and validate data from first two columns
|
283 |
-
try:
|
284 |
-
# Convert to numeric, coercing errors to NaN
|
285 |
-
col1_numeric = pd.to_numeric(dataframe.iloc[:, 0], errors='coerce')
|
286 |
-
col2_numeric = pd.to_numeric(dataframe.iloc[:, 1], errors='coerce')
|
287 |
-
|
288 |
-
# Remove NaN values and convert to list
|
289 |
-
group1 = col1_numeric.dropna().tolist()
|
290 |
-
group2 = col2_numeric.dropna().tolist()
|
291 |
-
|
292 |
-
# Check if we lost too much data due to non-numeric values
|
293 |
-
original_count1 = len(dataframe.iloc[:, 0].dropna())
|
294 |
-
original_count2 = len(dataframe.iloc[:, 1].dropna())
|
295 |
-
|
296 |
-
if len(group1) < original_count1 * 0.5: # Lost more than 50% of data
|
297 |
-
return {"error": f"Column 1 contains too many non-numeric values. Only {len(group1)} out of {original_count1} values could be converted to numbers."}
|
298 |
-
|
299 |
-
if len(group2) < original_count2 * 0.5: # Lost more than 50% of data
|
300 |
-
return {"error": f"Column 2 contains too many non-numeric values. Only {len(group2)} out of {original_count2} values could be converted to numbers."}
|
301 |
-
|
302 |
-
input_method = "dataframe"
|
303 |
-
|
304 |
-
except Exception as e:
|
305 |
-
return {"error": f"Error processing DataFrame columns: {str(e)}. Ensure columns contain numeric data."}
|
306 |
-
|
307 |
-
# Method 2: String input (preferred for humans and simple use cases)
|
308 |
-
elif group1_str and group2_str and group1_str.strip() and group2_str.strip():
|
309 |
-
try:
|
310 |
-
group1 = parse_numeric_input(group1_str)
|
311 |
-
group2 = parse_numeric_input(group2_str)
|
312 |
-
input_method = "strings"
|
313 |
-
except ValueError as e:
|
314 |
-
return {"error": f"String parsing error: {str(e)}"}
|
315 |
-
|
316 |
-
else:
|
317 |
-
return {"error": "Please provide either a DataFrame with data OR comma-separated strings for both groups. Do not leave inputs empty."}
|
318 |
|
319 |
-
|
320 |
-
|
321 |
-
return {"error": f"Group 1 must have at least 2 observations. Found {len(group1)} values."}
|
322 |
|
323 |
-
if len(
|
324 |
-
return {"error":
|
325 |
|
326 |
-
# Perform
|
327 |
-
|
328 |
-
data1 = np.array(group1)
|
329 |
-
data2 = np.array(group2)
|
330 |
|
331 |
-
#
|
332 |
-
|
|
|
|
|
333 |
|
334 |
-
#
|
335 |
-
|
336 |
-
desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}
|
337 |
|
338 |
-
# Degrees of freedom
|
339 |
-
df = len(
|
340 |
|
341 |
-
#
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
# Interpretation using Cohen's canonical benchmarks
|
346 |
-
significant = p_value < alpha
|
347 |
-
abs_d = abs(cohens_d)
|
348 |
-
small_threshold, medium_threshold, large_threshold = thresholds
|
349 |
-
if abs_d < small_threshold:
|
350 |
-
effect_size_interp = "negligible"
|
351 |
-
elif abs_d < medium_threshold:
|
352 |
-
effect_size_interp = "small"
|
353 |
-
elif abs_d < large_threshold:
|
354 |
-
effect_size_interp = "medium"
|
355 |
-
else:
|
356 |
-
effect_size_interp = "large"
|
357 |
|
358 |
return {
|
359 |
-
"test_type": "
|
360 |
-
"t_statistic": t_stat,
|
361 |
-
"p_value": p_value,
|
362 |
"degrees_of_freedom": df,
|
363 |
-
"
|
364 |
-
"
|
365 |
-
"
|
366 |
-
"
|
367 |
-
"
|
368 |
-
"
|
369 |
-
"
|
370 |
-
"
|
371 |
-
"effect_thresholds": thresholds,
|
372 |
-
"input_method": input_method
|
373 |
}
|
374 |
-
|
375 |
except Exception as e:
|
376 |
-
return {"error": f"
|
377 |
|
378 |
-
def
|
379 |
-
"""
|
380 |
-
|
381 |
-
|
|
|
|
|
|
|
|
|
382 |
|
|
|
|
|
|
|
383 |
try:
|
384 |
-
#
|
385 |
-
|
386 |
|
387 |
-
if
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
|
|
|
|
|
|
419 |
except Exception as e:
|
420 |
-
|
421 |
-
return None, error_df
|
422 |
|
423 |
-
def
|
424 |
-
"""
|
425 |
-
|
426 |
-
input_method = gr.Radio(
|
427 |
-
choices=["File Upload", "Text Input"],
|
428 |
-
value="File Upload",
|
429 |
-
label="Choose Input Method",
|
430 |
-
info="Select how you want to provide your data"
|
431 |
-
)
|
432 |
-
|
433 |
-
# File upload input section
|
434 |
-
with gr.Group(visible=True) as file_section:
|
435 |
-
gr.Markdown("### File Upload")
|
436 |
-
gr.Markdown("*Upload CSV or Excel file - first two columns will be used as Group 1 and Group 2*")
|
437 |
-
|
438 |
-
with gr.Row():
|
439 |
-
file_upload = gr.File(
|
440 |
-
label="Upload CSV/Excel File",
|
441 |
-
file_types=[".csv", ".xlsx", ".xls"],
|
442 |
-
type="filepath"
|
443 |
-
)
|
444 |
-
has_header = gr.Checkbox(
|
445 |
-
label="File has header row",
|
446 |
-
value=True,
|
447 |
-
info="Check if first row contains column names"
|
448 |
-
)
|
449 |
-
|
450 |
-
# Display loaded data preview
|
451 |
-
data_preview = gr.Dataframe(
|
452 |
-
label="Data Preview (first two columns)",
|
453 |
-
interactive=False,
|
454 |
-
row_count=5
|
455 |
-
)
|
456 |
|
457 |
-
|
458 |
-
|
459 |
-
gr.Markdown("### Text Input")
|
460 |
-
gr.Markdown("*Enter comma-separated numbers for each group*")
|
461 |
-
|
462 |
-
group1_str = gr.Textbox(
|
463 |
-
placeholder="85.2,90.1,78.5,92.3,88.7",
|
464 |
-
label="Group 1 Data",
|
465 |
-
info="Comma-separated numbers (e.g., test scores for condition A)"
|
466 |
-
)
|
467 |
-
group2_str = gr.Textbox(
|
468 |
-
placeholder="88.1,85.7,91.2,87.4,89.3",
|
469 |
-
label="Group 2 Data",
|
470 |
-
info="Comma-separated numbers (e.g., test scores for condition B)"
|
471 |
-
)
|
472 |
|
473 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
|
475 |
-
def
|
476 |
-
"""
|
477 |
-
|
478 |
-
with gr.Row():
|
479 |
-
alternative = gr.Dropdown(
|
480 |
-
choices=["two-sided", "less", "greater"],
|
481 |
-
value="two-sided",
|
482 |
-
label="Alternative Hypothesis",
|
483 |
-
info="two-sided: groups differ; less: group1 < group2; greater: group1 > group2"
|
484 |
-
)
|
485 |
-
alpha = gr.Number(
|
486 |
-
value=0.05,
|
487 |
-
minimum=0,
|
488 |
-
maximum=1,
|
489 |
-
step=0.01,
|
490 |
-
label="Significance Level (α)",
|
491 |
-
info="Probability threshold for statistical significance (typically 0.05)"
|
492 |
-
)
|
493 |
-
effect_thresholds = gr.Textbox(
|
494 |
-
value="0.2,0.5,0.8",
|
495 |
-
label="Effect Size Thresholds",
|
496 |
-
info="Cohen's d boundaries: small,medium,large (Cohen's canonical values)"
|
497 |
-
)
|
498 |
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
"""Enhanced Gradio interface for both Student's and Welch's t-tests."""
|
503 |
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
# Create input components
|
521 |
-
(student_input_method, student_file_section, student_text_section,
|
522 |
-
student_file_upload, student_has_header, student_data_preview,
|
523 |
-
student_group1_str, student_group2_str) = create_input_components()
|
524 |
-
|
525 |
-
# Create parameter components
|
526 |
-
student_alternative, student_alpha, student_effect_thresholds = create_parameter_components()
|
527 |
-
|
528 |
-
with gr.Row():
|
529 |
-
student_run_button = gr.Button("Run Student's T-Test", variant="primary", scale=1)
|
530 |
-
student_clear_button = gr.Button("Clear All", variant="secondary", scale=1)
|
531 |
-
|
532 |
-
student_output = gr.JSON(label="Statistical Test Results")
|
533 |
-
|
534 |
-
# Example data button
|
535 |
-
with gr.Row():
|
536 |
-
gr.Markdown("### Quick Examples")
|
537 |
-
student_example_button = gr.Button("Load Example Data", variant="outline")
|
538 |
-
|
539 |
-
# Welch's t-test tab
|
540 |
-
with gr.TabItem("Welch's T-Test"):
|
541 |
-
gr.Markdown("**Does not assume equal variances (more robust)**")
|
542 |
-
|
543 |
-
# Create input components
|
544 |
-
(welch_input_method, welch_file_section, welch_text_section,
|
545 |
-
welch_file_upload, welch_has_header, welch_data_preview,
|
546 |
-
welch_group1_str, welch_group2_str) = create_input_components()
|
547 |
-
|
548 |
-
# Create parameter components
|
549 |
-
welch_alternative, welch_alpha, welch_effect_thresholds = create_parameter_components()
|
550 |
-
|
551 |
-
with gr.Row():
|
552 |
-
welch_run_button = gr.Button("Run Welch's T-Test", variant="primary", scale=1)
|
553 |
-
welch_clear_button = gr.Button("Clear All", variant="secondary", scale=1)
|
554 |
-
|
555 |
-
welch_output = gr.JSON(label="Statistical Test Results")
|
556 |
-
|
557 |
-
# Example data button
|
558 |
-
with gr.Row():
|
559 |
-
gr.Markdown("### Quick Examples")
|
560 |
-
welch_example_button = gr.Button("Load Example Data", variant="outline")
|
561 |
-
|
562 |
-
# Shared state for loaded dataframes
|
563 |
-
student_loaded_dataframe = gr.State(value=None)
|
564 |
-
welch_loaded_dataframe = gr.State(value=None)
|
565 |
-
|
566 |
-
# Common functions for both tabs
|
567 |
-
def toggle_input_method(method):
|
568 |
-
if method == "File Upload":
|
569 |
-
return gr.update(visible=True), gr.update(visible=False)
|
570 |
-
else:
|
571 |
-
return gr.update(visible=False), gr.update(visible=True)
|
572 |
-
|
573 |
-
def run_student_test(method, loaded_df, g1_str, g2_str, alt, alph, thresh):
|
574 |
-
# Pass appropriate inputs based on selected method
|
575 |
-
if method == "File Upload":
|
576 |
-
return student_t_test(
|
577 |
-
dataframe=loaded_df,
|
578 |
-
group1_str=None,
|
579 |
-
group2_str=None,
|
580 |
-
alternative=alt,
|
581 |
-
alpha=alph,
|
582 |
-
effect_thresholds=thresh
|
583 |
-
)
|
584 |
-
else:
|
585 |
-
return student_t_test(
|
586 |
-
dataframe=None,
|
587 |
-
group1_str=g1_str,
|
588 |
-
group2_str=g2_str,
|
589 |
-
alternative=alt,
|
590 |
-
alpha=alph,
|
591 |
-
effect_thresholds=thresh
|
592 |
-
)
|
593 |
-
|
594 |
-
def run_welch_test(method, loaded_df, g1_str, g2_str, alt, alph, thresh):
|
595 |
-
# Pass appropriate inputs based on selected method
|
596 |
-
if method == "File Upload":
|
597 |
-
return welch_t_test(
|
598 |
-
dataframe=loaded_df,
|
599 |
-
group1_str=None,
|
600 |
-
group2_str=None,
|
601 |
-
alternative=alt,
|
602 |
-
alpha=alph,
|
603 |
-
effect_thresholds=thresh
|
604 |
-
)
|
605 |
-
else:
|
606 |
-
return welch_t_test(
|
607 |
-
dataframe=None,
|
608 |
-
group1_str=g1_str,
|
609 |
-
group2_str=g2_str,
|
610 |
-
alternative=alt,
|
611 |
-
alpha=alph,
|
612 |
-
effect_thresholds=thresh
|
613 |
-
)
|
614 |
-
|
615 |
-
def clear_all():
|
616 |
-
return (
|
617 |
-
"File Upload", # input_method
|
618 |
-
None, # loaded_dataframe
|
619 |
-
None, # data_preview
|
620 |
-
"", # group1_str
|
621 |
-
"", # group2_str
|
622 |
-
"two-sided", # alternative
|
623 |
-
0.05, # alpha
|
624 |
-
"0.2,0.5,0.8", # effect_thresholds
|
625 |
-
{} # output
|
626 |
-
)
|
627 |
-
|
628 |
-
def load_example():
|
629 |
-
example_df = pd.DataFrame({
|
630 |
-
'Treatment': [85.2, 90.1, 78.5, 92.3, 88.7, 86.4, 89.2],
|
631 |
-
'Control': [88.1, 85.7, 91.2, 87.4, 89.3, 90.8, 86.9]
|
632 |
-
})
|
633 |
-
preview = example_df.head(10)
|
634 |
-
return "File Upload", example_df, preview, "", ""
|
635 |
-
|
636 |
-
# Student's t-test event handlers
|
637 |
-
student_input_method.change(
|
638 |
-
fn=toggle_input_method,
|
639 |
-
inputs=student_input_method,
|
640 |
-
outputs=[student_file_section, student_text_section]
|
641 |
-
)
|
642 |
|
643 |
-
|
644 |
-
|
645 |
-
inputs=[student_file_upload, student_has_header],
|
646 |
-
outputs=[student_loaded_dataframe, student_data_preview]
|
647 |
-
)
|
648 |
|
649 |
-
|
650 |
-
|
651 |
-
inputs=[student_file_upload, student_has_header],
|
652 |
-
outputs=[student_loaded_dataframe, student_data_preview]
|
653 |
-
)
|
654 |
|
655 |
-
|
656 |
-
|
657 |
-
|
658 |
-
student_input_method,
|
659 |
-
student_loaded_dataframe,
|
660 |
-
student_group1_str,
|
661 |
-
student_group2_str,
|
662 |
-
student_alternative,
|
663 |
-
student_alpha,
|
664 |
-
student_effect_thresholds
|
665 |
-
],
|
666 |
-
outputs=student_output
|
667 |
-
)
|
668 |
|
669 |
-
|
670 |
-
|
671 |
-
|
672 |
-
student_input_method, student_loaded_dataframe, student_data_preview,
|
673 |
-
student_group1_str, student_group2_str, student_alternative,
|
674 |
-
student_alpha, student_effect_thresholds, student_output
|
675 |
-
]
|
676 |
-
)
|
677 |
|
678 |
-
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
683 |
|
684 |
-
#
|
685 |
-
|
686 |
-
fn=toggle_input_method,
|
687 |
-
inputs=welch_input_method,
|
688 |
-
outputs=[welch_file_section, welch_text_section]
|
689 |
-
)
|
690 |
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
|
|
|
|
|
|
696 |
|
697 |
-
|
698 |
-
fn=load_uploaded_file,
|
699 |
-
inputs=[welch_file_upload, welch_has_header],
|
700 |
-
outputs=[welch_loaded_dataframe, welch_data_preview]
|
701 |
-
)
|
702 |
|
703 |
-
|
704 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
705 |
inputs=[
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
-
|
710 |
-
welch_alternative,
|
711 |
-
welch_alpha,
|
712 |
-
welch_effect_thresholds
|
713 |
],
|
714 |
-
outputs=
|
715 |
-
|
716 |
-
|
717 |
-
|
718 |
-
|
719 |
-
|
720 |
-
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
730 |
)
|
731 |
-
|
732 |
-
|
733 |
-
|
734 |
-
- **p-value**: Likelihood of result given the null hypothesis (default significance threshold is 0.05).
|
735 |
-
- **Cohen's d**: Measure of effect size (default effect thresholds are 0.2, 0.5 and 0.8 for small, medium and large effect sizes).
|
736 |
-
- **t-statistic**: Quantifies how many standard errors the mean difference is from zero.
|
737 |
-
- **Degrees of freedom**: Student's uses pooled df, Welch's uses adjusted df for unequal variances.
|
738 |
-
|
739 |
-
### When to Use Which Test
|
740 |
-
- **Student's t-test**: Use when you can confidently assume equal variances between groups.
|
741 |
-
- **Welch's t-test**: Use when variances might be unequal, or as a safer default choice.
|
742 |
-
""")
|
743 |
-
|
744 |
-
return demo
|
745 |
|
746 |
-
# Main execution
|
747 |
if __name__ == "__main__":
|
748 |
-
demo = create_t_test_interface()
|
749 |
demo.launch(mcp_server=True)
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
|
|
3 |
from scipy import stats
|
4 |
+
from typing import List, Dict, Any, Union, Tuple
|
5 |
+
import json
|
6 |
|
7 |
+
def independent_t_test(group1: str, group2: str, equal_var: bool = True, alternative: str = "two-sided") -> Dict[str, Any]:
|
8 |
"""
|
9 |
+
Perform an independent samples t-test between two groups.
|
10 |
|
11 |
Args:
|
12 |
+
group1 (str): Comma-separated values for group 1 (e.g., "1.2,2.3,3.4,2.1")
|
13 |
+
group2 (str): Comma-separated values for group 2 (e.g., "2.1,3.2,4.1,3.5")
|
14 |
+
equal_var (bool): If True, perform standard t-test assuming equal variances. If False, perform Welch's t-test
|
15 |
+
alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
Returns:
|
18 |
+
dict: Test results including t-statistic, p-value, degrees of freedom, and interpretation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
"""
|
20 |
try:
|
21 |
+
# Parse input data
|
22 |
+
data1 = [float(x.strip()) for x in group1.split(',') if x.strip()]
|
23 |
+
data2 = [float(x.strip()) for x in group2.split(',') if x.strip()]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
+
if len(data1) < 2 or len(data2) < 2:
|
26 |
+
return {"error": "Each group must have at least 2 observations"}
|
27 |
|
28 |
+
# Perform t-test
|
29 |
+
t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=equal_var, alternative=alternative)
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# Calculate descriptive statistics
|
32 |
desc1 = {"mean": np.mean(data1), "std": np.std(data1, ddof=1), "n": len(data1)}
|
33 |
desc2 = {"mean": np.mean(data2), "std": np.std(data2, ddof=1), "n": len(data2)}
|
34 |
|
35 |
+
# Degrees of freedom
|
36 |
+
if equal_var:
|
37 |
+
df = len(data1) + len(data2) - 2
|
38 |
+
else:
|
39 |
+
# Welch's formula for unequal variances
|
40 |
+
s1_sq, s2_sq = desc1["std"]**2, desc2["std"]**2
|
41 |
+
n1, n2 = desc1["n"], desc2["n"]
|
42 |
+
df = (s1_sq/n1 + s2_sq/n2)**2 / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1))
|
43 |
|
44 |
+
# Effect size (Cohen's d)
|
45 |
+
pooled_std = np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2) / (len(data1)+len(data2)-2))
|
|
|
46 |
cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std
|
47 |
|
48 |
+
# Interpretation
|
49 |
+
significance = "significant" if p_value < 0.05 else "not significant"
|
50 |
+
effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
return {
|
53 |
+
"test_type": f"Independent t-test ({'equal variances' if equal_var else 'unequal variances'})",
|
54 |
+
"t_statistic": round(t_stat, 4),
|
55 |
+
"p_value": round(p_value, 6),
|
56 |
+
"degrees_of_freedom": round(df, 2),
|
57 |
+
"cohens_d": round(cohens_d, 4),
|
|
|
58 |
"group1_stats": desc1,
|
59 |
"group2_stats": desc2,
|
60 |
+
"result": f"The difference between groups is {significance} (p = {p_value:.6f})",
|
61 |
+
"effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
|
62 |
+
"alternative_hypothesis": alternative
|
|
|
|
|
|
|
63 |
}
|
|
|
64 |
except Exception as e:
|
65 |
+
return {"error": f"Error performing t-test: {str(e)}"}
|
66 |
|
67 |
+
def paired_t_test(before: str, after: str, alternative: str = "two-sided") -> Dict[str, Any]:
    """
    Perform a paired samples t-test on before/after measurements.

    Args:
        before (str): Comma-separated values for the "before" condition
        after (str): Comma-separated values for the "after" condition
        alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'

    Returns:
        dict: Test results including t-statistic, p-value, mean difference,
              Cohen's d, and a plain-language interpretation; on invalid
              input an {"error": ...} dict is returned instead.
    """
    try:
        # Parse comma-separated input, skipping empty tokens
        data_before = [float(x.strip()) for x in before.split(',') if x.strip()]
        data_after = [float(x.strip()) for x in after.split(',') if x.strip()]

        if len(data_before) != len(data_after):
            return {"error": "Before and after groups must have the same number of observations"}

        if len(data_before) < 2:
            return {"error": "Need at least 2 paired observations"}

        # Perform paired t-test
        # NOTE(review): scipy's ttest_rel(a, b) tests a - b, i.e. before - after,
        # while the reported mean_difference below is after - before, so the
        # t-statistic sign is opposite to mean_difference — kept for backward
        # compatibility; confirm with downstream consumers before changing.
        t_stat, p_value = stats.ttest_rel(data_before, data_after, alternative=alternative)

        # Differences are defined as after - before
        differences = np.array(data_after) - np.array(data_before)
        mean_diff = np.mean(differences)
        std_diff = np.std(differences, ddof=1)

        # Guard against zero variance in the differences: Cohen's d would be
        # a division by zero (numpy would silently produce inf/nan here).
        if std_diff == 0:
            return {"error": "All paired differences are identical; Cohen's d is undefined (zero variance)"}

        # Effect size (Cohen's d for paired samples)
        cohens_d = mean_diff / std_diff

        # Degrees of freedom
        df = len(data_before) - 1

        # Interpretation (alpha fixed at 0.05; size labels follow this app's convention)
        significance = "significant" if p_value < 0.05 else "not significant"
        effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"

        return {
            "test_type": "Paired t-test",
            "t_statistic": round(t_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "mean_difference": round(mean_diff, 4),
            "std_difference": round(std_diff, 4),
            "cohens_d": round(cohens_d, 4),
            "before_mean": round(np.mean(data_before), 4),
            "after_mean": round(np.mean(data_after), 4),
            "result": f"The paired difference is {significance} (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
            "alternative_hypothesis": alternative
        }
    except Exception as e:
        return {"error": f"Error performing paired t-test: {str(e)}"}
|
124 |
|
125 |
+
def one_sample_t_test(sample: str, population_mean: float, alternative: str = "two-sided") -> Dict[str, Any]:
    """
    Perform a one-sample t-test against a hypothesized population mean.

    Args:
        sample (str): Comma-separated sample values
        population_mean (float): Hypothesized population mean
        alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'

    Returns:
        dict: Test results including t-statistic, p-value, Cohen's d, and a
              plain-language interpretation; on invalid input an
              {"error": ...} dict is returned instead.
    """
    try:
        # Parse comma-separated input, skipping empty tokens
        data = [float(x.strip()) for x in sample.split(',') if x.strip()]

        if len(data) < 2:
            return {"error": "Sample must have at least 2 observations"}

        # Calculate descriptive statistics
        sample_mean = np.mean(data)
        sample_std = np.std(data, ddof=1)
        sample_size = len(data)

        # Guard against a constant sample: both the t-statistic and Cohen's d
        # are undefined when the sample standard deviation is zero (numpy
        # would otherwise silently produce inf/nan).
        if sample_std == 0:
            return {"error": "All sample values are identical; the t-test is undefined (zero variance)"}

        # Perform one-sample t-test
        t_stat, p_value = stats.ttest_1samp(data, population_mean, alternative=alternative)

        # Effect size (Cohen's d)
        cohens_d = (sample_mean - population_mean) / sample_std

        # Degrees of freedom
        df = sample_size - 1

        # Interpretation (alpha fixed at 0.05; size labels follow this app's convention)
        significance = "significant" if p_value < 0.05 else "not significant"
        effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"

        return {
            "test_type": "One-sample t-test",
            "t_statistic": round(t_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "sample_mean": round(sample_mean, 4),
            "population_mean": population_mean,
            "sample_std": round(sample_std, 4),
            "sample_size": sample_size,
            "cohens_d": round(cohens_d, 4),
            "result": f"Sample mean differs {significance}ly from population mean (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}",
            "alternative_hypothesis": alternative
        }
    except Exception as e:
        return {"error": f"Error performing one-sample t-test: {str(e)}"}
|
|
|
178 |
|
179 |
+
def one_way_anova(*groups: str) -> Dict[str, Any]:
    """
    Perform a one-way ANOVA across two or more independent groups.

    Args:
        *groups: Variable number of comma-separated group values; blank
            entries are ignored (minimum 2 non-empty groups)

    Returns:
        dict: ANOVA results including F-statistic, p-value, eta-squared,
              per-group descriptives, and interpretation; on invalid input
              an {"error": ...} dict is returned instead.
    """
    try:
        # Parse each non-blank group into a list of floats
        cleaned = []
        for idx, raw in enumerate(groups):
            if not raw.strip():
                continue
            values = [float(tok.strip()) for tok in raw.split(',') if tok.strip()]
            if len(values) < 2:
                return {"error": f"Group {idx+1} must have at least 2 observations"}
            cleaned.append(values)

        if len(cleaned) < 2:
            return {"error": "Need at least 2 groups for ANOVA"}

        # Run the omnibus F-test
        f_stat, p_value = stats.f_oneway(*cleaned)

        # Per-group descriptives plus a pooled list for the effect size
        group_stats = []
        pooled = []
        for idx, values in enumerate(cleaned):
            group_stats.append({
                "group": idx + 1,
                "n": len(values),
                "mean": round(np.mean(values), 4),
                "std": round(np.std(values, ddof=1), 4)
            })
            pooled.extend(values)

        # Effect size: eta-squared = SS_between / SS_total
        grand_mean = np.mean(pooled)
        ss_total = sum((x - grand_mean)**2 for x in pooled)
        ss_between = sum(len(values) * (np.mean(values) - grand_mean)**2 for values in cleaned)
        eta_squared = ss_between / ss_total if ss_total > 0 else 0

        # Degrees of freedom for the F distribution
        df_between = len(cleaned) - 1
        df_within = len(pooled) - len(cleaned)

        # Interpretation (alpha fixed at 0.05)
        significance = "significant" if p_value < 0.05 else "not significant"
        if eta_squared < 0.06:
            effect_size_interp = "small"
        elif eta_squared < 0.14:
            effect_size_interp = "medium"
        else:
            effect_size_interp = "large"

        return {
            "test_type": "One-way ANOVA",
            "f_statistic": round(f_stat, 4),
            "p_value": round(p_value, 6),
            "df_between": df_between,
            "df_within": df_within,
            "eta_squared": round(eta_squared, 4),
            "group_statistics": group_stats,
            "result": f"Group differences are {significance} (p = {p_value:.6f})",
            "effect_size": f"Effect size (η² = {eta_squared:.4f}) is {effect_size_interp}",
            "note": "If significant, consider post-hoc tests to identify specific group differences"
        }
    except Exception as e:
        return {"error": f"Error performing ANOVA: {str(e)}"}
|
247 |
|
248 |
+
def chi_square_test(observed: str, expected: str = None) -> Dict[str, Any]:
    """
    Perform a chi-square goodness of fit test.

    Args:
        observed (str): Comma-separated observed frequencies
        expected (str): Comma-separated expected frequencies (optional;
            defaults to an equal distribution across all categories)

    Returns:
        dict: Chi-square test results including the statistic, p-value,
              degrees of freedom, and Cramér's V; on invalid input an
              {"error": ...} dict is returned instead.
    """
    try:
        # Parse observed frequencies
        obs_data = [float(x.strip()) for x in observed.split(',') if x.strip()]

        # Need at least 2 categories: with one category df would be 0 and
        # Cramér's V below would divide by zero.
        if len(obs_data) < 2:
            return {"error": "Need at least 2 categories for a chi-square test"}

        # Parse expected frequencies or build an equal distribution
        if expected and expected.strip():
            exp_data = [float(x.strip()) for x in expected.split(',') if x.strip()]
            if len(obs_data) != len(exp_data):
                return {"error": "Observed and expected must have the same number of categories"}
        else:
            # Equal distribution: total count split evenly across categories
            total = sum(obs_data)
            exp_data = [total / len(obs_data)] * len(obs_data)

        # Guard the Cramér's V denominator (n must be positive)
        n = sum(obs_data)
        if n <= 0:
            return {"error": "Total observed frequency must be positive"}

        # Perform chi-square test
        chi2_stat, p_value = stats.chisquare(obs_data, exp_data)

        # Degrees of freedom
        df = len(obs_data) - 1

        # Effect size (Cramér's V for goodness of fit)
        cramers_v = np.sqrt(chi2_stat / (n * (len(obs_data) - 1)))

        # Interpretation (alpha fixed at 0.05)
        significance = "significant" if p_value < 0.05 else "not significant"
        effect_size_interp = "small" if cramers_v < 0.3 else "medium" if cramers_v < 0.5 else "large"

        return {
            "test_type": "Chi-square goodness of fit test",
            "chi_square_statistic": round(chi2_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "cramers_v": round(cramers_v, 4),
            "observed_frequencies": obs_data,
            "expected_frequencies": [round(x, 2) for x in exp_data],
            "result": f"Observed frequencies differ {significance}ly from expected (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cramér's V = {cramers_v:.4f}) is {effect_size_interp}"
        }
    except Exception as e:
        return {"error": f"Error performing chi-square test: {str(e)}"}
|
300 |
+
|
301 |
+
def correlation_test(x_values: str, y_values: str, method: str = "pearson") -> Dict[str, Any]:
    """
    Perform correlation analysis between two variables.

    Args:
        x_values (str): Comma-separated X variable values
        y_values (str): Comma-separated Y variable values
        method (str): Correlation method - 'pearson', 'spearman', or 'kendall'

    Returns:
        dict: Correlation results including the coefficient, p-value, and a
              strength/direction interpretation; on invalid input an
              {"error": ...} dict is returned instead.
    """
    try:
        # Parse comma-separated input, skipping empty tokens
        xs = [float(tok.strip()) for tok in x_values.split(',') if tok.strip()]
        ys = [float(tok.strip()) for tok in y_values.split(',') if tok.strip()]

        if len(xs) != len(ys):
            return {"error": "X and Y variables must have the same number of observations"}

        if len(xs) < 3:
            return {"error": "Need at least 3 observations for correlation"}

        # Dispatch table: method name -> (scipy function, display name)
        dispatch = {
            "pearson": (stats.pearsonr, "Pearson correlation"),
            "spearman": (stats.spearmanr, "Spearman rank correlation"),
            "kendall": (stats.kendalltau, "Kendall's tau correlation"),
        }
        key = method.lower()
        if key not in dispatch:
            return {"error": "Method must be 'pearson', 'spearman', or 'kendall'"}
        corr_func, test_name = dispatch[key]

        # Run the selected correlation test
        corr_coef, p_value = corr_func(xs, ys)

        # Interpretation (alpha fixed at 0.05)
        significance = "significant" if p_value < 0.05 else "not significant"

        # Strength and direction of the relationship
        abs_corr = abs(corr_coef)
        strength = "weak" if abs_corr < 0.3 else ("moderate" if abs_corr < 0.7 else "strong")
        direction = "positive" if corr_coef > 0 else "negative"

        return {
            "test_type": test_name,
            "correlation_coefficient": round(corr_coef, 4),
            "p_value": round(p_value, 6),
            "sample_size": len(xs),
            "result": f"The correlation is {significance} (p = {p_value:.6f})",
            "interpretation": f"{strength.title()} {direction} correlation (r = {corr_coef:.4f})",
            "method": key
        }
    except Exception as e:
        return {"error": f"Error performing correlation test: {str(e)}"}
|
362 |
+
|
363 |
+
# Create Gradio interfaces for each function
|
364 |
+
# Top-level UI wiring: one Gradio Interface per statistical test, grouped
# into tabs. Each interface maps text inputs straight onto the function's
# parameters and renders the returned dict as JSON.
demo = gr.TabbedInterface(
    [
        # Independent t-test tab (fn defined earlier in this file)
        gr.Interface(
            fn=independent_t_test,
            inputs=[
                gr.Textbox(placeholder="1.2,2.3,3.4,2.1", label="Group 1 (comma-separated)"),
                gr.Textbox(placeholder="2.1,3.2,4.1,3.5", label="Group 2 (comma-separated)"),
                gr.Checkbox(value=True, label="Equal variances"),
                gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
            ],
            outputs=gr.JSON(),
            title="Independent T-Test",
            description="Compare means between two independent groups"
        ),
        # Paired t-test tab
        gr.Interface(
            fn=paired_t_test,
            inputs=[
                gr.Textbox(placeholder="10,12,11,13", label="Before (comma-separated)"),
                gr.Textbox(placeholder="12,14,13,15", label="After (comma-separated)"),
                gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
            ],
            outputs=gr.JSON(),
            title="Paired T-Test",
            description="Compare paired/matched samples"
        ),
        # One-sample t-test tab
        gr.Interface(
            fn=one_sample_t_test,
            inputs=[
                gr.Textbox(placeholder="10,12,11,13,9", label="Sample (comma-separated)"),
                gr.Number(value=10, label="Population mean"),
                gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis")
            ],
            outputs=gr.JSON(),
            title="One-Sample T-Test",
            description="Test sample mean against population mean"
        ),
        # One-way ANOVA tab: groups 3-5 are optional (blank boxes are
        # skipped by one_way_anova)
        gr.Interface(
            fn=one_way_anova,
            inputs=[
                gr.Textbox(placeholder="1,2,3,2", label="Group 1 (comma-separated)"),
                gr.Textbox(placeholder="4,5,6,5", label="Group 2 (comma-separated)"),
                gr.Textbox(placeholder="7,8,9,8", label="Group 3 (comma-separated)", info="Optional"),
                gr.Textbox(placeholder="", label="Group 4 (comma-separated)", info="Optional"),
                gr.Textbox(placeholder="", label="Group 5 (comma-separated)", info="Optional")
            ],
            outputs=gr.JSON(),
            title="One-Way ANOVA",
            description="Compare means across multiple groups"
        ),
        # Chi-square goodness-of-fit tab: expected frequencies default to an
        # equal distribution when left blank
        gr.Interface(
            fn=chi_square_test,
            inputs=[
                gr.Textbox(placeholder="10,20,15,25", label="Observed frequencies (comma-separated)"),
                gr.Textbox(placeholder="", label="Expected frequencies (optional, comma-separated)")
            ],
            outputs=gr.JSON(),
            title="Chi-Square Test",
            description="Test goodness of fit for categorical data"
        ),
        # Correlation tab
        gr.Interface(
            fn=correlation_test,
            inputs=[
                gr.Textbox(placeholder="1,2,3,4,5", label="X values (comma-separated)"),
                gr.Textbox(placeholder="2,4,6,8,10", label="Y values (comma-separated)"),
                gr.Dropdown(["pearson", "spearman", "kendall"], value="pearson", label="Correlation method")
            ],
            outputs=gr.JSON(),
            title="Correlation Analysis",
            description="Test correlation between two variables"
        )
    ],
    tab_names=["Independent T-Test", "Paired T-Test", "One-Sample T-Test", "ANOVA", "Chi-Square", "Correlation"]
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
437 |
|
|
|
438 |
if __name__ == "__main__":
    # Launch the Gradio app; mcp_server=True presumably also exposes the
    # tab functions as MCP tools — confirm against the installed Gradio
    # version's launch() documentation.
    demo.launch(mcp_server=True)
|