znacer committed on
Commit
aafd0ea
·
1 Parent(s): c00a51c

anomaly detection: iforest

Browse files
Files changed (4) hide show
  1. app/analyzer.py +75 -0
  2. app/service.py +52 -18
  3. pyproject.toml +1 -0
  4. uv.lock +86 -0
app/analyzer.py CHANGED
@@ -8,6 +8,7 @@ from typing import Any
8
  import numpy as np
9
  import polars as pl
10
  from loguru import logger
 
11
  from sqlalchemy import Engine, create_engine, text
12
 
13
  from data.get_mock import get_df
@@ -232,3 +233,77 @@ class TimeSeriesAnalyzer:
232
  "end": timestamps[-1] if len(timestamps) > 0 else None,
233
  },
234
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  import numpy as np
9
  import polars as pl
10
  from loguru import logger
11
+ from sklearn.ensemble import IsolationForest
12
  from sqlalchemy import Engine, create_engine, text
13
 
14
  from data.get_mock import get_df
 
233
  "end": timestamps[-1] if len(timestamps) > 0 else None,
234
  },
235
  }
236
+
237
def detect_anomalies_isolation_forest(
    self, data: pl.DataFrame, contamination: float = 0.1
) -> dict[str, Any]:
    """Detect anomalies in a single time series using Isolation Forest.

    Args:
        data: a single time series with the columns ``timestamp`` and
            ``value`` (these are the two columns read below).
        contamination: expected proportion of anomalies in the data,
            forwarded unchanged to ``IsolationForest`` (default: 0.1).

    Returns:
        A dictionary shaped as::

            {
                "anomalies_found": int,
                "anomalies": [
                    {
                        "timestamp": str,
                        "value": float,
                        "anomaly_score": float,
                        "severity": "high" | "medium",
                    },
                    ...
                ],
                "statistics": {
                    "mean": float | None,
                    "std": float | None,
                    "min": float | None,
                    "max": float | None,
                },
            }

        For an empty frame no model is fitted: ``anomalies`` is empty and
        the statistics are whatever polars reports for an empty column.

    """
    value_column = data["value"]
    statistics = {
        "mean": value_column.mean(),
        "std": value_column.std(),
        "min": value_column.min(),
        "max": value_column.max(),
    }

    # Fitting a model on zero samples is an error, so answer early.
    if data.is_empty():
        return {"anomalies_found": 0, "anomalies": [], "statistics": statistics}

    # scikit-learn expects a 2-D (n_samples, n_features) array.
    values = value_column.to_numpy().reshape(-1, 1)

    # Fixed random_state keeps results reproducible between calls.
    iso_forest = IsolationForest(
        contamination=contamination, random_state=42, n_estimators=100
    )

    # Predict anomalies (-1 for anomalies, 1 for normal)
    predictions = iso_forest.fit_predict(values)
    anomaly_mask = predictions == -1

    # score_samples is lower for more abnormal points; negate it so that a
    # larger anomaly_score means "more anomalous".
    anomaly_scores = -iso_forest.score_samples(values)

    logger.debug(f"anomaly_mask: {anomaly_mask}")
    logger.debug(f"anomaly_scores: {anomaly_scores}")
    logger.debug(pl.Series("anomaly_score", anomaly_scores).filter(anomaly_mask))

    # Scores above the 90th percentile of all scores are "high", the rest
    # "medium". The labels are built from the boolean comparison directly:
    # casting the booleans to strings first would make every label "high",
    # because both "true" and "false" are truthy strings.
    high_cutoff = np.percentile(anomaly_scores, 90)
    severity = np.where(anomaly_scores > high_cutoff, "high", "medium").tolist()

    # Prepare anomalies data
    anomalies = (
        data.select(
            pl.col("timestamp").cast(pl.Utf8),
            pl.col("value").cast(pl.Float64),
            pl.Series("anomaly_score", anomaly_scores),
            pl.Series("severity", severity, dtype=pl.String),
        )
        .filter(anomaly_mask)
        .to_dicts()
    )
    logger.debug(f"anomalies: {anomalies}")

    return {
        "anomalies_found": len(anomalies),
        "anomalies": anomalies,
        "statistics": statistics,
    }
app/service.py CHANGED
@@ -58,14 +58,41 @@ def query_timeseries(sensor_id: str, start_time: str, end_time: str) -> str:
58
 
59
 
60
  def detect_anomalies(
61
- sensor_id: str, start_time: str, end_time: str, threshold: float = 2.0
 
 
 
 
 
62
  ) -> str:
63
- """Detect anomalies in the time series data for a specific sensor."""
64
- logger.info("detect anomalies")
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  if not ensure_analyzer_connected():
66
  return json.dumps({"error": "Database not connected"})
67
  data = analyzer.query_metrics(sensor_id, start_time, end_time)
68
- anomalies = analyzer.detect_anomalies(data, threshold)
 
 
 
 
 
 
 
 
 
69
  return json.dumps(anomalies, indent=2)
70
 
71
 
@@ -103,25 +130,25 @@ def generate_analysis_report(
103
  include_anomalies: bool = True,
104
  include_trends: bool = True,
105
  user_question: str | None = None,
 
 
 
106
  ) -> str:
107
  """Generate a comprehensive analysis report for a specific sensor.
108
 
109
- This function creates a detailed report containing information about the sensor data,
110
- including optional trend analysis and anomaly detection sections based on the parameters.
111
-
112
  Args:
113
- sensor_id (str): The identifier of the sensor to analyze.
114
- start_time (str): The start time of the analysis period in ISO format.
115
- end_time (str): The end time of the analysis period in ISO format.
116
- include_anomalies (bool, optional): Whether to include anomaly detection in the report. Defaults to True.
117
- include_trends (bool, optional): Whether to include trend analysis in the report. Defaults to True.
118
- user_question (str | None, optional): A specific question from the user to be included in the report. Defaults to None.
 
 
 
119
 
120
  Returns:
121
- str: A formatted string containing the comprehensive analysis report with requested sections.
122
-
123
- Note:
124
- Returns an error message if the database is not connected.
125
 
126
  """
127
  logger.info("generate report")
@@ -148,8 +175,15 @@ def generate_analysis_report(
148
  )
149
  report_sections.append(f"- **End Value:** {trends['end_value']:.2f}")
150
  if include_anomalies:
151
- anomalies = analyzer.detect_anomalies(data)
 
 
 
 
 
 
152
  report_sections.append("\n### Anomaly Detection")
 
153
  report_sections.append(
154
  f"- **Anomalies Found:** {anomalies['anomalies_found']}"
155
  )
 
58
 
59
 
60
  def detect_anomalies(
61
+ sensor_id: str,
62
+ start_time: str,
63
+ end_time: str,
64
+ threshold: float = 2.0,
65
+ algorithm: str = "zscore",
66
+ contamination: float = 0.1,
67
  ) -> str:
68
+ """Detect anomalies in the time series data for a specific sensor.
69
+
70
+ Args:
71
+ sensor_id: The identifier of the sensor to analyze
72
+ start_time: The start time of the analysis period in ISO format
73
+ end_time: The end time of the analysis period in ISO format
74
+ threshold: Threshold for z-score based detection (default: 2.0)
75
+ algorithm: Algorithm to use for detection ("zscore" or "isolation_forest")
76
+ contamination: Expected proportion of anomalies for isolation forest (default: 0.1)
77
+
78
+ Returns:
79
+ str: JSON string containing anomaly detection results
80
+
81
+ """
82
+ logger.info(f"detect anomalies using {algorithm}")
83
  if not ensure_analyzer_connected():
84
  return json.dumps({"error": "Database not connected"})
85
  data = analyzer.query_metrics(sensor_id, start_time, end_time)
86
+
87
+ if algorithm == "zscore":
88
+ anomalies = analyzer.detect_anomalies(data, threshold)
89
+ elif algorithm == "isolation_forest":
90
+ anomalies = analyzer.detect_anomalies_isolation_forest(
91
+ data, contamination
92
+ )
93
+ else:
94
+ return json.dumps({"error": f"Unknown algorithm: {algorithm}"})
95
+
96
  return json.dumps(anomalies, indent=2)
97
 
98
 
 
130
  include_anomalies: bool = True,
131
  include_trends: bool = True,
132
  user_question: str | None = None,
133
+ anomaly_algorithm: str = "zscore",
134
+ anomaly_threshold: float = 2.0,
135
+ anomaly_contamination: float = 0.1,
136
  ) -> str:
137
  """Generate a comprehensive analysis report for a specific sensor.
138
 
 
 
 
139
  Args:
140
+ sensor_id: The identifier of the sensor to analyze
141
+ start_time: The start time of the analysis period in ISO format
142
+ end_time: The end time of the analysis period in ISO format
143
+ include_anomalies: Whether to include anomaly detection in the report
144
+ include_trends: Whether to include trend analysis in the report
145
+ user_question: A specific question from the user to be included in the report
146
+ anomaly_algorithm: Algorithm to use for anomaly detection ("zscore" or "isolation_forest")
147
+ anomaly_threshold: Threshold for z-score based detection
148
+ anomaly_contamination: Expected proportion of anomalies for isolation forest
149
 
150
  Returns:
151
+ str: A formatted string containing the comprehensive analysis report
 
 
 
152
 
153
  """
154
  logger.info("generate report")
 
175
  )
176
  report_sections.append(f"- **End Value:** {trends['end_value']:.2f}")
177
  if include_anomalies:
178
+ if anomaly_algorithm == "zscore":
179
+ anomalies = analyzer.detect_anomalies(data, anomaly_threshold)
180
+ else:
181
+ anomalies = analyzer.detect_anomalies_isolation_forest(
182
+ data, anomaly_contamination
183
+ )
184
+
185
  report_sections.append("\n### Anomaly Detection")
186
+ report_sections.append(f"- **Algorithm:** {anomaly_algorithm}")
187
  report_sections.append(
188
  f"- **Anomalies Found:** {anomalies['anomalies_found']}"
189
  )
pyproject.toml CHANGED
@@ -13,6 +13,7 @@ dependencies = [
13
  "polars>=1.30.0",
14
  "psycopg2-binary>=2.9.10",
15
  "pyarrow>=20.0.0",
 
16
  "sqlalchemy>=2.0.41",
17
  ]
18
 
 
13
  "polars>=1.30.0",
14
  "psycopg2-binary>=2.9.10",
15
  "pyarrow>=20.0.0",
16
+ "scikit-learn>=1.7.0",
17
  "sqlalchemy>=2.0.41",
18
  ]
19
 
uv.lock CHANGED
@@ -415,6 +415,15 @@ wheels = [
415
  { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
416
  ]
417
 
 
 
 
 
 
 
 
 
 
418
  [[package]]
419
  name = "loguru"
420
  version = "0.7.3"
@@ -966,6 +975,72 @@ wheels = [
966
  { url = "https://files.pythonhosted.org/packages/4d/c0/1108ad9f01567f66b3154063605b350b69c3c9366732e09e45f9fd0d1deb/safehttpx-0.1.6-py3-none-any.whl", hash = "sha256:407cff0b410b071623087c63dd2080c3b44dc076888d8c5823c00d1e58cb381c", size = 8692, upload-time = "2024-12-02T18:44:08.555Z" },
967
  ]
968
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
969
  [[package]]
970
  name = "semantic-version"
971
  version = "2.10.0"
@@ -1055,6 +1130,15 @@ wheels = [
1055
  { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload-time = "2025-04-13T13:56:16.21Z" },
1056
  ]
1057
 
 
 
 
 
 
 
 
 
 
1058
  [[package]]
1059
  name = "tomlkit"
1060
  version = "0.13.2"
@@ -1089,6 +1173,7 @@ dependencies = [
1089
  { name = "polars" },
1090
  { name = "psycopg2-binary" },
1091
  { name = "pyarrow" },
 
1092
  { name = "sqlalchemy" },
1093
  ]
1094
 
@@ -1102,6 +1187,7 @@ requires-dist = [
1102
  { name = "polars", specifier = ">=1.30.0" },
1103
  { name = "psycopg2-binary", specifier = ">=2.9.10" },
1104
  { name = "pyarrow", specifier = ">=20.0.0" },
 
1105
  { name = "sqlalchemy", specifier = ">=2.0.41" },
1106
  ]
1107
 
 
415
  { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
416
  ]
417
 
418
+ [[package]]
419
+ name = "joblib"
420
+ version = "1.5.1"
421
+ source = { registry = "https://pypi.org/simple" }
422
+ sdist = { url = "https://files.pythonhosted.org/packages/dc/fe/0f5a938c54105553436dbff7a61dc4fed4b1b2c98852f8833beaf4d5968f/joblib-1.5.1.tar.gz", hash = "sha256:f4f86e351f39fe3d0d32a9f2c3d8af1ee4cec285aafcb27003dda5205576b444", size = 330475, upload-time = "2025-05-23T12:04:37.097Z" }
423
+ wheels = [
424
+ { url = "https://files.pythonhosted.org/packages/7d/4f/1195bbac8e0c2acc5f740661631d8d750dc38d4a32b23ee5df3cde6f4e0d/joblib-1.5.1-py3-none-any.whl", hash = "sha256:4719a31f054c7d766948dcd83e9613686b27114f190f717cec7eaa2084f8a74a", size = 307746, upload-time = "2025-05-23T12:04:35.124Z" },
425
+ ]
426
+
427
  [[package]]
428
  name = "loguru"
429
  version = "0.7.3"
 
975
  { url = "https://files.pythonhosted.org/packages/4d/c0/1108ad9f01567f66b3154063605b350b69c3c9366732e09e45f9fd0d1deb/safehttpx-0.1.6-py3-none-any.whl", hash = "sha256:407cff0b410b071623087c63dd2080c3b44dc076888d8c5823c00d1e58cb381c", size = 8692, upload-time = "2024-12-02T18:44:08.555Z" },
976
  ]
977
 
978
+ [[package]]
979
+ name = "scikit-learn"
980
+ version = "1.7.0"
981
+ source = { registry = "https://pypi.org/simple" }
982
+ dependencies = [
983
+ { name = "joblib" },
984
+ { name = "numpy" },
985
+ { name = "scipy" },
986
+ { name = "threadpoolctl" },
987
+ ]
988
+ sdist = { url = "https://files.pythonhosted.org/packages/df/3b/29fa87e76b1d7b3b77cc1fcbe82e6e6b8cd704410705b008822de530277c/scikit_learn-1.7.0.tar.gz", hash = "sha256:c01e869b15aec88e2cdb73d27f15bdbe03bce8e2fb43afbe77c45d399e73a5a3", size = 7178217, upload-time = "2025-06-05T22:02:46.703Z" }
989
+ wheels = [
990
+ { url = "https://files.pythonhosted.org/packages/70/3a/bffab14e974a665a3ee2d79766e7389572ffcaad941a246931c824afcdb2/scikit_learn-1.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c2c7243d34aaede0efca7a5a96d67fddaebb4ad7e14a70991b9abee9dc5c0379", size = 11646758, upload-time = "2025-06-05T22:02:09.51Z" },
991
+ { url = "https://files.pythonhosted.org/packages/58/d8/f3249232fa79a70cb40595282813e61453c1e76da3e1a44b77a63dd8d0cb/scikit_learn-1.7.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f39f6a811bf3f15177b66c82cbe0d7b1ebad9f190737dcdef77cfca1ea3c19c", size = 10673971, upload-time = "2025-06-05T22:02:12.217Z" },
992
+ { url = "https://files.pythonhosted.org/packages/67/93/eb14c50533bea2f77758abe7d60a10057e5f2e2cdcf0a75a14c6bc19c734/scikit_learn-1.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63017a5f9a74963d24aac7590287149a8d0f1a0799bbe7173c0d8ba1523293c0", size = 11818428, upload-time = "2025-06-05T22:02:14.947Z" },
993
+ { url = "https://files.pythonhosted.org/packages/08/17/804cc13b22a8663564bb0b55fb89e661a577e4e88a61a39740d58b909efe/scikit_learn-1.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b2f8a0b1e73e9a08b7cc498bb2aeab36cdc1f571f8ab2b35c6e5d1c7115d97d", size = 12505887, upload-time = "2025-06-05T22:02:17.824Z" },
994
+ { url = "https://files.pythonhosted.org/packages/68/c7/4e956281a077f4835458c3f9656c666300282d5199039f26d9de1dabd9be/scikit_learn-1.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:34cc8d9d010d29fb2b7cbcd5ccc24ffdd80515f65fe9f1e4894ace36b267ce19", size = 10668129, upload-time = "2025-06-05T22:02:20.536Z" },
995
+ { url = "https://files.pythonhosted.org/packages/9a/c3/a85dcccdaf1e807e6f067fa95788a6485b0491d9ea44fd4c812050d04f45/scikit_learn-1.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5b7974f1f32bc586c90145df51130e02267e4b7e77cab76165c76cf43faca0d9", size = 11559841, upload-time = "2025-06-05T22:02:23.308Z" },
996
+ { url = "https://files.pythonhosted.org/packages/d8/57/eea0de1562cc52d3196eae51a68c5736a31949a465f0b6bb3579b2d80282/scikit_learn-1.7.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:014e07a23fe02e65f9392898143c542a50b6001dbe89cb867e19688e468d049b", size = 10616463, upload-time = "2025-06-05T22:02:26.068Z" },
997
+ { url = "https://files.pythonhosted.org/packages/10/a4/39717ca669296dfc3a62928393168da88ac9d8cbec88b6321ffa62c6776f/scikit_learn-1.7.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7e7ced20582d3a5516fb6f405fd1d254e1f5ce712bfef2589f51326af6346e8", size = 11766512, upload-time = "2025-06-05T22:02:28.689Z" },
998
+ { url = "https://files.pythonhosted.org/packages/d5/cd/a19722241d5f7b51e08351e1e82453e0057aeb7621b17805f31fcb57bb6c/scikit_learn-1.7.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1babf2511e6ffd695da7a983b4e4d6de45dce39577b26b721610711081850906", size = 12461075, upload-time = "2025-06-05T22:02:31.233Z" },
999
+ { url = "https://files.pythonhosted.org/packages/f3/bc/282514272815c827a9acacbe5b99f4f1a4bc5961053719d319480aee0812/scikit_learn-1.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:5abd2acff939d5bd4701283f009b01496832d50ddafa83c90125a4e41c33e314", size = 10652517, upload-time = "2025-06-05T22:02:34.139Z" },
1000
+ { url = "https://files.pythonhosted.org/packages/ea/78/7357d12b2e4c6674175f9a09a3ba10498cde8340e622715bcc71e532981d/scikit_learn-1.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e39d95a929b112047c25b775035c8c234c5ca67e681ce60d12413afb501129f7", size = 12111822, upload-time = "2025-06-05T22:02:36.904Z" },
1001
+ { url = "https://files.pythonhosted.org/packages/d0/0c/9c3715393343f04232f9d81fe540eb3831d0b4ec351135a145855295110f/scikit_learn-1.7.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:0521cb460426c56fee7e07f9365b0f45ec8ca7b2d696534ac98bfb85e7ae4775", size = 11325286, upload-time = "2025-06-05T22:02:39.739Z" },
1002
+ { url = "https://files.pythonhosted.org/packages/64/e0/42282ad3dd70b7c1a5f65c412ac3841f6543502a8d6263cae7b466612dc9/scikit_learn-1.7.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:317ca9f83acbde2883bd6bb27116a741bfcb371369706b4f9973cf30e9a03b0d", size = 12380865, upload-time = "2025-06-05T22:02:42.137Z" },
1003
+ { url = "https://files.pythonhosted.org/packages/4e/d0/3ef4ab2c6be4aa910445cd09c5ef0b44512e3de2cfb2112a88bb647d2cf7/scikit_learn-1.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:126c09740a6f016e815ab985b21e3a0656835414521c81fc1a8da78b679bdb75", size = 11549609, upload-time = "2025-06-05T22:02:44.483Z" },
1004
+ ]
1005
+
1006
+ [[package]]
1007
+ name = "scipy"
1008
+ version = "1.15.3"
1009
+ source = { registry = "https://pypi.org/simple" }
1010
+ dependencies = [
1011
+ { name = "numpy" },
1012
+ ]
1013
+ sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214, upload-time = "2025-05-08T16:13:05.955Z" }
1014
+ wheels = [
1015
+ { url = "https://files.pythonhosted.org/packages/37/4b/683aa044c4162e10ed7a7ea30527f2cbd92e6999c10a8ed8edb253836e9c/scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019", size = 38766735, upload-time = "2025-05-08T16:06:06.471Z" },
1016
+ { url = "https://files.pythonhosted.org/packages/7b/7e/f30be3d03de07f25dc0ec926d1681fed5c732d759ac8f51079708c79e680/scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6", size = 30173284, upload-time = "2025-05-08T16:06:11.686Z" },
1017
+ { url = "https://files.pythonhosted.org/packages/07/9c/0ddb0d0abdabe0d181c1793db51f02cd59e4901da6f9f7848e1f96759f0d/scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477", size = 22446958, upload-time = "2025-05-08T16:06:15.97Z" },
1018
+ { url = "https://files.pythonhosted.org/packages/af/43/0bce905a965f36c58ff80d8bea33f1f9351b05fad4beaad4eae34699b7a1/scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c", size = 25242454, upload-time = "2025-05-08T16:06:20.394Z" },
1019
+ { url = "https://files.pythonhosted.org/packages/56/30/a6f08f84ee5b7b28b4c597aca4cbe545535c39fe911845a96414700b64ba/scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45", size = 35210199, upload-time = "2025-05-08T16:06:26.159Z" },
1020
+ { url = "https://files.pythonhosted.org/packages/0b/1f/03f52c282437a168ee2c7c14a1a0d0781a9a4a8962d84ac05c06b4c5b555/scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49", size = 37309455, upload-time = "2025-05-08T16:06:32.778Z" },
1021
+ { url = "https://files.pythonhosted.org/packages/89/b1/fbb53137f42c4bf630b1ffdfc2151a62d1d1b903b249f030d2b1c0280af8/scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e", size = 36885140, upload-time = "2025-05-08T16:06:39.249Z" },
1022
+ { url = "https://files.pythonhosted.org/packages/2e/2e/025e39e339f5090df1ff266d021892694dbb7e63568edcfe43f892fa381d/scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539", size = 39710549, upload-time = "2025-05-08T16:06:45.729Z" },
1023
+ { url = "https://files.pythonhosted.org/packages/e6/eb/3bf6ea8ab7f1503dca3a10df2e4b9c3f6b3316df07f6c0ded94b281c7101/scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed", size = 40966184, upload-time = "2025-05-08T16:06:52.623Z" },
1024
+ { url = "https://files.pythonhosted.org/packages/73/18/ec27848c9baae6e0d6573eda6e01a602e5649ee72c27c3a8aad673ebecfd/scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759", size = 38728256, upload-time = "2025-05-08T16:06:58.696Z" },
1025
+ { url = "https://files.pythonhosted.org/packages/74/cd/1aef2184948728b4b6e21267d53b3339762c285a46a274ebb7863c9e4742/scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62", size = 30109540, upload-time = "2025-05-08T16:07:04.209Z" },
1026
+ { url = "https://files.pythonhosted.org/packages/5b/d8/59e452c0a255ec352bd0a833537a3bc1bfb679944c4938ab375b0a6b3a3e/scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb", size = 22383115, upload-time = "2025-05-08T16:07:08.998Z" },
1027
+ { url = "https://files.pythonhosted.org/packages/08/f5/456f56bbbfccf696263b47095291040655e3cbaf05d063bdc7c7517f32ac/scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730", size = 25163884, upload-time = "2025-05-08T16:07:14.091Z" },
1028
+ { url = "https://files.pythonhosted.org/packages/a2/66/a9618b6a435a0f0c0b8a6d0a2efb32d4ec5a85f023c2b79d39512040355b/scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825", size = 35174018, upload-time = "2025-05-08T16:07:19.427Z" },
1029
+ { url = "https://files.pythonhosted.org/packages/b5/09/c5b6734a50ad4882432b6bb7c02baf757f5b2f256041da5df242e2d7e6b6/scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7", size = 37269716, upload-time = "2025-05-08T16:07:25.712Z" },
1030
+ { url = "https://files.pythonhosted.org/packages/77/0a/eac00ff741f23bcabd352731ed9b8995a0a60ef57f5fd788d611d43d69a1/scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11", size = 36872342, upload-time = "2025-05-08T16:07:31.468Z" },
1031
+ { url = "https://files.pythonhosted.org/packages/fe/54/4379be86dd74b6ad81551689107360d9a3e18f24d20767a2d5b9253a3f0a/scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126", size = 39670869, upload-time = "2025-05-08T16:07:38.002Z" },
1032
+ { url = "https://files.pythonhosted.org/packages/87/2e/892ad2862ba54f084ffe8cc4a22667eaf9c2bcec6d2bff1d15713c6c0703/scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163", size = 40988851, upload-time = "2025-05-08T16:08:33.671Z" },
1033
+ { url = "https://files.pythonhosted.org/packages/1b/e9/7a879c137f7e55b30d75d90ce3eb468197646bc7b443ac036ae3fe109055/scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8", size = 38863011, upload-time = "2025-05-08T16:07:44.039Z" },
1034
+ { url = "https://files.pythonhosted.org/packages/51/d1/226a806bbd69f62ce5ef5f3ffadc35286e9fbc802f606a07eb83bf2359de/scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5", size = 30266407, upload-time = "2025-05-08T16:07:49.891Z" },
1035
+ { url = "https://files.pythonhosted.org/packages/e5/9b/f32d1d6093ab9eeabbd839b0f7619c62e46cc4b7b6dbf05b6e615bbd4400/scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e", size = 22540030, upload-time = "2025-05-08T16:07:54.121Z" },
1036
+ { url = "https://files.pythonhosted.org/packages/e7/29/c278f699b095c1a884f29fda126340fcc201461ee8bfea5c8bdb1c7c958b/scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb", size = 25218709, upload-time = "2025-05-08T16:07:58.506Z" },
1037
+ { url = "https://files.pythonhosted.org/packages/24/18/9e5374b617aba742a990581373cd6b68a2945d65cc588482749ef2e64467/scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723", size = 34809045, upload-time = "2025-05-08T16:08:03.929Z" },
1038
+ { url = "https://files.pythonhosted.org/packages/e1/fe/9c4361e7ba2927074360856db6135ef4904d505e9b3afbbcb073c4008328/scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb", size = 36703062, upload-time = "2025-05-08T16:08:09.558Z" },
1039
+ { url = "https://files.pythonhosted.org/packages/b7/8e/038ccfe29d272b30086b25a4960f757f97122cb2ec42e62b460d02fe98e9/scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4", size = 36393132, upload-time = "2025-05-08T16:08:15.34Z" },
1040
+ { url = "https://files.pythonhosted.org/packages/10/7e/5c12285452970be5bdbe8352c619250b97ebf7917d7a9a9e96b8a8140f17/scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5", size = 38979503, upload-time = "2025-05-08T16:08:21.513Z" },
1041
+ { url = "https://files.pythonhosted.org/packages/81/06/0a5e5349474e1cbc5757975b21bd4fad0e72ebf138c5592f191646154e06/scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca", size = 40308097, upload-time = "2025-05-08T16:08:27.627Z" },
1042
+ ]
1043
+
1044
  [[package]]
1045
  name = "semantic-version"
1046
  version = "2.10.0"
 
1130
  { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload-time = "2025-04-13T13:56:16.21Z" },
1131
  ]
1132
 
1133
+ [[package]]
1134
+ name = "threadpoolctl"
1135
+ version = "3.6.0"
1136
+ source = { registry = "https://pypi.org/simple" }
1137
+ sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" }
1138
+ wheels = [
1139
+ { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" },
1140
+ ]
1141
+
1142
  [[package]]
1143
  name = "tomlkit"
1144
  version = "0.13.2"
 
1173
  { name = "polars" },
1174
  { name = "psycopg2-binary" },
1175
  { name = "pyarrow" },
1176
+ { name = "scikit-learn" },
1177
  { name = "sqlalchemy" },
1178
  ]
1179
 
 
1187
  { name = "polars", specifier = ">=1.30.0" },
1188
  { name = "psycopg2-binary", specifier = ">=2.9.10" },
1189
  { name = "pyarrow", specifier = ">=20.0.0" },
1190
+ { name = "scikit-learn", specifier = ">=1.7.0" },
1191
  { name = "sqlalchemy", specifier = ">=2.0.41" },
1192
  ]
1193