Spaces:
Runtime error
Runtime error
first iteration
Browse files- .gitignore +13 -0
- app/__init__.py +0 -0
- app/analyzer.py +234 -0
- app/service.py +169 -0
- app/ui.py +90 -0
- data/__init__.py +0 -0
- data/get_mock.py +23 -0
- main.py +22 -0
- pyproject.toml +50 -0
- requirements.txt +262 -0
- uv.lock +0 -0
.gitignore
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python-generated files
|
2 |
+
__pycache__/
|
3 |
+
*.py[oc]
|
4 |
+
build/
|
5 |
+
dist/
|
6 |
+
wheels/
|
7 |
+
*.egg-info
|
8 |
+
|
9 |
+
# Virtual environments
|
10 |
+
.venv
|
11 |
+
|
12 |
+
# sqlite files
|
13 |
+
*.db
|
app/__init__.py
ADDED
File without changes
|
app/analyzer.py
ADDED
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Module that defines the TimeSeriesAnalyzer object."""
|
2 |
+
|
3 |
+
import os
|
4 |
+
from dataclasses import dataclass
|
5 |
+
from datetime import datetime
|
6 |
+
from typing import Any
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
import polars as pl
|
10 |
+
from loguru import logger
|
11 |
+
from sqlalchemy import Engine, create_engine, text
|
12 |
+
|
13 |
+
from data.get_mock import get_df
|
14 |
+
|
15 |
+
|
@dataclass
class TimeSeriesConfig:
    """Object with the database connection details.

    Attributes:
        host: address of the database
        port: port of the database
        database: name of the database
        username: username of the database
        password: password of the database

    """

    host: str
    port: int
    database: str
    username: str
    password: str
34 |
+
|
35 |
+
|
class TimeSeriesAnalyzer:
    """Handle connection details, and how to compute insights.

    Attributes:
        use_mock_db: if True, the database is mocked with a local SQLite file.
        config: connection details (only set when not using the mock database).
        connection: SQLAlchemy engine, lazily created by connect().

    """

    def __init__(self):
        # os.getenv returns a *string* (or the default); the previous
        # `os.getenv("USE_MOCK_DB", True)` made any non-empty value truthy,
        # so USE_MOCK_DB=false could never disable the mock database.
        self.use_mock_db = (
            str(os.getenv("USE_MOCK_DB", "true")).strip().lower()
            not in ("0", "false", "no")
        )
        if not self.use_mock_db:
            self.config = TimeSeriesConfig(
                host=os.getenv("DB_HOST", "localhost"),
                port=int(os.getenv("DB_PORT", 5432)),
                database=os.getenv("DB_NAME", "data"),
                username=os.getenv("DB_USER", "postgres"),
                password=os.getenv("DB_PASS", "secretpassword"),
            )
        # Assigned (not just annotated) so _ensure_connected() can test it
        # without raising AttributeError before the first connect().
        self.connection: Engine | None = None

    def connect(self):
        """Instantiate the database engine."""
        if self.use_mock_db:
            logger.info("Connecting to mock SQLite database")
            self.connection = create_engine("sqlite:///mock.db")
            self._setup_mock_db()
        else:
            logger.info(
                f"Connecting to TimescaleDB at {self.config.host}:{self.config.port}"
            )
            self.connection = create_engine(
                f"postgresql+psycopg2://{self.config.username}:{self.config.password}@{self.config.host}:{self.config.port}/{self.config.database}"
            )
        logger.info("Connected to database!")

    def _setup_mock_db(self):
        """Populate the SQLite mock database on first use."""
        # Check for the existing file *before* get_df(): the original
        # downloaded the remote dataset and then threw it away.
        if os.path.exists("./mock.db"):
            return
        df = get_df()
        logger.info(
            f"""df shape: {df.shape}, size: {df.estimated_size("kb"):,.3f}kb"""
        )
        logger.debug(df.head(5))

        with self.connection.connect() as conn:
            df.write_database(
                "timeseries_data",
                conn,
                if_table_exists="replace",
                engine_options={},
            )

    def _ensure_connected(self):
        # Lazily create the engine on first use.
        if self.connection is None:
            self.connect()

    def get_available_metrics(self) -> list[str]:
        """Get the list of sensor_id.

        Returns:
            list of sensors

        """
        self._ensure_connected()
        sql = "SELECT DISTINCT sensor_id FROM timeseries_data ORDER BY sensor_id ASC"
        with self.connection.connect() as conn:
            rows = conn.execute(text(sql))
            return [r[0] for r in rows]

    def query_metrics(
        self,
        sensor_id: str,
        start_time: str,
        end_time: str,
    ) -> pl.DataFrame:
        """Run a select query of 1 time serie.

        Args:
            sensor_id: id of the sensor
            start_time: iso datetime
            end_time: iso datetime

        Returns:
            The expected time serie as a polar DataFrame.

        """
        self._ensure_connected()
        start_dt = datetime.fromisoformat(start_time)
        end_dt = datetime.fromisoformat(end_time)

        # Bound parameters instead of f-string interpolation: sensor_id
        # comes from the UI, so the previous version was SQL-injectable.
        query = """SELECT timestamp, value FROM timeseries_data
            WHERE sensor_id = :sensor_id AND timestamp >= :start_dt AND timestamp <= :end_dt
            ORDER BY timestamp ASC"""
        with self.connection.connect() as conn:
            df = pl.read_database(
                query,
                conn,
                execute_options={
                    "parameters": {
                        "sensor_id": sensor_id,
                        "start_dt": start_dt,
                        "end_dt": end_dt,
                    }
                },
            )
        return df

    def detect_anomalies(
        self, data: pl.DataFrame, threshold: float = 1.0
    ) -> dict[str, Any]:
        """Detect anomalies in the time series data for a specific sensor.

        Args:
            data: expect only 1 timeserie with columns datetime and value
            threshold: default to 1.0

        Returns:
            {
                "anomalies_found": int,
                "anomalies": list[dict[str, int]],
                "statistics": {
                    "mean": float,
                    "std": float,
                    "min": float,
                    "max": float
                },

        """
        mean_val = data["value"].mean()
        std_val = data["value"].std()
        # Empty input or zero variance: no z-score can be computed, so
        # report zero anomalies instead of dividing by zero/None.
        if data.is_empty() or not std_val:
            return {
                "anomalies_found": 0,
                "anomalies": [],
                "statistics": {
                    "mean": mean_val,
                    "std": std_val,
                    "min": data["value"].min(),
                    "max": data["value"].max(),
                },
            }
        data = data.with_columns(
            ((pl.col("value") - mean_val).abs() / std_val).alias("z_score")
        )

        anomalies = (
            data.filter(pl.col("z_score") > threshold)
            .select(
                [
                    pl.col("timestamp").cast(pl.Utf8).alias("timestamp"),
                    pl.col("value").cast(pl.Float64),
                    pl.col("z_score").cast(pl.Float64).alias("z_score"),
                    # The original cast the boolean to Utf8 *before* mapping,
                    # so both "true" and "false" were truthy and every
                    # anomaly came out as "high".
                    pl.when(pl.col("z_score") > 2.0)
                    .then(pl.lit("high"))
                    .otherwise(pl.lit("medium"))
                    .alias("severity"),
                ]
            )
            .to_dicts()
        )
        return {
            "anomalies_found": len(anomalies),
            "anomalies": anomalies,
            "statistics": {
                "mean": mean_val,
                "std": std_val,
                "min": data["value"].min(),
                "max": data["value"].max(),
            },
        }

    def calculate_trends(self, data: pl.DataFrame) -> dict[str, Any]:
        """Calculate trend information such as trend direction and percentage change.

        Args:
            data: expect only 1 timeserie with columns datetime and value

        Returns:
            {
                "trend_direction": Literal["increasing", "decreasing", "stable"],
                "trend_slope": float,
                "percentage_change": float,
                "start_value": float,
                "end_value": float,
                "time_period": {
                    "start": datetime,
                    "end": datetime,
                },
            }

        """
        values = data["value"]
        timestamps = data["timestamp"]
        n = len(values)
        # np.polyfit needs at least 2 points; fall back to a flat trend.
        if n > 1:
            coeffs = np.polyfit(np.arange(n), values.to_numpy(), 1)
            trend_slope = float(coeffs[0])
            # Guard the division: a zero start value would otherwise raise.
            pct_change = (
                ((values[-1] - values[0]) / values[0]) * 100 if values[0] else 0
            )
        else:
            trend_slope = 0.0
            pct_change = 0
        return {
            "trend_direction": "increasing"
            if trend_slope > 0
            else "decreasing"
            if trend_slope < 0
            else "stable",
            "trend_slope": float(trend_slope),
            "percentage_change": float(pct_change),
            "start_value": float(values[0]) if n > 0 else 0,
            "end_value": float(values[-1]) if n > 0 else 0,
            "time_period": {
                "start": timestamps[0] if len(timestamps) > 0 else None,
                "end": timestamps[-1] if len(timestamps) > 0 else None,
            },
        }
app/service.py
ADDED
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Functions meant to be exposed."""
|
2 |
+
|
3 |
+
import json
|
4 |
+
import threading
|
5 |
+
|
6 |
+
from loguru import logger
|
7 |
+
|
8 |
+
from app.analyzer import TimeSeriesAnalyzer
|
9 |
+
|
10 |
+
analyzer: TimeSeriesAnalyzer
|
11 |
+
analyzer_lock = threading.Lock()
|
12 |
+
|
13 |
+
|
def ensure_analyzer_connected() -> bool:
    """Check if the analyzer is connected, creating it when missing.

    Returns:
        True; the connection is established lazily as a side effect.

    """
    # `analyzer` is declared but never assigned at import time, so probe
    # globals() instead of the original try/`if analyzer: pass`/except
    # NameError construction.
    if "analyzer" not in globals():
        connect_database()
    return True
23 |
+
|
24 |
+
|
def connect_database() -> str:
    """Connect to the TimescaleDB and initialize the analyzer.

    Returns:
        A human-readable success message.

    """
    global analyzer
    # The lock serialises concurrent connection attempts (e.g. several
    # Gradio callbacks racing at startup) so only one engine is built.
    with analyzer_lock:
        analyzer = TimeSeriesAnalyzer()
        analyzer.connect()
    return "Successfully connected to database"
32 |
+
|
33 |
+
|
def list_available_metrics() -> str:
    """List all available metrics from the connected database."""
    if not ensure_analyzer_connected():
        return json.dumps({"error": "Database not connected"})
    payload = {"available_metrics": analyzer.get_available_metrics()}
    return json.dumps(payload, indent=2)
40 |
+
|
41 |
+
|
def query_timeseries(sensor_id: str, start_time: str, end_time: str) -> str:
    """Query time series data for a specific sensor within a time range.

    Args:
        sensor_id: The identifier of the sensor to query.
        start_time: The start of the range in ISO format.
        end_time: The end of the range in ISO format.

    Returns:
        A JSON string with the point count, observed time range and a
        sample of the first rows.

    """
    logger.info("query timeseries")
    if not ensure_analyzer_connected():
        return json.dumps({"error": "Database not connected"})
    data = analyzer.query_metrics(sensor_id, start_time, end_time)
    # Evaluate emptiness once. The original wrote `data.is_empty` without
    # calling it for "end"; a bound method is always truthy, so "end" was
    # unconditionally None.
    is_empty = data.is_empty()
    result = {
        "sensor_id": sensor_id,
        "data_points": len(data),
        "time_range": {
            "start": None if is_empty else data["timestamp"].min(),
            "end": None if is_empty else data["timestamp"].max(),
        },
        "sample_data": data.head(10).to_dicts(),
    }
    return json.dumps(result, indent=2, default=str)
58 |
+
|
59 |
+
|
def detect_anomalies(
    sensor_id: str, start_time: str, end_time: str, threshold: float = 2.0
) -> str:
    """Detect anomalies in the time series data for a specific sensor.

    Args:
        sensor_id: The identifier of the sensor to analyze.
        start_time: The start of the analysis period in ISO format.
        end_time: The end of the analysis period in ISO format.
        threshold: Z-score above which a point is flagged as anomalous.

    Returns:
        A JSON string with the anomaly count, anomaly rows and summary
        statistics, or a JSON error if the database is not connected.

    """
    logger.info("detect anomalies")
    if not ensure_analyzer_connected():
        return json.dumps({"error": "Database not connected"})
    data = analyzer.query_metrics(sensor_id, start_time, end_time)
    anomalies = analyzer.detect_anomalies(data, threshold)
    # default=str matches the other endpoints and keeps serialization from
    # failing if the statistics ever contain non-JSON-native values.
    return json.dumps(anomalies, indent=2, default=str)
70 |
+
|
71 |
+
|
def analyze_trends(sensor_id: str, start_time: str, end_time: str) -> str:
    """Analyze trends in the time series data for a specific sensor.

    This function retrieves time series data for the specified sensor within the given time range
    and calculates trend information such as trend direction and percentage change.

    Args:
        sensor_id (str): The identifier of the sensor to analyze.
        start_time (str): The start time of the analysis period in ISO format.
        end_time (str): The end time of the analysis period in ISO format.

    Returns:
        str: A JSON string containing trend analysis results including trend direction,
             percentage change, and start/end values.

    Raises:
        No direct exceptions, but returns a JSON error message if the database is not connected.

    """
    logger.info("analyze trends")
    if not ensure_analyzer_connected():
        return json.dumps({"error": "Database not connected"})
    data = analyzer.query_metrics(sensor_id, start_time, end_time)
    trends = analyzer.calculate_trends(data)
    # trends["time_period"] holds datetime objects, which json.dumps
    # rejects without default=str (the original raised TypeError here).
    return json.dumps(trends, indent=2, default=str)
97 |
+
|
98 |
+
|
def generate_analysis_report(
    sensor_id: str,
    start_time: str,
    end_time: str,
    include_anomalies: bool = True,
    include_trends: bool = True,
    user_question: str | None = None,
) -> str:
    """Generate a comprehensive analysis report for a specific sensor.

    Builds a Markdown report with a data summary, plus optional trend and
    anomaly sections depending on the flags.

    Args:
        sensor_id (str): The identifier of the sensor to analyze.
        start_time (str): The start time of the analysis period in ISO format.
        end_time (str): The end time of the analysis period in ISO format.
        include_anomalies (bool, optional): Include anomaly detection. Defaults to True.
        include_trends (bool, optional): Include trend analysis. Defaults to True.
        user_question (str | None, optional): A question from the user echoed
            into the report. Defaults to None.

    Returns:
        str: The assembled Markdown report.

    Note:
        Returns an error message if the database is not connected.

    """
    logger.info("generate report")
    if not ensure_analyzer_connected():
        return "Error: Database not connected. Please connect first."
    data = analyzer.query_metrics(sensor_id, start_time, end_time)

    # Header section.
    lines = [
        f"## Analysis Report for {sensor_id}",
        f"**Time Period:** {start_time} to {end_time}",
        f"**Data Points:** {len(data)}",
    ]
    if user_question:
        lines.append(f"**User Question:** {user_question}")

    if include_trends:
        trends = analyzer.calculate_trends(data)
        lines.extend(
            [
                "\n### Trend Analysis",
                f"- **Direction:** {trends['trend_direction'].title()}",
                f"- **Change:** {trends['percentage_change']:.2f}%",
                f"- **Start Value:** {trends['start_value']:.2f}",
                f"- **End Value:** {trends['end_value']:.2f}",
            ]
        )

    if include_anomalies:
        anomalies = analyzer.detect_anomalies(data)
        lines.append("\n### Anomaly Detection")
        lines.append(f"- **Anomalies Found:** {anomalies['anomalies_found']}")
        if anomalies["anomalies_found"] > 0:
            lines.append("- **Notable Anomalies:**")
            # Only the first few anomalies are listed to keep the report short.
            for entry in anomalies["anomalies"][:5]:
                lines.append(
                    f"  - {entry['timestamp']}: {entry['value']:.2f} (severity: {entry['severity']})"
                )
        stats = anomalies["statistics"]
        lines.extend(
            [
                "\n### Statistical Summary",
                f"- **Mean:** {stats['mean']:.2f}",
                f"- **Std Dev:** {stats['std']:.2f}",
                f"- **Min:** {stats['min']:.2f}",
                f"- **Max:** {stats['max']:.2f}",
            ]
        )

    return "\n".join(lines)
app/ui.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Definition of the Gradio UI."""
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
|
5 |
+
from app.service import (
|
6 |
+
analyze_trends,
|
7 |
+
connect_database,
|
8 |
+
detect_anomalies,
|
9 |
+
generate_analysis_report,
|
10 |
+
list_available_metrics,
|
11 |
+
query_timeseries,
|
12 |
+
)
|
13 |
+
|
# Default inputs pre-filled in every tab so the demo works out of the box.
example_sensor = "temperature"
example_start = "2019-06-15T02:54:00"
example_end = "2019-06-17T02:54:00"

# One tab per service function; each button forwards its textbox values to
# the matching function from app.service and shows the returned string.
with gr.Blocks() as demo:
    gr.Markdown("# TimescaleDB Time Series Analyzer API (Gradio)")
    with gr.Tab("Connect DB"):
        connect_btn = gr.Button("Connect to TimescaleDB")
        connect_out = gr.Textbox(label="Connection Result")
        connect_btn.click(
            fn=connect_database,
            inputs=[],
            outputs=connect_out,
        )
    with gr.Tab("List Metrics"):
        list_btn = gr.Button("List Available Metrics")
        list_out = gr.Textbox(label="Metrics")
        list_btn.click(
            fn=list_available_metrics,
            inputs=[],
            outputs=list_out,
        )
    with gr.Tab("Query Timeseries"):
        sensor_id = gr.Textbox(label="Sensor ID", value=example_sensor)
        start_time = gr.Textbox(label="Start Time (ISO)", value=example_start)
        end_time = gr.Textbox(label="End Time (ISO)", value=example_end)
        query_btn = gr.Button("Query")
        query_out = gr.Textbox(label="Query Result")
        query_btn.click(
            fn=query_timeseries,
            inputs=[sensor_id, start_time, end_time],
            outputs=query_out,
        )
    with gr.Tab("Detect Anomalies"):
        sensor_id2 = gr.Textbox(label="Sensor ID", value=example_sensor)
        start_time2 = gr.Textbox(label="Start Time (ISO)", value=example_start)
        end_time2 = gr.Textbox(label="End Time (ISO)", value=example_end)
        threshold = gr.Number(label="Threshold", value=2.0)
        anomaly_btn = gr.Button("Detect")
        anomaly_out = gr.Textbox(label="Anomaly Result")
        anomaly_btn.click(
            fn=detect_anomalies,
            inputs=[sensor_id2, start_time2, end_time2, threshold],
            outputs=anomaly_out,
        )
    with gr.Tab("Analyze Trends"):
        sensor_id3 = gr.Textbox(label="Sensor ID", value=example_sensor)
        start_time3 = gr.Textbox(label="Start Time (ISO)", value=example_start)
        end_time3 = gr.Textbox(label="End Time (ISO)", value=example_end)
        trend_btn = gr.Button("Analyze")
        trend_out = gr.Textbox(label="Trend Result")
        trend_btn.click(
            fn=analyze_trends,
            inputs=[sensor_id3, start_time3, end_time3],
            outputs=trend_out,
        )
    with gr.Tab("Generate Report"):
        sensor_id4 = gr.Textbox(label="Sensor ID", value=example_sensor)
        start_time4 = gr.Textbox(label="Start Time (ISO)", value=example_start)
        end_time4 = gr.Textbox(label="End Time (ISO)", value=example_end)
        include_anomalies = gr.Checkbox(label="Include Anomalies", value=True)
        include_trends = gr.Checkbox(label="Include Trends", value=True)
        user_question = gr.Textbox(label="User Question", value="")
        report_btn = gr.Button("Generate Report")
        # The report is Markdown, so it is rendered rather than shown raw.
        report_out = gr.Markdown(label="Report")
        report_btn.click(
            fn=generate_analysis_report,
            inputs=[
                sensor_id4,
                start_time4,
                end_time4,
                include_anomalies,
                include_trends,
                user_question,
            ],
            outputs=report_out,
        )
data/__init__.py
ADDED
File without changes
|
data/get_mock.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Module to mock a timeseries for demo and dev purpouses."""
|
2 |
+
|
3 |
+
import polars as pl
|
4 |
+
import polars.selectors as cs
|
5 |
+
|
6 |
+
|
def get_df() -> pl.DataFrame:
    """Retrieve a weather dataset from huggingface datasets.

    Returns:
        a dataframe with 3 columns: datetime, sensor_id, value

    """
    source = "hf://datasets/afeng/MTBench_weather_temperature/data/train-*.parquet"
    raw_df = pl.read_parquet(source)

    # Flatten the nested list columns, then downsample to keep the mock small.
    sampled = raw_df.explode(*raw_df.columns).sample(fraction=0.01)

    # Drop precipitation columns, parse dates, and melt to long format.
    tidy = (
        sampled.select(~cs.starts_with("precipitation"))
        .with_columns(pl.col("DATE").str.to_datetime())
        .unpivot(cs.numeric(), index="DATE")
    )
    return tidy.rename({"DATE": "timestamp", "variable": "sensor_id"})
main.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Module to launch the application.
|
2 |
+
|
3 |
+
run: `gradio main.py` to run the application
|
4 |
+
"""
|
5 |
+
|
6 |
+
import sys
|
7 |
+
|
8 |
+
from loguru import logger
|
9 |
+
|
10 |
+
from app.ui import demo
|
11 |
+
|
# Mirror log records to stdout. NOTE: the previous `filter="my_module"`
# silently discarded every record, because loguru's string filter matches
# the emitting module's name and no module named "my_module" exists here.
logger.add(
    sys.stdout,
    format="<green>{time}</green> <level>{message}</level>",
    level="INFO",
    colorize=True,
)


if __name__ == "__main__":
    # mcp_server=True also exposes the app's functions over MCP.
    demo.launch(mcp_server=True)
pyproject.toml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "tscontext"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Add your description here"
|
5 |
+
readme = "README.md"
|
6 |
+
requires-python = ">=3.12"
|
7 |
+
dependencies = [
|
8 |
+
"asyncpg>=0.30.0",
|
9 |
+
"gradio[mcp]>=5.32.1",
|
10 |
+
"ipykernel>=6.29.5",
|
11 |
+
"loguru>=0.7.3",
|
12 |
+
"numpy>=2.2.6",
|
13 |
+
"pandas>=2.2.3",
|
14 |
+
"polars>=1.30.0",
|
15 |
+
"psycopg2-binary>=2.9.10",
|
16 |
+
"pyarrow>=20.0.0",
|
17 |
+
"sqlalchemy>=2.0.41",
|
18 |
+
]
|
19 |
+
|
20 |
+
|
21 |
+
[tool.ruff]
|
22 |
+
include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"]
|
23 |
+
extend-exclude = ["reports/**", "**/__init__.py"]
|
24 |
+
line-length = 80
|
25 |
+
|
26 |
+
[tool.ruff.lint]
|
27 |
+
select = [
|
28 |
+
"F", # pyflakes
|
29 |
+
"I", # isort
|
30 |
+
"ISC", # string literal concatenation.
|
31 |
+
"UP", # pyupgrade
|
32 |
+
"E", # pycodestyle
|
33 |
+
"W", # warning
|
34 |
+
"D", # pydocstyle
|
35 |
+
"NPY", # Numpy
|
36 |
+
"SIM101", # flake8-simplify
|
37 |
+
"FA", # future annotations
|
38 |
+
]
|
39 |
+
ignore = [
|
40 |
+
'D105',
|
41 |
+
'D107',
|
42 |
+
'D205',
|
43 |
+
'D203',
|
44 |
+
'D213',
|
45 |
+
'D415',
|
46 |
+
'E501',
|
47 |
+
'B011',
|
48 |
+
'B028',
|
49 |
+
'B904',
|
50 |
+
]
|
requirements.txt
ADDED
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file was autogenerated by uv via the following command:
|
2 |
+
# uv pip compile pyproject.toml -o requirements.txt
|
3 |
+
aiofiles==24.1.0
|
4 |
+
# via gradio
|
5 |
+
annotated-types==0.7.0
|
6 |
+
# via pydantic
|
7 |
+
anyio==4.9.0
|
8 |
+
# via
|
9 |
+
# gradio
|
10 |
+
# httpx
|
11 |
+
# mcp
|
12 |
+
# sse-starlette
|
13 |
+
# starlette
|
14 |
+
asttokens==3.0.0
|
15 |
+
# via stack-data
|
16 |
+
asyncpg==0.30.0
|
17 |
+
# via tscontext (pyproject.toml)
|
18 |
+
certifi==2025.4.26
|
19 |
+
# via
|
20 |
+
# httpcore
|
21 |
+
# httpx
|
22 |
+
# requests
|
23 |
+
charset-normalizer==3.4.2
|
24 |
+
# via requests
|
25 |
+
click==8.2.1
|
26 |
+
# via
|
27 |
+
# typer
|
28 |
+
# uvicorn
|
29 |
+
comm==0.2.2
|
30 |
+
# via ipykernel
|
31 |
+
debugpy==1.8.14
|
32 |
+
# via ipykernel
|
33 |
+
decorator==5.2.1
|
34 |
+
# via ipython
|
35 |
+
executing==2.2.0
|
36 |
+
# via stack-data
|
37 |
+
fastapi==0.115.12
|
38 |
+
# via gradio
|
39 |
+
ffmpy==0.6.0
|
40 |
+
# via gradio
|
41 |
+
filelock==3.18.0
|
42 |
+
# via huggingface-hub
|
43 |
+
fsspec==2025.5.1
|
44 |
+
# via
|
45 |
+
# gradio-client
|
46 |
+
# huggingface-hub
|
47 |
+
gradio==5.33.0
|
48 |
+
# via tscontext (pyproject.toml)
|
49 |
+
gradio-client==1.10.2
|
50 |
+
# via gradio
|
51 |
+
greenlet==3.2.2
|
52 |
+
# via sqlalchemy
|
53 |
+
groovy==0.1.2
|
54 |
+
# via gradio
|
55 |
+
h11==0.16.0
|
56 |
+
# via
|
57 |
+
# httpcore
|
58 |
+
# uvicorn
|
59 |
+
hf-xet==1.1.3
|
60 |
+
# via huggingface-hub
|
61 |
+
httpcore==1.0.9
|
62 |
+
# via httpx
|
63 |
+
httpx==0.28.1
|
64 |
+
# via
|
65 |
+
# gradio
|
66 |
+
# gradio-client
|
67 |
+
# mcp
|
68 |
+
# safehttpx
|
69 |
+
httpx-sse==0.4.0
|
70 |
+
# via mcp
|
71 |
+
huggingface-hub==0.32.4
|
72 |
+
# via
|
73 |
+
# gradio
|
74 |
+
# gradio-client
|
75 |
+
idna==3.10
|
76 |
+
# via
|
77 |
+
# anyio
|
78 |
+
# httpx
|
79 |
+
# requests
|
80 |
+
ipykernel==6.29.5
|
81 |
+
# via tscontext (pyproject.toml)
|
82 |
+
ipython==9.3.0
|
83 |
+
# via ipykernel
|
84 |
+
ipython-pygments-lexers==1.1.1
|
85 |
+
# via ipython
|
86 |
+
jedi==0.19.2
|
87 |
+
# via ipython
|
88 |
+
jinja2==3.1.6
|
89 |
+
# via gradio
|
90 |
+
jupyter-client==8.6.3
|
91 |
+
# via ipykernel
|
92 |
+
jupyter-core==5.8.1
|
93 |
+
# via
|
94 |
+
# ipykernel
|
95 |
+
# jupyter-client
|
96 |
+
loguru==0.7.3
|
97 |
+
# via tscontext (pyproject.toml)
|
98 |
+
markdown-it-py==3.0.0
|
99 |
+
# via rich
|
100 |
+
markupsafe==3.0.2
|
101 |
+
# via
|
102 |
+
# gradio
|
103 |
+
# jinja2
|
104 |
+
matplotlib-inline==0.1.7
|
105 |
+
# via
|
106 |
+
# ipykernel
|
107 |
+
# ipython
|
108 |
+
mcp==1.9.0
|
109 |
+
# via gradio
|
110 |
+
mdurl==0.1.2
|
111 |
+
# via markdown-it-py
|
112 |
+
nest-asyncio==1.6.0
|
113 |
+
# via ipykernel
|
114 |
+
numpy==2.2.6
|
115 |
+
# via
|
116 |
+
# tscontext (pyproject.toml)
|
117 |
+
# gradio
|
118 |
+
# pandas
|
119 |
+
orjson==3.10.18
|
120 |
+
# via gradio
|
121 |
+
packaging==25.0
|
122 |
+
# via
|
123 |
+
# gradio
|
124 |
+
# gradio-client
|
125 |
+
# huggingface-hub
|
126 |
+
# ipykernel
|
127 |
+
pandas==2.3.0
|
128 |
+
# via
|
129 |
+
# tscontext (pyproject.toml)
|
130 |
+
# gradio
|
131 |
+
parso==0.8.4
|
132 |
+
# via jedi
|
133 |
+
pexpect==4.9.0
|
134 |
+
# via ipython
|
135 |
+
pillow==11.2.1
|
136 |
+
# via gradio
|
137 |
+
platformdirs==4.3.8
|
138 |
+
# via jupyter-core
|
139 |
+
polars==1.30.0
|
140 |
+
# via tscontext (pyproject.toml)
|
141 |
+
prompt-toolkit==3.0.51
|
142 |
+
# via ipython
|
143 |
+
psutil==7.0.0
|
144 |
+
# via ipykernel
|
145 |
+
psycopg2-binary==2.9.10
|
146 |
+
# via tscontext (pyproject.toml)
|
147 |
+
ptyprocess==0.7.0
|
148 |
+
# via pexpect
|
149 |
+
pure-eval==0.2.3
|
150 |
+
# via stack-data
|
151 |
+
pyarrow==20.0.0
|
152 |
+
# via tscontext (pyproject.toml)
|
153 |
+
pydantic==2.11.5
|
154 |
+
# via
|
155 |
+
# fastapi
|
156 |
+
# gradio
|
157 |
+
# mcp
|
158 |
+
# pydantic-settings
|
159 |
+
pydantic-core==2.33.2
|
160 |
+
# via pydantic
|
161 |
+
pydantic-settings==2.9.1
|
162 |
+
# via mcp
|
163 |
+
pydub==0.25.1
|
164 |
+
# via gradio
|
165 |
+
pygments==2.19.1
|
166 |
+
# via
|
167 |
+
# ipython
|
168 |
+
# ipython-pygments-lexers
|
169 |
+
# rich
|
170 |
+
python-dateutil==2.9.0.post0
|
171 |
+
# via
|
172 |
+
# jupyter-client
|
173 |
+
# pandas
|
174 |
+
python-dotenv==1.1.0
|
175 |
+
# via pydantic-settings
|
176 |
+
python-multipart==0.0.20
|
177 |
+
# via
|
178 |
+
# gradio
|
179 |
+
# mcp
|
180 |
+
pytz==2025.2
|
181 |
+
# via pandas
|
182 |
+
pyyaml==6.0.2
|
183 |
+
# via
|
184 |
+
# gradio
|
185 |
+
# huggingface-hub
|
186 |
+
pyzmq==26.4.0
|
187 |
+
# via
|
188 |
+
# ipykernel
|
189 |
+
# jupyter-client
|
190 |
+
requests==2.32.3
|
191 |
+
# via huggingface-hub
|
192 |
+
rich==14.0.0
|
193 |
+
# via typer
|
194 |
+
ruff==0.11.12
|
195 |
+
# via gradio
|
196 |
+
safehttpx==0.1.6
|
197 |
+
# via gradio
|
198 |
+
semantic-version==2.10.0
|
199 |
+
# via gradio
|
200 |
+
shellingham==1.5.4
|
201 |
+
# via typer
|
202 |
+
six==1.17.0
|
203 |
+
# via python-dateutil
|
204 |
+
sniffio==1.3.1
|
205 |
+
# via anyio
|
206 |
+
sqlalchemy==2.0.41
|
207 |
+
# via tscontext (pyproject.toml)
|
208 |
+
sse-starlette==2.3.6
|
209 |
+
# via mcp
|
210 |
+
stack-data==0.6.3
|
211 |
+
# via ipython
|
212 |
+
starlette==0.46.2
|
213 |
+
# via
|
214 |
+
# fastapi
|
215 |
+
# gradio
|
216 |
+
# mcp
|
217 |
+
tomlkit==0.13.3
|
218 |
+
# via gradio
|
219 |
+
tornado==6.5.1
|
220 |
+
# via
|
221 |
+
# ipykernel
|
222 |
+
# jupyter-client
|
223 |
+
tqdm==4.67.1
|
224 |
+
# via huggingface-hub
|
225 |
+
traitlets==5.14.3
|
226 |
+
# via
|
227 |
+
# comm
|
228 |
+
# ipykernel
|
229 |
+
# ipython
|
230 |
+
# jupyter-client
|
231 |
+
# jupyter-core
|
232 |
+
# matplotlib-inline
|
233 |
+
typer==0.16.0
|
234 |
+
# via gradio
|
235 |
+
typing-extensions==4.14.0
|
236 |
+
# via
|
237 |
+
# anyio
|
238 |
+
# fastapi
|
239 |
+
# gradio
|
240 |
+
# gradio-client
|
241 |
+
# huggingface-hub
|
242 |
+
# pydantic
|
243 |
+
# pydantic-core
|
244 |
+
# sqlalchemy
|
245 |
+
# typer
|
246 |
+
# typing-inspection
|
247 |
+
typing-inspection==0.4.1
|
248 |
+
# via
|
249 |
+
# pydantic
|
250 |
+
# pydantic-settings
|
251 |
+
tzdata==2025.2
|
252 |
+
# via pandas
|
253 |
+
urllib3==2.4.0
|
254 |
+
# via requests
|
255 |
+
uvicorn==0.34.3
|
256 |
+
# via
|
257 |
+
# gradio
|
258 |
+
# mcp
|
259 |
+
wcwidth==0.2.13
|
260 |
+
# via prompt-toolkit
|
261 |
+
websockets==15.0.1
|
262 |
+
# via gradio-client
|
uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|