mcp-tscontext / data /get_mock.py
znacer's picture
first iteration
0167724
raw
history blame contribute delete
701 Bytes
"""Module to mock a timeseries for demo and dev purpouses."""
import polars as pl
import polars.selectors as cs
def get_df(
    *,
    fraction: float = 0.01,
    seed: "int | None" = None,
) -> pl.DataFrame:
    """Retrieve a weather dataset from huggingface datasets.

    The parquet files are read, every column is exploded so that each list
    element becomes its own row, a random subset of those rows is kept, the
    columns whose name starts with "precipitation" are dropped, and the
    remaining numeric columns are unpivoted into a long format keyed by the
    parsed "DATE" column.

    Args:
        fraction: share of the exploded rows to keep (defaults to 0.01).
        seed: seed for the random sampling. With ``None`` (the default) a
            different sample is drawn on every call; pass an integer to get
            a reproducible dataframe.

    Returns:
        a dataframe with 3 columns: timestamp, sensor_id, value
    """
    raw_df = pl.read_parquet(
        "hf://datasets/afeng/MTBench_weather_temperature/data/train-*.parquet"
    )

    # Explode all columns together, then down-sample early so the later
    # steps operate on the reduced row set only.
    df = raw_df.explode(*raw_df.columns).sample(fraction=fraction, seed=seed)

    df = df.select(~cs.starts_with("precipitation"))
    df = df.with_columns(pl.col("DATE").str.to_datetime())

    # Wide -> long: each numeric column name ends up in "variable" and its
    # reading in "value"; non-numeric columns other than "DATE" are dropped.
    df = df.unpivot(cs.numeric(), index="DATE")
    return df.rename({"DATE": "timestamp", "variable": "sensor_id"})