"""Module to mock a timeseries for demo and dev purpouses.""" import polars as pl import polars.selectors as cs def get_df() -> pl.DataFrame: """Retrieve a weather dataset from huggingface datasets. Returns: a dataframe with 3 columns: datetime, sensor_id, value """ raw_df = pl.read_parquet( "hf://datasets/afeng/MTBench_weather_temperature/data/train-*.parquet" ) df = raw_df.explode(*raw_df.columns).sample(fraction=0.01) df = df.select(~cs.starts_with("precipitation")) df = df.with_columns(pl.col("DATE").str.to_datetime()) df = df.unpivot(cs.numeric(), index="DATE") return df.rename({"DATE": "timestamp", "variable": "sensor_id"})