File size: 701 Bytes
0167724
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
"""Module to mock a timeseries for demo and dev purpouses."""

import polars as pl
import polars.selectors as cs


def get_df() -> pl.DataFrame:
    """Build a long-format mock timeseries from a Hugging Face weather dataset.

    The parquet shards are read from the hub, every column is exploded,
    and a random 1% of the resulting rows is kept. Columns whose name
    starts with ``precipitation`` are dropped, ``DATE`` is parsed from
    string to datetime, and the numeric columns are unpivoted so that each
    output row holds a single reading.

    Note:
        The sample is drawn without a seed, so the rows returned differ
        between calls. Reading the ``hf://`` path requires network access.

    Returns:
        A dataframe with three columns: ``timestamp`` (the parsed ``DATE``),
        ``sensor_id`` (the name of the numeric column the reading came
        from) and ``value`` (the reading itself).

    """
    parquet_glob = (
        "hf://datasets/afeng/MTBench_weather_temperature/data/train-*.parquet"
    )
    nested = pl.read_parquet(parquet_glob)

    # Exploding all columns at once presumably flattens per-row lists into
    # one row per observation -- confirm against the dataset schema.
    return (
        nested.explode(*nested.columns)
        .sample(fraction=0.01)
        .select(~cs.starts_with("precipitation"))
        .with_columns(pl.col("DATE").str.to_datetime())
        # Wide -> long: each numeric column becomes (variable, value) rows.
        .unpivot(cs.numeric(), index="DATE")
        .rename({"DATE": "timestamp", "variable": "sensor_id"})
    )