File size: 5,459 Bytes
95b3c75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import uuid
import calendar
import numpy as np
# Set professional matplotlib styling with high resolution
#plt.style.use('vayuchat.mplstyle')
# Load the air-quality measurements and convert the Timestamp column from
# text to datetime values so the `.dt` accessors can be used on it later.
df = pd.read_csv("AQ_met_data.csv")
df = df.assign(Timestamp=pd.to_datetime(df["Timestamp"]))
# Load the two auxiliary tables: state statistics, then NCAP funding records.
states_df, ncap_df = (
    pd.read_csv(csv_path)
    for csv_path in ("states_data.csv", "ncap_funding_data.csv")
)
# df is a pandas DataFrame with air quality data from India. Data frequency is daily from 2017 to 2024. The data has the following columns and data types:
# Unnamed: 0                int64
# Timestamp        datetime64[ns]
# State                    object
# City                     object
# Station                  object
# site_id                  object
# Year                      int64
# PM2.5 (µg/m³)           float64
# PM10 (µg/m³)            float64
# NO (µg/m³)              float64
# NO2 (µg/m³)             float64
# NOx (ppb)               float64
# NH3 (µg/m³)             float64
# SO2 (µg/m³)             float64
# CO (mg/m³)              float64
# Ozone (µg/m³)           float64
# AT (°C)                 float64
# RH (%)                  float64
# WS (m/s)                float64
# WD (deg)                float64
# RF (mm)                 float64
# TOT-RF (mm)             float64
# SR (W/mt2)              float64
# BP (mmHg)               float64
# VWS (m/s)               float64
# dtype: object
# states_df is a pandas DataFrame with the population and area of each Indian state, and a flag indicating whether it is a union territory.
# state               object
# population           int64
# area (km2)           int64
# isUnionTerritory      bool
# dtype: object
# ncap_df is a pandas DataFrame of funding given to the cities of India from 2019-2022, under The National Clean Air Program (NCAP).
# S. No.                                 int64
# state                                 object
# city                                  object
# Amount released during FY 2019-20    float64
# Amount released during FY 2020-21    float64
# Amount released during FY 2021-22    float64
# Total fund released                  float64
# Utilisation as on June 2022          float64
# dtype: object
# Question: Compare the wind speed and PM2.5 levels during Delhi’s most polluted week (highest PM2.5) in December 2024 with the previous 15 days and the following 15 days on a time series plot.
# Generate code to answer the question and save result in 'answer' variable
# If creating a plot, save it with a unique filename and store the filename in 'answer'
# If returning text/numbers, store the result directly in 'answer'


import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import uuid
import numpy as np

# Compare PM2.5 and wind speed around Delhi's most polluted week of Dec 2024.
#
# Steps:
#   1. Collapse all Delhi station rows into one city-wide mean per calendar day.
#   2. Find the 7-calendar-day window, lying entirely inside December 2024,
#      with the highest mean PM2.5.
#   3. Plot PM2.5 and wind speed from 15 days before that week to 15 days
#      after it (as far as the data reaches), shading the week itself.
#
# Result: `answer` holds the saved plot's filename, or a short message when
# the analysis cannot be carried out.
PM_COL = 'PM2.5 (µg/m³)'
WS_COL = 'WS (m/s)'

# Ensure data is loaded
if df.empty:
    answer = "No data available"
else:
    fig = None
    try:
        # Keep every Delhi row, not only December: the comparison window
        # extends 15 days before and after the worst week, so it reaches
        # outside December.
        delhi_rows = df[df['City'].str.contains('Delhi', case=False, na=False)]

        # Several stations can report on the same day; average them so that
        # one row equals one calendar day (mean() skips missing readings).
        daily = (
            delhi_rows
            .groupby(delhi_rows['Timestamp'].dt.normalize())[[PM_COL, WS_COL]]
            .mean()
            .sort_index()
            .rename_axis('Date')
        )

        in_december = (daily.index.year == 2024) & (daily.index.month == 12)
        december_pm = daily.loc[in_december, PM_COL]

        # Need at least a full week to compute the weekly mean
        if len(december_pm) < 7:
            answer = "Insufficient data"
        else:
            # Time-based window: each value is the mean over the 7 calendar
            # days ending on that date; min_periods=7 rejects weeks with
            # missing days. Only December days are used, so the winning
            # week lies fully inside December 2024.
            weekly_pm = december_pm.rolling('7D', min_periods=7).mean().dropna()

            if weekly_pm.empty:
                answer = "Insufficient data"
            else:
                week_end = weekly_pm.idxmax()
                week_start = week_end - pd.Timedelta(days=6)

                # Extended window: 15 days before the week starts and
                # 15 days after it ends
                ext_start = week_start - pd.Timedelta(days=15)
                ext_end = week_end + pd.Timedelta(days=15)

                plot_df = (
                    daily.loc[ext_start:ext_end]
                    .dropna(how='all')
                    .reset_index()
                )

                if plot_df.empty:
                    answer = "Insufficient data"
                else:
                    fig, ax_pm = plt.subplots(figsize=(9, 6))
                    # legend=False: a single combined legend is built below,
                    # otherwise each axis draws its own and they overlap.
                    sns.lineplot(data=plot_df, x='Date', y=PM_COL, ax=ax_pm,
                                 label='PM2.5 (µg/m³)', color='tab:red',
                                 legend=False)
                    ax_pm.set_ylabel('PM2.5 (µg/m³)', color='tab:red')
                    ax_pm.tick_params(axis='y', labelcolor='tab:red')

                    ax_ws = ax_pm.twinx()
                    sns.lineplot(data=plot_df, x='Date', y=WS_COL, ax=ax_ws,
                                 label='Wind Speed (m/s)', color='tab:blue',
                                 legend=False)
                    ax_ws.set_ylabel('Wind Speed (m/s)', color='tab:blue')
                    ax_ws.tick_params(axis='y', labelcolor='tab:blue')

                    # Shade the most polluted week so it can be compared
                    # with the days before and after it.
                    ax_pm.axvspan(week_start, week_end, color='tab:orange',
                                  alpha=0.2, label='Most polluted week')

                    # Title and x label go on the primary axis explicitly:
                    # the twin axis hides its x-axis, so labels set on it
                    # would not be shown.
                    ax_pm.set_title('Delhi – PM2.5 and Wind Speed around Most Polluted Week (Dec 2024)')
                    ax_pm.set_xlabel('Date')
                    ax_pm.tick_params(axis='x', rotation=45)

                    pm_handles, pm_labels = ax_pm.get_legend_handles_labels()
                    ws_handles, ws_labels = ax_ws.get_legend_handles_labels()
                    ax_pm.legend(pm_handles + ws_handles,
                                 pm_labels + ws_labels, loc='upper left')

                    fig.tight_layout()

                    # Save plot under a unique name so repeated runs do not
                    # overwrite each other's output
                    filename = f"plot_{uuid.uuid4().hex}.png"
                    fig.savefig(filename, dpi=1200, bbox_inches='tight', facecolor='white')

                    answer = filename
    except Exception:
        # Best-effort boundary: any failure becomes a readable message
        # instead of a crash.
        answer = "Unable to complete analysis with available data"
    finally:
        # Always release the figure, even when plotting or saving failed.
        if fig is not None:
            plt.close(fig)