Spaces:

SustainabilityLabIITGN
/

VayuChat

Running

App Files Files Community

VayuChat / test_image.py

Nipun

Implement ultra-high DPI plots and fix UI responsiveness

95b3c75 11 days ago

raw

history blame contribute delete

5.46 kB

	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	import uuid
	import calendar
	import numpy as np
	# Set professional matplotlib styling with high resolution
	#plt.style.use('vayuchat.mplstyle')
	# Load the three input tables. The air-quality timestamps are converted to
	# datetime64 right away because the analysis below relies on `.dt` accessors.
	df = pd.read_csv("AQ_met_data.csv").assign(
	    Timestamp=lambda frame: pd.to_datetime(frame["Timestamp"])
	)
	states_df, ncap_df = (
	    pd.read_csv(path) for path in ("states_data.csv", "ncap_funding_data.csv")
	)
	# df is a pandas DataFrame with air quality data from India. Data frequency is daily from 2017 to 2024. The data has the following columns and data types:
	# Unnamed: 0 int64
	# Timestamp datetime64[ns]
	# State object
	# City object
	# Station object
	# site_id object
	# Year int64
	# PM2.5 (µg/m³) float64
	# PM10 (µg/m³) float64
	# NO (µg/m³) float64
	# NO2 (µg/m³) float64
	# NOx (ppb) float64
	# NH3 (µg/m³) float64
	# SO2 (µg/m³) float64
	# CO (mg/m³) float64
	# Ozone (µg/m³) float64
	# AT (°C) float64
	# RH (%) float64
	# WS (m/s) float64
	# WD (deg) float64
	# RF (mm) float64
	# TOT-RF (mm) float64
	# SR (W/mt2) float64
	# BP (mmHg) float64
	# VWS (m/s) float64
	# dtype: object
	# states_df is a pandas DataFrame of the state-wise population and area of India, and whether each state is a union territory.
	# state object
	# population int64
	# area (km2) int64
	# isUnionTerritory bool
	# dtype: object
	# ncap_df is a pandas DataFrame of funding given to the cities of India from 2019-2022, under The National Clean Air Program (NCAP).
	# S. No. int64
	# state object
	# city object
	# Amount released during FY 2019-20 float64
	# Amount released during FY 2020-21 float64
	# Amount released during FY 2021-22 float64
	# Total fund released float64
	# Utilisation as on June 2022 float64
	# dtype: object
	# Question: Compare the wind speed and PM2.5 levels during Delhi’s most polluted week (highest PM2.5) in December 2024 with the previous 15 days and the following 15 days on a time series plot.
	# Generate code to answer the question and save result in 'answer' variable
	# If creating a plot, save it with a unique filename and store the filename in 'answer'
	# If returning text/numbers, store the result directly in 'answer'


	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	import uuid
	import numpy as np

	# Plot Delhi's PM2.5 and wind speed for the most polluted week of December
	# 2024 together with the 15 days before and the 15 days after it.
	#
	# Result contract: `answer` is always set. It is the saved plot's filename
	# on success, otherwise one of the short explanatory strings below.
	PM_COL = 'PM2.5 (µg/m³)'
	WS_COL = 'WS (m/s)'

	if df.empty:
	    answer = "No data available"
	else:
	    try:
	        # Collapse to one row per calendar day. The table has a Station
	        # column, so a city can contribute several rows per day; a 7-row
	        # rolling window over raw rows would not be a 7-day week.
	        delhi = df[df['City'].str.contains('Delhi', case=False, na=False)]
	        daily = (
	            delhi.groupby(delhi['Timestamp'].dt.normalize())[[PM_COL, WS_COL]]
	            .mean()   # per-column mean, NaN readings are skipped
	            .dropna() # keep only days that have both PM2.5 and wind speed
	        )             # groupby sorts its keys, so the index is chronological

	        # Only December 2024 is searched for the worst week, but `daily`
	        # keeps every date so the surrounding context can still be plotted.
	        in_dec_2024 = (daily.index.year == 2024) & (daily.index.month == 12)
	        december_pm = daily.loc[in_dec_2024, PM_COL]

	        # A '7D' window ending on day t covers t-6 .. t; min_periods=7
	        # keeps only weeks in which all seven days have data.
	        weekly_mean = december_pm.rolling('7D', min_periods=7).mean().dropna()

	        if weekly_mean.empty:
	            # No complete 7-day week in December 2024.
	            answer = "Insufficient data"
	        else:
	            week_end = weekly_mean.idxmax()
	            week_start = week_end - pd.Timedelta(days=6)

	            # Extended window: 15 days before the week starts and 15 days
	            # after it ends. Days missing from the data are simply absent.
	            ext_start = week_start - pd.Timedelta(days=15)
	            ext_end = week_end + pd.Timedelta(days=15)
	            window = daily.loc[ext_start:ext_end]

	            fig, ax1 = plt.subplots(figsize=(9, 6))
	            try:
	                ax1.plot(window.index, window[PM_COL],
	                         color='tab:red', label='PM2.5 (µg/m³)')
	                # Shade the most polluted week so it can be compared with
	                # the periods on either side.
	                ax1.axvspan(week_start, week_end, color='tab:gray',
	                            alpha=0.2, label='Most polluted week')
	                # Label the primary axis: the twin axis hides its x-axis,
	                # so an x-label set there would never be drawn.
	                ax1.set_xlabel('Date')
	                ax1.set_ylabel('PM2.5 (µg/m³)', color='tab:red')
	                ax1.tick_params(axis='y', labelcolor='tab:red')

	                ax2 = ax1.twinx()
	                ax2.plot(window.index, window[WS_COL],
	                         color='tab:blue', label='Wind Speed (m/s)')
	                ax2.set_ylabel('Wind Speed (m/s)', color='tab:blue')
	                ax2.tick_params(axis='y', labelcolor='tab:blue')

	                # One combined legend instead of one per axis (which would
	                # overlap). It lives on ax2 because ax2 is drawn on top.
	                handles1, labels1 = ax1.get_legend_handles_labels()
	                handles2, labels2 = ax2.get_legend_handles_labels()
	                ax2.legend(handles1 + handles2, labels1 + labels2,
	                           loc='upper left')

	                ax1.set_title('Delhi – PM2.5 and Wind Speed around Most Polluted Week (Dec 2024)')
	                fig.autofmt_xdate()
	                fig.tight_layout()

	                # Unique name so repeated runs do not overwrite each other.
	                # 9x6 in at dpi=1200 yields a 10800x7200 px image.
	                filename = f"plot_{uuid.uuid4().hex}.png"
	                fig.savefig(filename, dpi=1200, bbox_inches='tight',
	                            facecolor='white')
	            finally:
	                # Release the figure even when plotting or saving fails.
	                plt.close(fig)

	            answer = filename
	    except Exception:
	        # Deliberate best-effort boundary: any failure (missing column,
	        # unwritable file, ...) becomes a message instead of a crash.
	        answer = "Unable to complete analysis with available data"