# MLDeveloper's picture
# Update app.py
# f4ba322 verified
# raw
# history blame
# 2.13 kB
import streamlit as st
import pandas as pd
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
# Streamlit app: load a per-state table of registered cases, fit one straight
# line per state through the yearly counts, and show the extrapolated value
# for the following year as a table and a top-10 bar chart.

# Page config
st.set_page_config(page_title="Crime Rate Prediction", layout="wide")
st.title("📊 Crime Rate Prediction Based on Past Data")

# Replace this with your actual dataset path
csv_path = "crime_data.csv"  # Example: "data/crime_data.csv" if inside a folder

# CSV column name -> short label used for the rest of the script.
# NOTE(review): the last column is labelled "up to 31.10.2021", which looks
# like a partial year; it is fitted exactly like the full years here —
# confirm that this is intended.
source_columns = {
    'State/UT': 'State/UT',
    'Number of Cases Registered - 2018-19': '2018',
    'Number of Cases Registered - 2019-20': '2019',
    'Number of Cases Registered - 2020-21': '2020',
    'Number of Cases Registered - 2021-22 (up to 31.10.2021)': '2021',
}
years = ['2018', '2019', '2020', '2021']
future_year = '2022'
prediction_column = f'Predicted {future_year}'

# Load data. Only the read itself sits inside the try, so an unrelated error
# further down is never mistaken for (or hidden behind) the file handler.
try:
    df = pd.read_csv(csv_path)
except FileNotFoundError:
    st.error(f"❌ File not found at path: {csv_path}. Please check the path.")
else:
    # Display raw data
    st.subheader("📄 Raw Dataset")
    st.dataframe(df)

    # Report a wrong schema in the page instead of crashing with a KeyError.
    missing_columns = [col for col in source_columns if col not in df.columns]
    if missing_columns:
        st.error(
            f"❌ Missing expected column(s) in {csv_path}: "
            f"{', '.join(missing_columns)}"
        )
    else:
        # Extract the relevant columns and rename them for easier access
        data = df[list(source_columns)].copy()
        data.columns = list(source_columns.values())

        # The regression needs a number for every year. Anything that cannot
        # be parsed becomes NaN, and rows with a NaN are reported and skipped
        # rather than aborting the whole page inside `fit`.
        data[years] = data[years].apply(pd.to_numeric, errors='coerce')
        incomplete = data[years].isna().any(axis=1)
        if incomplete.any():
            st.warning(
                f"⚠️ Skipping {int(incomplete.sum())} row(s) with missing or "
                "non-numeric case counts."
            )
            data = data.loc[~incomplete].copy()

        # Model training & prediction
        # The training and prediction inputs are DataFrames with the same
        # column name, so scikit-learn does not warn about missing feature
        # names on every predict call.
        X = pd.DataFrame({'Year': [int(year) for year in years]})
        X_future = pd.DataFrame({'Year': [int(future_year)]})
        model = LinearRegression()  # `fit` re-estimates from scratch each call

        predicted_values = []
        for y in data[years].to_numpy(dtype=float):
            model.fit(X, y)
            pred = model.predict(X_future)[0]
            # Round to the nearest whole case (truncation would turn
            # 99.999... into 99) and avoid negatives.
            predicted_values.append(max(0, int(round(pred))))
        data[prediction_column] = predicted_values

        # Sort once; both the table and the chart use the same ranking.
        ranked = data.sort_values(by=prediction_column, ascending=False)

        # Display result
        st.subheader(f"📈 Predicted Crime Rate for {future_year}")
        st.dataframe(ranked[['State/UT', prediction_column]])

        # Plot top 10 states
        st.subheader("🔝 Top 10 States by Predicted Crime Rate")
        top10 = ranked.head(10)
        fig, ax = plt.subplots()
        ax.barh(top10['State/UT'], top10[prediction_column], color='salmon')
        ax.set_xlabel("Predicted Cases")
        ax.set_ylabel("State/UT")
        ax.invert_yaxis()  # largest value at the top
        ax.set_title(
            f"Top 10 States with Highest Predicted Crime Rate ({future_year})"
        )
        st.pyplot(fig)
        # pyplot keeps every figure alive until it is closed; Streamlit reruns
        # the script on each interaction, so close it to avoid piling them up.
        plt.close(fig)