"""Streamlit app that extrapolates registered case counts per State/UT to 2022.

The app reads a CSV containing one "Number of Cases Registered" column per
reporting year, fits an independent straight-line trend through each row's
2018-2021 counts, and shows the extrapolated 2022 value as a table and as a
top-10 horizontal bar chart.
"""
import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from sklearn.linear_model import LinearRegression

# Replace this with your actual dataset path
csv_path = "crime_data.csv"  # Example: "data/crime_data.csv" if inside a folder

STATE_COLUMN = 'State/UT'

# Source column name -> short year label used after renaming.
# NOTE(review): the last source column is labelled "up to 31.10.2021", i.e. it
# appears to be a partial reporting year, yet it is fitted like the others.
# That may pull the extrapolated trend downwards -- confirm this is acceptable.
YEAR_COLUMNS = {
    'Number of Cases Registered - 2018-19': '2018',
    'Number of Cases Registered - 2019-20': '2019',
    'Number of Cases Registered - 2020-21': '2020',
    'Number of Cases Registered - 2021-22 (up to 31.10.2021)': '2021',
}
YEAR_LABELS = list(YEAR_COLUMNS.values())

TARGET_YEAR = 2022
PREDICTED_COLUMN = f'Predicted {TARGET_YEAR}'


def find_missing_columns(df):
    """Return the required column names that are absent from *df*."""
    required = [STATE_COLUMN, *YEAR_COLUMNS]
    return [name for name in required if name not in df.columns]


def extract_yearly_counts(df):
    """Return a copy of the state and yearly-count columns with short names.

    The yearly columns are renamed to the labels in ``YEAR_COLUMNS`` and
    coerced to numbers; values that cannot be parsed become NaN so the caller
    can decide how to treat incomplete rows.

    Raises:
        KeyError: if a required column is missing (check with
            ``find_missing_columns`` first to report this gracefully).
    """
    data = df[[STATE_COLUMN, *YEAR_COLUMNS]].copy()
    # Rename for easier access
    data = data.rename(columns=YEAR_COLUMNS)
    data[YEAR_LABELS] = data[YEAR_LABELS].apply(pd.to_numeric, errors='coerce')
    return data


def predict_target_year(data, year_labels=YEAR_LABELS, target_year=TARGET_YEAR):
    """Extrapolate each row's yearly counts to *target_year*.

    One ``LinearRegression`` is fitted per row, using the integer value of
    each label in *year_labels* as the single feature and the row's counts as
    the target.

    Args:
        data: frame whose *year_labels* columns hold numeric, non-NaN counts.
        year_labels: column names that parse as integer years.
        target_year: the year to extrapolate to.

    Returns:
        A list with one non-negative ``int`` per row of *data*, in row order.
        The fitted value is truncated towards zero and clamped at 0.
    """
    # Plain nested lists are used for both fit and predict so scikit-learn
    # sees the same (unnamed) feature layout in both calls.
    train_years = [[int(label)] for label in year_labels]
    predictions = []
    for counts in data[year_labels].to_numpy(dtype=float):
        model = LinearRegression()
        model.fit(train_years, counts)
        estimate = model.predict([[target_year]])[0]
        predictions.append(max(0, int(estimate)))  # Avoid negatives
    return predictions


def build_top_states_figure(top_states):
    """Return a horizontal bar chart of predicted cases for *top_states*.

    *top_states* is expected to be sorted in descending order of the predicted
    column; the y axis is inverted so the first row is drawn at the top.
    """
    fig, ax = plt.subplots()
    ax.barh(top_states[STATE_COLUMN], top_states[PREDICTED_COLUMN], color='salmon')
    ax.set_xlabel("Predicted Cases")
    ax.set_ylabel("State/UT")
    ax.invert_yaxis()
    ax.set_title(f"Top 10 States with Highest Predicted Crime Rate ({TARGET_YEAR})")
    return fig


def main():
    """Render the page: raw data, predicted table, and top-10 chart."""
    # Page config
    st.set_page_config(page_title="Crime Rate Prediction", layout="wide")
    st.title("📊 Crime Rate Prediction Based on Past Data")

    # Load data. Only the read itself sits in the try block so that unrelated
    # errors further down are not mistaken for a loading problem.
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        st.error(f"❌ File not found at path: {csv_path}. Please check the path.")
        return
    except pd.errors.EmptyDataError:
        st.error(f"❌ File at path: {csv_path} is empty.")
        return

    # Display raw data
    st.subheader("📄 Raw Dataset")
    st.dataframe(df)

    missing = find_missing_columns(df)
    if missing:
        st.error(f"❌ Dataset is missing required column(s): {', '.join(missing)}")
        return

    # Extract the relevant columns
    data = extract_yearly_counts(df)

    # A trend cannot be fitted through missing values, so incomplete rows are
    # left out and reported instead of crashing the whole page.
    complete = data[YEAR_LABELS].notna().all(axis=1)
    skipped = int((~complete).sum())
    if skipped:
        st.warning(
            f"⚠️ Skipped {skipped} row(s) with missing or non-numeric yearly counts."
        )
    data = data[complete].copy()
    if data.empty:
        st.warning("⚠️ No rows with complete yearly counts to predict from.")
        return

    # Model training & prediction
    data[PREDICTED_COLUMN] = predict_target_year(data)
    ranked = data.sort_values(by=PREDICTED_COLUMN, ascending=False)

    # Display result
    st.subheader(f"📈 Predicted Crime Rate for {TARGET_YEAR}")
    st.dataframe(ranked[[STATE_COLUMN, PREDICTED_COLUMN]])

    # Plot top 10 states
    st.subheader("🔝 Top 10 States by Predicted Crime Rate")
    fig = build_top_states_figure(ranked.head(10))
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until closed; release it so
    # figures do not pile up across Streamlit reruns.
    plt.close(fig)


if __name__ == "__main__":
    main()