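"""Gradio app that loads equipment usage logs from CSV files, charts usage,
flags unusual usage with Local Outlier Factor, lists AMC expiries and builds a
PDF report."""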
import logging
import os
import tempfile
from datetime import datetime, timedelta

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import simpleSplit
from reportlab.pdfgen import canvas
from sklearn.neighbors import LocalOutlierFactor
|
|
|
|
|
# The default rendering of ``%(asctime)s`` already ends in ",<milliseconds>",
# so the format must not append ``%(msecs)03d`` a second time.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
|
|
|
|
|
# Stylesheet handed to gr.Blocks; the class names below are the ones referenced
# through ``elem_classes`` and in the HTML/Markdown the app generates.
css = """
body {
    font-family: Arial, sans-serif;
    background-color: #F3F4F6;
    color: #1E3A8A;
}

h1 {
    color: #1E3A8A;
    text-align: center;
    margin-bottom: 20px;
}

.gr-button {
    background-color: #1E3A8A;
    color: white;
    border: none;
    border-radius: 5px;
    padding: 10px 20px;
}

.gr-button:hover {
    background-color: #2B4C9B;
}

.summary-card {
    background-color: white;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    padding: 20px;
    margin: 20px 0;
}

.summary-card h2 {
    color: #1E3A8A;
    margin-top: 0;
}

.maintenance-alert {
    background-color: white;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    padding: 15px;
    margin: 10px 0;
}

.alert-urgent {
    color: #DC2626;
    font-weight: bold;
}

.alert-upcoming {
    color: #F59E0B;
    font-weight: bold;
}

.recommendation {
    font-style: italic;
    color: #4B5563;
}

.flowchart {
    display: flex;
    flex-direction: column;
    gap: 10px;
    margin: 20px 0;
}

.flowchart-step {
    background-color: #E5E7EB;
    border-left: 5px solid #1E3A8A;
    padding: 10px;
    border-radius: 5px;
    position: relative;
}

.flowchart-step:not(:last-child):after {
    content: '↓';
    position: absolute;
    bottom: -20px;
    left: 50%;
    transform: translateX(-50%);
    font-size: 20px;
    color: #1E3A8A;
}

.report-preview {
    background-color: white;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    padding: 15px;
    margin: 10px 0;
}
"""
|
|
|
def validate_csv(df):
    """
    Check that a loaded log frame has the expected schema.

    The frame must contain the columns ``equipment``, ``usage_count``,
    ``status`` and ``amc_expiry``. When all are present, ``usage_count`` is
    converted to a numeric dtype and ``amc_expiry`` to datetimes, both written
    back onto ``df`` itself (the caller's frame is modified in place).

    Returns an ``(is_valid, error_message)`` tuple; the message is an empty
    string when the frame is valid.
    """
    expected = ('equipment', 'usage_count', 'status', 'amc_expiry')
    absent = [name for name in expected if name not in df.columns]
    if absent:
        return False, "Missing required columns: " + ", ".join(absent)

    try:
        numeric_usage = pd.to_numeric(df['usage_count'], errors='raise')
        df['usage_count'] = numeric_usage
        parsed_expiry = pd.to_datetime(df['amc_expiry'], errors='raise')
        df['amc_expiry'] = parsed_expiry
    except Exception as exc:
        # Any conversion failure is reported to the caller rather than raised.
        return False, "Invalid data types: " + str(exc)

    return True, ""
|
|
|
def generate_summary(combined_df, anomaly_df, amc_df, plot_path, pdf_path, reference_date=None):
    """
    Build the Markdown summary shown in the Gradio interface.

    Args:
        combined_df: All loaded log records; must have an ``equipment`` column.
        anomaly_df: Records with ``equipment``, ``usage_count``, ``status`` and
            an ``anomaly`` column (-1 marks an unusual record), or None when
            anomaly detection failed.
        amc_df: Records flagged for maintenance, or None/empty. Its
            ``amc_expiry`` column must hold datetime values.
        plot_path: Not used by this function; kept for interface compatibility.
        pdf_path: Not used by this function; kept for interface compatibility.
        reference_date: ``datetime`` the "days left" figures are measured
            from. Defaults to 2025-06-05, the fixed date this module uses.

    Returns:
        A Markdown string (with inline HTML spans for the alert styling).
    """
    if reference_date is None:
        reference_date = datetime(2025, 6, 5)
    reference_label = reference_date.strftime('%Y-%m-%d')

    summary = []

    # --- Overview -----------------------------------------------------------
    summary.append("## Overview")
    total_records = len(combined_df)
    # Equipment identifiers may be numeric, so coerce to str before joining.
    device_names = [str(name) for name in combined_df['equipment'].unique()]
    summary.append(f"We processed **{total_records} log entries** for **{len(device_names)} devices** ({', '.join(device_names)}).")
    summary.append("This report helps you understand device usage, identify unusual activity, and plan maintenance.\n")

    # --- Unusual activity ---------------------------------------------------
    summary.append("## Unusual Activity")
    if anomaly_df is not None:
        flagged = anomaly_df[anomaly_df['anomaly'] == -1]
        num_anomalies = len(flagged)
        if num_anomalies > 0:
            summary.append(f"We found **{num_anomalies} unusual activities** that might need your attention:")
            for equipment, usage, status in zip(flagged['equipment'], flagged['usage_count'], flagged['status']):
                summary.append(f"- **{equipment}** (Usage: {usage}, Status: {status}) - High or low usage compared to others might indicate overuse or underuse.")
        else:
            summary.append("No unusual activity detected. All devices are operating within expected usage patterns.")
    else:
        summary.append("We couldn’t check for unusual activity due to an error.")
    summary.append("\n")

    # --- Maintenance alerts -------------------------------------------------
    summary.append("## Maintenance Alerts")
    if amc_df is not None and not amc_df.empty:
        device_count = len(amc_df['equipment'].unique())
        summary.append(f"**{device_count} devices** need maintenance soon (within 7 days from {reference_label}):")
        for equipment, expiry in zip(amc_df['equipment'], amc_df['amc_expiry']):
            days_until_expiry = (expiry - reference_date).days
            # Three days or fewer (including already-expired, i.e. negative) is urgent.
            urgency = "Urgent" if days_until_expiry <= 3 else "Upcoming"
            urgency_class = "alert-urgent" if urgency == "Urgent" else "alert-upcoming"
            summary.append(f"- <span class='{urgency_class}'>⚠️ {urgency}</span>: **{equipment}** - Due on {expiry.strftime('%Y-%m-%d')} ({days_until_expiry} days left)")
        summary.append("\n<div class='recommendation'>Recommendation: Contact the maintenance team within 24 hours for urgent alerts at support@company.com.</div>")
    else:
        summary.append("No devices need maintenance within the next 7 days.")
    summary.append("\n")

    # --- Generated reports --------------------------------------------------
    summary.append("## Generated Reports")
    summary.append("- **Usage Chart**: Visualizes usage patterns across devices, helping identify overworked or underused equipment. See below for the chart.")
    summary.append("- **PDF Report**: A comprehensive report including a full data table, unusual activity details, maintenance alerts, and a detailed flowchart of our process. Download it below.")

    return "\n".join(summary)
|
|
|
def generate_flowchart_html():
    """
    Render the processing pipeline as a column of HTML "step" boxes.

    Each step becomes a ``<div class='flowchart-step'>`` holding a bold title
    and a one-line description; all steps are wrapped in a single
    ``<div class='flowchart'>`` container.

    Returns the HTML as one newline-joined string.
    """
    stages = (
        ("Upload CSV File(s)", "User uploads log files in CSV format."),
        ("Validate Data", "Checks for required columns (equipment, usage_count, status, amc_expiry) and correct data types."),
        ("Generate Usage Chart", "Creates a bar chart showing usage counts by device and status (e.g., Active, Inactive)."),
        ("Detect Unusual Activity", "Uses Local Outlier Factor to identify devices with unusual usage patterns (e.g., too high or too low)."),
        ("Check Maintenance Dates", "Identifies devices with AMC expiries within 7 days from 2025-06-05."),
        ("Create PDF Report", "Generates a detailed PDF with data tables, insights, and this flowchart."),
    )
    boxes = [
        f"<div class='flowchart-step'><strong>{title}</strong><br>{detail}</div>"
        for title, detail in stages
    ]
    return "\n".join(["<div class='flowchart'>", *boxes, "</div>"])
|
|
|
def _early_exit(message, summary, data=None):
    """Build the six-element result tuple used by every early return of process_files."""
    return data, None, None, message, summary, ""


def _upload_path(entry):
    """Return the filesystem path of an upload given as a path or as an object exposing ``.name``."""
    if isinstance(entry, (str, os.PathLike)):
        return str(os.fspath(entry))
    return entry.name


def process_files(uploaded_files):
    """
    Process uploaded CSV files, generate usage plots, detect anomalies, and process AMC expiries.

    Args:
        uploaded_files: Iterable of uploads. Each item is either a path
            (``str`` / ``os.PathLike``) or an object whose ``.name`` attribute
            is the path of the file on disk. Only entries whose path ends in
            ``.csv`` (any letter case) are read.

    Returns:
        A 6-tuple ``(dataframe, plot_path, pdf_path, amc_message, summary,
        flowchart_html)``. On failure the first three items are None (the
        dataframe is still returned when only the plot failed), the message
        and summary describe the problem, and the flowchart HTML is ``""``.
    """
    logging.info(f"Received uploaded files: {uploaded_files}")

    if not uploaded_files:
        logging.warning("No files uploaded.")
        return _early_exit("Please upload at least one valid CSV file.", "## Summary\nNo files uploaded.")

    csv_paths = [
        path for path in map(_upload_path, uploaded_files)
        if path.lower().endswith('.csv')
    ]
    logging.info(f"Processing {len(csv_paths)} valid files: {csv_paths}")

    if not csv_paths:
        logging.warning("No valid CSV files uploaded.")
        return _early_exit("Please upload at least one valid CSV file.", "## Summary\nNo valid CSV files uploaded.")

    logging.info("Loading logs from uploaded files...")
    all_data = []
    for path in csv_paths:
        try:
            df = pd.read_csv(path)
            logging.info(f"Loaded {len(df)} records from {path}")
            # validate_csv also converts usage_count / amc_expiry in place.
            is_valid, error_msg = validate_csv(df)
        except Exception as e:
            # UI boundary: report any load failure to the user instead of
            # letting the Gradio callback crash.
            logging.error(f"Failed to load {path}: {str(e)}")
            return _early_exit(f"Error loading {path}: {str(e)}", f"## Summary\nError: {str(e)}")
        if not is_valid:
            logging.error(f"Failed to load {path}: {error_msg}")
            return _early_exit(f"Error loading {path}: {error_msg}", f"## Summary\nError: {error_msg}")
        all_data.append(df)

    combined_df = pd.concat(all_data, ignore_index=True)
    logging.info(f"Combined {len(combined_df)} total records.")

    # Header-only files load successfully but leave nothing to analyse.
    if combined_df.empty:
        logging.warning("No data loaded from uploaded files.")
        return _early_exit("No valid data found in uploaded files.", "## Summary\nNo data loaded.")

    logging.info("Generating usage plot...")
    plot_path = generate_usage_plot(combined_df)
    if not plot_path:
        logging.error("Failed to generate usage plot.")
        return _early_exit("Failed to generate usage plot.", "## Summary\nUsage plot generation failed.", data=combined_df)
    logging.info("Usage plot generated successfully.")

    logging.info("Detecting anomalies using Local Outlier Factor...")
    anomaly_df = detect_anomalies(combined_df)
    if anomaly_df is None:
        logging.error("Failed to detect anomalies.")
    else:
        logging.info(f"Detected {sum(anomaly_df['anomaly'] == -1)} anomalies using Local Outlier Factor.")

    logging.info("Processing AMC expiries...")
    amc_message, amc_df = process_amc_expiries(combined_df)

    logging.info("Generating PDF report...")
    pdf_path = generate_pdf_report(combined_df, anomaly_df, amc_df)
    if pdf_path:
        logging.info("PDF report generated successfully.")
    else:
        logging.error("Failed to generate PDF report.")

    logging.info("Generating summary of results...")
    summary = generate_summary(combined_df, anomaly_df, amc_df, plot_path, pdf_path)
    logging.info("Summary generated successfully.")

    logging.info("Generating flowchart HTML...")
    flowchart_html = generate_flowchart_html()
    logging.info("Flowchart HTML generated successfully.")

    # Show human-readable labels instead of the raw 1 / -1 flags in the table.
    output_df = combined_df.copy()
    if anomaly_df is not None:
        output_df['anomaly'] = anomaly_df['anomaly'].map({1: "Normal", -1: "Unusual"})

    return output_df, plot_path, pdf_path, amc_message, summary, flowchart_html
|
|
|
def generate_usage_plot(df):
    """
    Generate a bar plot of usage_count by equipment and status.

    One bar series is drawn per distinct ``status`` value; bars are labelled
    ``"<equipment> (<status>)"``. The chart is written to a temporary PNG file
    that is NOT deleted automatically.

    Args:
        df: Frame with ``equipment``, ``usage_count`` and ``status`` columns.

    Returns:
        Path of the saved PNG, or None if plotting failed (the error is logged).
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(12, 6))

        status_colors = {'Active': '#36A2EB', 'Inactive': '#FF6384', 'Down': '#FFCE56', 'Online': '#4BC0C0'}
        for status in df['status'].unique():
            subset = df[df['status'] == status]
            ax.bar(
                # astype(str): equipment identifiers may be numeric.
                subset['equipment'].astype(str) + f" ({status})",
                subset['usage_count'],
                label=status,
                color=status_colors.get(status, '#999999'),
            )
        ax.set_xlabel("Equipment (Status)", fontsize=12)
        ax.set_ylabel("Usage Count", fontsize=12)
        ax.set_title("Usage Count by Equipment and Status", fontsize=14)
        ax.legend(title="Status")
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        fig.tight_layout()

        # Reserve a file name, then release the handle before matplotlib
        # reopens the file by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
            plot_path = tmp.name
        fig.savefig(plot_path, format='png', dpi=100)
        return plot_path
    except Exception as e:
        logging.error(f"Failed to generate usage plot: {str(e)}")
        return None
    finally:
        # Close the figure on success and on failure so figures do not pile up.
        if fig is not None:
            plt.close(fig)
|
|
|
def detect_anomalies(df):
    """
    Detect anomalies in usage_count using Local Outlier Factor.

    Args:
        df: Frame with a numeric ``usage_count`` column. It is not modified.

    Returns:
        A copy of ``df`` with an added ``anomaly`` column (-1 for anomalies,
        1 for normal), or None if detection failed (the error is logged).
        Frames with fewer than two rows are returned with every row marked
        normal, because there is nothing to compare a lone record against.
    """
    try:
        anomaly_df = df.copy()
        sample_count = len(df)
        if sample_count < 2:
            anomaly_df['anomaly'] = 1
            return anomaly_df

        # Each point needs at least one neighbour other than itself, so never
        # ask for more neighbours than there are other samples.
        model = LocalOutlierFactor(n_neighbors=min(5, sample_count - 1), contamination=0.1)
        anomaly_df['anomaly'] = model.fit_predict(df[['usage_count']].values)
        return anomaly_df
    except Exception as e:
        logging.error(f"Failed to detect anomalies: {str(e)}")
        return None
|
|
|
def process_amc_expiries(df, reference_date=None, window_days=7):
    """
    Identify devices whose AMC expiry falls on or before a cut-off date.

    The cut-off is ``reference_date + window_days``. Every row with an expiry
    on or before the cut-off is returned, which includes contracts that have
    already expired.

    Note: ``df['amc_expiry']`` is converted to datetimes in place on the
    caller's frame.

    Args:
        df: Frame with ``equipment`` and ``amc_expiry`` columns.
        reference_date: ``datetime`` the window starts from. Defaults to
            2025-06-05, the fixed date this module uses.
        window_days: Length of the look-ahead window in days (default 7).

    Returns:
        ``(message, upcoming_df)``; on failure the message describes the error
        and the dataframe is None.
    """
    try:
        current_date = datetime(2025, 6, 5) if reference_date is None else reference_date
        threshold = current_date + timedelta(days=window_days)
        df['amc_expiry'] = pd.to_datetime(df['amc_expiry'])
        upcoming_expiries = df[df['amc_expiry'] <= threshold]

        # Equipment identifiers may be numeric, so coerce to str before joining.
        device_names = [str(name) for name in upcoming_expiries['equipment'].unique()]
        details = "; ".join(
            f"{equipment}: {expiry.strftime('%Y-%m-%d')}"
            for equipment, expiry in zip(upcoming_expiries['equipment'], upcoming_expiries['amc_expiry'])
        )
        message = (
            f"Found {len(device_names)} devices with upcoming AMC expiries: "
            f"{', '.join(device_names)}. Details: {details}"
        )
        logging.info(f"Found {len(device_names)} devices with upcoming AMC expiries.")
        return message, upcoming_expiries
    except Exception as e:
        logging.error(f"Failed to process AMC expiries: {str(e)}")
        return f"Error processing AMC expiries: {str(e)}", None
|
|
|
def generate_pdf_report(original_df, anomaly_df, amc_df, reference_date=None):
    """
    Generate a formatted PDF report with the log table, unusual-activity
    details, maintenance alerts and a description of the processing pipeline.

    Args:
        original_df: All log records, with ``equipment``, ``usage_count``,
            ``status`` and datetime ``amc_expiry`` columns.
        anomaly_df: Records with an ``anomaly`` column (-1 = unusual), or None
            when detection failed; the report then shows "N/A" per row.
        amc_df: Records flagged for maintenance (datetime ``amc_expiry``), or
            None/empty.
        reference_date: ``datetime`` the "days left" figures are measured
            from. Defaults to 2025-06-05, the fixed date this module uses.

    Returns:
        Path of the saved PDF (a temporary file that is NOT deleted
        automatically), or None when there is no data or generation failed.
    """
    try:
        if original_df is None or original_df.empty:
            logging.warning("No data available for PDF generation.")
            return None

        if reference_date is None:
            reference_date = datetime(2025, 6, 5)
        reference_label = reference_date.strftime('%Y-%m-%d')

        # Reserve a file name, then release the handle before reportlab
        # opens the file by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
            pdf_path = tmp.name

        c = canvas.Canvas(pdf_path, pagesize=letter)
        width, height = letter
        left, right, bottom = 50, width - 50, 50
        body_top = height - 100
        # Font last selected through set_font(); re-applied after a page break
        # because every page break re-applies it after the header is drawn.
        current_font = ["Helvetica", 10]

        def set_font(name, size):
            current_font[:] = [name, size]
            c.setFont(name, size)

        def draw_header():
            c.setFont("Helvetica-Bold", 16)
            c.setFillColor(colors.darkblue)
            c.drawString(left, height - 50, "Equipment Log Analysis Report")
            c.setFont("Helvetica", 10)
            c.setFillColor(colors.black)
            c.drawString(left, height - 70, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            c.line(left, height - 80, right, height - 80)

        def ensure_space(y, needed=0):
            """Return y, or the top of a fresh page if y - needed is below the bottom margin."""
            if y - needed >= bottom:
                return y
            c.showPage()
            draw_header()
            c.setFont(*current_font)
            return body_top

        def draw_wrapped(text, x, y, font_name, font_size, advance):
            """Draw text at (x, y), wrapped at the right margin; return y moved down by `advance` past the last line."""
            set_font(font_name, font_size)
            lines = simpleSplit(str(text), font_name, font_size, right - x) or [""]
            for index, line in enumerate(lines):
                if index:
                    y -= font_size + 3
                y = ensure_space(y)
                c.drawString(x, y, line)
            return y - advance

        def draw_section_title(title, y):
            # Keep the title together with the first line that follows it.
            y = ensure_space(y, 30)
            set_font("Helvetica-Bold", 14)
            c.setFillColor(colors.darkblue)
            c.drawString(left, y, title)
            c.setFillColor(colors.black)
            c.line(left, y - 5, right, y - 5)
            return y - 30

        def draw_table_header(headers, x_positions, y):
            # Keep the header together with at least one table row.
            y = ensure_space(y, 20)
            set_font("Helvetica-Bold", 10)
            for x, header in zip(x_positions, headers):
                c.drawString(x, y, header)
            c.line(left, y - 5, right, y - 5)
            return y - 20

        def format_date(value):
            return "N/A" if pd.isna(value) else value.strftime('%Y-%m-%d')

        columns = [50, 150, 250, 350, 450]

        y = body_top
        draw_header()

        # --- Summary ----------------------------------------------------------
        y = draw_section_title("Summary", y)
        y = draw_wrapped(f"Total Records: {len(original_df)}", left, y, "Helvetica", 12, 20)
        device_names = ', '.join(str(name) for name in original_df['equipment'].unique())
        y = draw_wrapped(f"Unique Devices: {device_names}", left, y, "Helvetica", 12, 40)

        # --- Device log table -------------------------------------------------
        y = draw_section_title("Device Log Details", y)
        y = draw_table_header(["Equipment", "Usage Count", "Status", "AMC Expiry", "Activity"], columns, y)

        output_df = original_df.copy()
        if anomaly_df is not None:
            output_df['anomaly'] = anomaly_df['anomaly'].map({1: "Normal", -1: "Unusual"})
        else:
            # Detection failed: still produce the table instead of aborting.
            output_df['anomaly'] = "N/A"

        set_font("Helvetica", 10)
        for _, row in output_df.iterrows():
            y = ensure_space(y)
            c.drawString(columns[0], y, str(row['equipment']))
            c.drawString(columns[1], y, str(row['usage_count']))
            c.drawString(columns[2], y, str(row['status']))
            c.drawString(columns[3], y, format_date(row['amc_expiry']))
            c.drawString(columns[4], y, str(row['anomaly']))
            y -= 20

        # --- Unusual activity -------------------------------------------------
        y = draw_section_title("Unusual Activity (Using Local Outlier Factor)", y)
        if anomaly_df is not None:
            anomaly_records = anomaly_df[anomaly_df['anomaly'] == -1][['equipment', 'usage_count', 'status']]
            num_anomalies = len(anomaly_records)
            y = draw_wrapped(f"Unusual Activities Detected: {num_anomalies}", left, y, "Helvetica", 12, 20)
            if num_anomalies > 0:
                y = draw_wrapped("Details:", left, y, "Helvetica", 12, 20)
                for _, row in anomaly_records.iterrows():
                    y = draw_wrapped(
                        f"{row['equipment']}: Usage Count = {row['usage_count']}, Status = {row['status']}",
                        left, y, "Helvetica-Oblique", 10, 20,
                    )
                    y = draw_wrapped(
                        "Note: This device’s usage is significantly higher or lower than others, which may indicate overuse or underuse.",
                        70, y, "Helvetica-Oblique", 10, 20,
                    )
        else:
            y = draw_wrapped("Unable to detect unusual activity due to an error.", left, y, "Helvetica", 12, 20)
        y -= 20

        # --- Maintenance alerts -----------------------------------------------
        y = draw_section_title(f"Maintenance Alerts (as of {reference_label})", y)
        if amc_df is not None and not amc_df.empty:
            y = draw_wrapped(
                f"Devices Needing Maintenance Soon: {len(amc_df['equipment'].unique())}",
                left, y, "Helvetica", 12, 20,
            )
            y = draw_table_header(["Device", "Expiry Date", "Urgency", "Days Left", "Action"], columns, y)

            set_font("Helvetica", 10)
            for _, row in amc_df.iterrows():
                days_until_expiry = (row['amc_expiry'] - reference_date).days
                urgency = "Urgent" if days_until_expiry <= 3 else "Upcoming"
                action = "Contact maintenance team within 24 hours" if urgency == "Urgent" else "Schedule maintenance this week"
                # Leave room for a second line: the action text wraps in its
                # narrow column.
                y = ensure_space(y, 13)
                c.drawString(columns[0], y, str(row['equipment']))
                c.drawString(columns[1], y, format_date(row['amc_expiry']))
                c.drawString(columns[2], y, urgency)
                c.drawString(columns[3], y, str(days_until_expiry))
                y = draw_wrapped(action, columns[4], y, "Helvetica", 10, 20)
            y = draw_wrapped(
                "Contact: Email the maintenance team at support@company.com for scheduling.",
                left, y, "Helvetica-Oblique", 10, 20,
            )
        else:
            y = draw_wrapped("No devices need maintenance within the next 7 days.", left, y, "Helvetica", 12, 20)
        y -= 20

        # --- Processing pipeline ----------------------------------------------
        y = draw_section_title("Processing Pipeline Flowchart", y)
        flowchart = [
            ("1. Upload CSV File(s)", "User uploads log files in CSV format containing device usage data."),
            ("2. Validate Data", "Ensures all required columns (equipment, usage_count, status, amc_expiry) are present and data types are correct (e.g., usage_count as numeric, amc_expiry as date)."),
            ("3. Generate Usage Chart", "Creates a bar chart showing usage counts by device and status (e.g., Active, Inactive) to visualize usage patterns."),
            ("4. Detect Unusual Activity", "Uses Local Outlier Factor (LOF) algorithm to identify devices with unusual usage patterns by comparing local density of usage counts (contamination=0.1, n_neighbors=5)."),
            ("5. Check Maintenance Dates", f"Identifies devices with AMC expiries within 7 days from {reference_label}, calculating days left and urgency (urgent if ≤3 days)."),
            ("6. Create PDF Report", "Generates this PDF with a data table, unusual activity details, maintenance alerts, and this detailed flowchart."),
        ]
        for step, description in flowchart:
            # Keep the step title on the same page as its description.
            y = ensure_space(y, 15)
            y = draw_wrapped(step, left, y, "Helvetica", 10, 15)
            y = draw_wrapped(description, 70, y, "Helvetica-Oblique", 9, 25)

        c.showPage()
        c.save()
        return pdf_path
    except Exception as e:
        logging.error(f"Failed to generate PDF report: {str(e)}")
        return None
|
|
|
|
|
# Gradio UI: an upload row, then the result widgets that process_files fills in.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Equipment Log Analysis")

    # Inputs.
    with gr.Row():
        file_input = gr.File(file_count="multiple", label="Upload CSV Files")
        process_button = gr.Button("Process Files")

    # Markdown summary card.
    with gr.Row():
        output_summary = gr.Markdown(label="Summary of Results", elem_classes=["summary-card"])

    # Processed table next to the usage chart.
    with gr.Row():
        output_df = gr.Dataframe(label="Processed Data")
        output_plot = gr.Image(label="Usage Chart")

    # Maintenance message next to the downloadable PDF.
    with gr.Row():
        output_message = gr.Textbox(label="Maintenance Alerts", elem_classes=["maintenance-alert"])
        output_pdf = gr.File(label="Download Detailed PDF Report")

    # The flowchart is shown as soon as the page loads; process_files also
    # writes to this component on every run.
    with gr.Row():
        output_flowchart = gr.HTML(generate_flowchart_html(), label="Processing Flowchart")

    # Static pointers to the two generated artefacts.
    with gr.Row():
        gr.Markdown("## Report Previews", elem_classes=["report-preview"])
        gr.Markdown("- **Usage Chart**: See the bar chart above for a visual of device usage by status.")
        gr.Markdown("- **PDF Report**: Download the PDF above for a full analysis, including data tables, unusual activity, maintenance alerts, and a detailed flowchart.")

    # Order must match the 6-tuple returned by process_files.
    result_components = [
        output_df,
        output_plot,
        output_pdf,
        output_message,
        output_summary,
        output_flowchart,
    ]
    process_button.click(fn=process_files, inputs=[file_input], outputs=result_components)


if __name__ == "__main__":
    logging.info("Application starting...")
    # Listens on every network interface (0.0.0.0), port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)