MaheshP98 committed on
Commit
83ce7a6
·
verified ·
1 Parent(s): 231d664

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -21
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
- from sklearn.ensemble import IsolationForest
5
  from datetime import datetime, timedelta
6
  import os
7
  import logging
@@ -29,24 +29,69 @@ def validate_csv(df):
29
  return False, f"Invalid data types: {str(e)}"
30
  return True, ""
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def process_files(uploaded_files):
33
  """
34
  Process uploaded CSV files, generate usage plots, detect anomalies, and process AMC expiries.
35
- Returns a dataframe, plot path, PDF path, and AMC expiry message.
36
  """
37
  # Log received files
38
  logging.info(f"Received uploaded files: {uploaded_files}")
39
 
40
  if not uploaded_files:
41
  logging.warning("No files uploaded.")
42
- return None, None, None, "Please upload at least one valid CSV file."
43
 
44
  valid_files = [f for f in uploaded_files if f.name.endswith('.csv')]
45
  logging.info(f"Processing {len(valid_files)} valid files: {valid_files}")
46
 
47
  if not valid_files:
48
  logging.warning("No valid CSV files uploaded.")
49
- return None, None, None, "Please upload at least one valid CSV file."
50
 
51
  logging.info("Loading logs from uploaded files...")
52
  all_data = []
@@ -60,15 +105,15 @@ def process_files(uploaded_files):
60
  is_valid, error_msg = validate_csv(df)
61
  if not is_valid:
62
  logging.error(f"Failed to load {file.name}: {error_msg}")
63
- return None, None, None, f"Error loading {file.name}: {error_msg}"
64
  all_data.append(df)
65
  except Exception as e:
66
  logging.error(f"Failed to load {file.name}: {str(e)}")
67
- return None, None, None, f"Error loading {file.name}: {str(e)}"
68
 
69
  if not all_data:
70
  logging.warning("No data loaded from uploaded files.")
71
- return None, None, None, "No valid data found in uploaded files."
72
 
73
  combined_df = pd.concat(all_data, ignore_index=True)
74
  logging.info(f"Combined {len(combined_df)} total records.")
@@ -81,15 +126,15 @@ def process_files(uploaded_files):
81
  logging.info("Usage plot generated successfully.")
82
  else:
83
  logging.error("Failed to generate usage plot.")
84
- return combined_df, None, None, "Failed to generate usage plot."
85
 
86
- # Detect anomalies
87
- logging.info("Detecting anomalies...")
88
  anomaly_df = detect_anomalies(combined_df)
89
  if anomaly_df is None:
90
  logging.error("Failed to detect anomalies.")
91
  else:
92
- logging.info(f"Detected {sum(anomaly_df['anomaly'] == -1)} anomalies.")
93
 
94
  # Process AMC expiries
95
  logging.info("Processing AMC expiries...")
@@ -98,12 +143,17 @@ def process_files(uploaded_files):
98
  # Generate PDF report
99
  pdf_path = generate_pdf_report(combined_df, anomaly_df, amc_df)
100
 
 
 
 
 
 
101
  # Prepare output dataframe (combine original data with anomalies)
102
  output_df = combined_df.copy()
103
  if anomaly_df is not None:
104
  output_df['anomaly'] = anomaly_df['anomaly'].map({1: "Normal", -1: "Anomaly"})
105
 
106
- return output_df, plot_path, pdf_path, amc_message
107
 
108
  def generate_usage_plot(df):
109
  """
@@ -141,11 +191,11 @@ def generate_usage_plot(df):
141
 
142
  def detect_anomalies(df):
143
  """
144
- Detect anomalies in usage_count using Isolation Forest.
145
  Returns a dataframe with an 'anomaly' column (-1 for anomalies, 1 for normal).
146
  """
147
  try:
148
- model = IsolationForest(contamination=0.1, random_state=42)
149
  anomalies = model.fit_predict(df[['usage_count']].values)
150
  anomaly_df = df.copy()
151
  anomaly_df['anomaly'] = anomalies
@@ -165,7 +215,9 @@ def process_amc_expiries(df):
165
  df['amc_expiry'] = pd.to_datetime(df['amc_expiry'])
166
  upcoming_expiries = df[df['amc_expiry'] <= threshold]
167
  unique_devices = upcoming_expiries['equipment'].unique()
168
- message = f"Found {len(unique_devices)} devices with upcoming AMC expiries: {', '.join(unique_devices)}."
 
 
169
  logging.info(f"Found {len(unique_devices)} devices with upcoming AMC expiries.")
170
  return message, upcoming_expiries
171
  except Exception as e:
@@ -189,38 +241,44 @@ def generate_pdf_report(original_df, anomaly_df, amc_df):
189
  c.setFont("Helvetica", 12)
190
  y = 720
191
 
 
 
 
 
 
192
  # Summary
193
  c.drawString(100, y, "Summary")
194
  y -= 20
195
  c.drawString(100, y, f"Total Records: {len(original_df)}")
196
  y -= 20
197
- c.drawString(100, y, f"Devices: {', '.join(original_df['equipment'].unique())}")
198
  y -= 40
199
 
200
  # Anomalies
201
- c.drawString(100, y, "Anomaly Detection Results")
202
  y -= 20
203
  if anomaly_df is not None:
204
  num_anomalies = sum(anomaly_df['anomaly'] == -1)
205
  c.drawString(100, y, f"Anomalies Detected: {num_anomalies}")
206
  y -= 20
207
  if num_anomalies > 0:
208
- anomaly_records = anomaly_df[anomaly_df['anomaly'] == -1][['equipment', 'usage_count']]
209
  c.drawString(100, y, "Anomalous Records:")
210
  y -= 20
211
  for _, row in anomaly_records.iterrows():
212
- c.drawString(100, y, f"{row['equipment']}: Usage Count = {row['usage_count']}")
213
  y -= 20
214
  if y < 50:
215
  c.showPage()
216
  y = 750
 
217
  else:
218
  c.drawString(100, y, "Anomaly detection failed.")
219
  y -= 20
220
  y -= 20
221
 
222
  # AMC Expiries
223
- c.drawString(100, y, "AMC Expiries Within 7 Days")
224
  y -= 20
225
  if amc_df is not None and not amc_df.empty:
226
  c.drawString(100, y, f"Devices with Upcoming AMC Expiries: {len(amc_df['equipment'].unique())}")
@@ -231,6 +289,7 @@ def generate_pdf_report(original_df, anomaly_df, amc_df):
231
  if y < 50:
232
  c.showPage()
233
  y = 750
 
234
  else:
235
  c.drawString(100, y, "No AMC expiry data available.")
236
  y -= 20
@@ -254,11 +313,13 @@ with gr.Blocks() as demo:
254
  with gr.Row():
255
  output_message = gr.Textbox(label="AMC Expiry Status")
256
  output_pdf = gr.File(label="Download PDF Report")
 
 
257
 
258
  process_button.click(
259
  fn=process_files,
260
  inputs=[file_input],
261
- outputs=[output_df, output_plot, output_pdf, output_message]
262
  )
263
 
264
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
+ from sklearn.neighbors import LocalOutlierFactor
5
  from datetime import datetime, timedelta
6
  import os
7
  import logging
 
29
  return False, f"Invalid data types: {str(e)}"
30
  return True, ""
31
 
32
def generate_summary(combined_df, anomaly_df, amc_df, plot_path, pdf_path):
    """
    Generate a detailed summary of the processing results.

    Args:
        combined_df: DataFrame of all loaded records; its 'equipment' column is read.
        anomaly_df: DataFrame with an 'anomaly' column (-1 marks an anomaly) plus
            'equipment', 'usage_count' and 'status' columns, or None when
            anomaly detection failed.
        amc_df: DataFrame with 'equipment' and 'amc_expiry' columns (values must
            support ``strftime``), or None/empty when there is nothing to report.
        plot_path: Truthy when the usage plot was generated.
        pdf_path: Truthy when the PDF report was generated.

    Returns:
        A markdown string for display in the Gradio interface.
    """
    summary = ["## Processing Summary\n"]

    # Total records and devices
    total_records = len(combined_df)
    unique_devices = combined_df['equipment'].unique()
    # Device identifiers may be numeric (or NaN) after CSV parsing; str.join
    # only accepts strings, so convert explicitly instead of raising TypeError.
    device_names = ', '.join(map(str, unique_devices))
    summary.append(f"- **Total Records Processed**: {total_records}")
    summary.append(f"- **Unique Devices**: {len(unique_devices)} ({device_names})\n")

    # Anomalies
    if anomaly_df is not None:
        is_anomaly = anomaly_df['anomaly'] == -1
        # Vectorized count; int() keeps the rendered value a plain integer.
        num_anomalies = int(is_anomaly.sum())
        summary.append(f"- **Anomalies Detected**: {num_anomalies}")
        if num_anomalies > 0:
            anomaly_records = anomaly_df[is_anomaly][['equipment', 'usage_count', 'status']]
            summary.append(" **Anomalous Devices**:")
            for _, row in anomaly_records.iterrows():
                summary.append(f" - {row['equipment']} (Usage: {row['usage_count']}, Status: {row['status']})")
        else:
            summary.append(" No anomalies detected.")
    else:
        summary.append("- **Anomalies Detected**: Failed to detect anomalies.")
    summary.append("\n")

    # AMC Expiries
    if amc_df is not None and not amc_df.empty:
        unique_devices_amc = amc_df['equipment'].unique()
        summary.append(f"- **Devices with Upcoming AMC Expiries (within 7 days)**: {len(unique_devices_amc)}")
        summary.append(" **Details**:")
        for _, row in amc_df.iterrows():
            summary.append(f" - {row['equipment']}: {row['amc_expiry'].strftime('%Y-%m-%d')}")
    else:
        summary.append("- **Devices with Upcoming AMC Expiries**: None")
    summary.append("\n")

    # Plot and PDF
    summary.append("- **Usage Plot**: " + ("Generated successfully." if plot_path else "Failed to generate."))
    summary.append("- **PDF Report**: " + ("Available for download." if pdf_path else "Not generated."))

    return "\n".join(summary)
76
+
77
  def process_files(uploaded_files):
78
  """
79
  Process uploaded CSV files, generate usage plots, detect anomalies, and process AMC expiries.
80
+ Returns a dataframe, plot path, PDF path, AMC expiry message, and summary.
81
  """
82
  # Log received files
83
  logging.info(f"Received uploaded files: {uploaded_files}")
84
 
85
  if not uploaded_files:
86
  logging.warning("No files uploaded.")
87
+ return None, None, None, "Please upload at least one valid CSV file.", "No files uploaded."
88
 
89
  valid_files = [f for f in uploaded_files if f.name.endswith('.csv')]
90
  logging.info(f"Processing {len(valid_files)} valid files: {valid_files}")
91
 
92
  if not valid_files:
93
  logging.warning("No valid CSV files uploaded.")
94
+ return None, None, None, "Please upload at least one valid CSV file.", "No valid CSV files uploaded."
95
 
96
  logging.info("Loading logs from uploaded files...")
97
  all_data = []
 
105
  is_valid, error_msg = validate_csv(df)
106
  if not is_valid:
107
  logging.error(f"Failed to load {file.name}: {error_msg}")
108
+ return None, None, None, f"Error loading {file.name}: {error_msg}", f"Error: {error_msg}"
109
  all_data.append(df)
110
  except Exception as e:
111
  logging.error(f"Failed to load {file.name}: {str(e)}")
112
+ return None, None, None, f"Error loading {file.name}: {str(e)}", f"Error: {str(e)}"
113
 
114
  if not all_data:
115
  logging.warning("No data loaded from uploaded files.")
116
+ return None, None, None, "No valid data found in uploaded files.", "No data loaded."
117
 
118
  combined_df = pd.concat(all_data, ignore_index=True)
119
  logging.info(f"Combined {len(combined_df)} total records.")
 
126
  logging.info("Usage plot generated successfully.")
127
  else:
128
  logging.error("Failed to generate usage plot.")
129
+ return combined_df, None, None, "Failed to generate usage plot.", "Usage plot generation failed."
130
 
131
+ # Detect anomalies using Local Outlier Factor
132
+ logging.info("Detecting anomalies using Local Outlier Factor...")
133
  anomaly_df = detect_anomalies(combined_df)
134
  if anomaly_df is None:
135
  logging.error("Failed to detect anomalies.")
136
  else:
137
+ logging.info(f"Detected {sum(anomaly_df['anomaly'] == -1)} anomalies using Local Outlier Factor.")
138
 
139
  # Process AMC expiries
140
  logging.info("Processing AMC expiries...")
 
143
  # Generate PDF report
144
  pdf_path = generate_pdf_report(combined_df, anomaly_df, amc_df)
145
 
146
+ # Generate summary
147
+ logging.info("Generating summary of results...")
148
+ summary = generate_summary(combined_df, anomaly_df, amc_df, plot_path, pdf_path)
149
+ logging.info("Summary generated successfully.")
150
+
151
  # Prepare output dataframe (combine original data with anomalies)
152
  output_df = combined_df.copy()
153
  if anomaly_df is not None:
154
  output_df['anomaly'] = anomaly_df['anomaly'].map({1: "Normal", -1: "Anomaly"})
155
 
156
+ return output_df, plot_path, pdf_path, amc_message, summary
157
 
158
  def generate_usage_plot(df):
159
  """
 
191
 
192
  def detect_anomalies(df):
193
  """
194
+ Detect anomalies in usage_count using Local Outlier Factor.
195
  Returns a dataframe with an 'anomaly' column (-1 for anomalies, 1 for normal).
196
  """
197
  try:
198
+ model = LocalOutlierFactor(n_neighbors=5, contamination=0.1)
199
  anomalies = model.fit_predict(df[['usage_count']].values)
200
  anomaly_df = df.copy()
201
  anomaly_df['anomaly'] = anomalies
 
215
  df['amc_expiry'] = pd.to_datetime(df['amc_expiry'])
216
  upcoming_expiries = df[df['amc_expiry'] <= threshold]
217
  unique_devices = upcoming_expiries['equipment'].unique()
218
+ message = f"Found {len(unique_devices)} devices with upcoming AMC expiries: {', '.join(unique_devices)}. Details: " + "; ".join(
219
+ [f"{row['equipment']}: {row['amc_expiry'].strftime('%Y-%m-%d')}" for _, row in upcoming_expiries.iterrows()]
220
+ )
221
  logging.info(f"Found {len(unique_devices)} devices with upcoming AMC expiries.")
222
  return message, upcoming_expiries
223
  except Exception as e:
 
241
  c.setFont("Helvetica", 12)
242
  y = 720
243
 
244
+ # Report generated timestamp
245
+ current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
246
+ c.drawString(100, y, f"Generated on: {current_time}")
247
+ y -= 30
248
+
249
  # Summary
250
  c.drawString(100, y, "Summary")
251
  y -= 20
252
  c.drawString(100, y, f"Total Records: {len(original_df)}")
253
  y -= 20
254
+ c.drawString(100, y, f"Unique Devices: {', '.join(original_df['equipment'].unique())}")
255
  y -= 40
256
 
257
  # Anomalies
258
+ c.drawString(100, y, "Anomaly Detection Results (Using Local Outlier Factor)")
259
  y -= 20
260
  if anomaly_df is not None:
261
  num_anomalies = sum(anomaly_df['anomaly'] == -1)
262
  c.drawString(100, y, f"Anomalies Detected: {num_anomalies}")
263
  y -= 20
264
  if num_anomalies > 0:
265
+ anomaly_records = anomaly_df[anomaly_df['anomaly'] == -1][['equipment', 'usage_count', 'status']]
266
  c.drawString(100, y, "Anomalous Records:")
267
  y -= 20
268
  for _, row in anomaly_records.iterrows():
269
+ c.drawString(100, y, f"{row['equipment']}: Usage Count = {row['usage_count']}, Status = {row['status']}")
270
  y -= 20
271
  if y < 50:
272
  c.showPage()
273
  y = 750
274
+ c.setFont("Helvetica", 12)
275
  else:
276
  c.drawString(100, y, "Anomaly detection failed.")
277
  y -= 20
278
  y -= 20
279
 
280
  # AMC Expiries
281
+ c.drawString(100, y, "AMC Expiries Within 7 Days (as of 2025-06-05)")
282
  y -= 20
283
  if amc_df is not None and not amc_df.empty:
284
  c.drawString(100, y, f"Devices with Upcoming AMC Expiries: {len(amc_df['equipment'].unique())}")
 
289
  if y < 50:
290
  c.showPage()
291
  y = 750
292
+ c.setFont("Helvetica", 12)
293
  else:
294
  c.drawString(100, y, "No AMC expiry data available.")
295
  y -= 20
 
313
  with gr.Row():
314
  output_message = gr.Textbox(label="AMC Expiry Status")
315
  output_pdf = gr.File(label="Download PDF Report")
316
+ with gr.Row():
317
+ output_summary = gr.Markdown(label="Summary of Results")
318
 
319
  process_button.click(
320
  fn=process_files,
321
  inputs=[file_input],
322
+ outputs=[output_df, output_plot, output_pdf, output_message, output_summary]
323
  )
324
 
325
  if __name__ == "__main__":