zainulabedin949 committed on
Commit
d0cb32e
·
verified ·
1 Parent(s): b02bc3f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -0
app.py CHANGED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import torch
4
+ import librosa
5
+ from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
6
+ import matplotlib.pyplot as plt
7
+ from matplotlib.colors import Normalize
8
+
9
# --- Configuration -----------------------------------------------------------
# Sample rate (Hz) that audio is resampled to before it is handed to the model.
SAMPLING_RATE = 16_000
# Hugging Face Hub identifier of the pretrained audio classification checkpoint.
MODEL_NAME = "MIT/ast-finetuned-audioset-10-10-0.4593"
# Initial value of the threshold slider and default for analyze_audio().
DEFAULT_THRESHOLD = 0.7

# --- Model -------------------------------------------------------------------
# Both objects are created once at import time and shared by every request.
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME)
17
+
18
def _as_float_waveform(samples):
    """Return *samples* as a float32 array; integer PCM is scaled to [-1, 1]."""
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # librosa rejects integer input, so rescale PCM by the dtype's full range.
        full_scale = float(np.iinfo(samples.dtype).max)
        return samples.astype(np.float32) / full_scale
    return samples.astype(np.float32)


def _prepare_waveform(audio_array):
    """Turn the raw input into a mono float32 waveform at SAMPLING_RATE."""
    if isinstance(audio_array, tuple):
        # Gradio's numpy audio format: (sample_rate, data).
        sr, samples = audio_array
        audio = _as_float_waveform(samples)
        # Gradio lays multi-channel audio out as (samples, channels), so mix
        # down over the channel axis *before* resampling along time.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)
        if sr != SAMPLING_RATE:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=SAMPLING_RATE)
        return audio

    # A bare array is taken to be at SAMPLING_RATE already and, if
    # multi-channel, in librosa's channels-first layout (as the original did).
    audio = _as_float_waveform(audio_array)
    if audio.ndim > 1:
        audio = librosa.to_mono(audio)
    return audio


def _save_spectrogram(audio, path='spec.png'):
    """Render a mel spectrogram of *audio* to *path* and return the path."""
    mel = librosa.feature.melspectrogram(
        y=audio,
        sr=SAMPLING_RATE,
        n_mels=128,
        fmax=8000,
    )
    db_spec = librosa.power_to_db(mel, ref=np.max)

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        image = ax.imshow(
            db_spec,
            aspect='auto',
            origin='lower',
            norm=Normalize(vmin=-80, vmax=0),
            cmap='viridis',
        )
        fig.colorbar(image, ax=ax, format='%+2.0f dB')
        ax.set_title('Mel Spectrogram')
        fig.tight_layout()
        fig.savefig(path, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)
    return path


def analyze_audio(audio_array, threshold=DEFAULT_THRESHOLD):
    """Classify an audio clip as "Normal" or "Anomaly" and plot its spectrogram.

    Args:
        audio_array: Either a ``(sample_rate, samples)`` tuple as produced by
            a Gradio audio component with ``type="numpy"``, or a bare sample
            array that is already at ``SAMPLING_RATE``.
        threshold: The clip is labelled "Normal" when the probability of
            class index 0 exceeds this value, otherwise "Anomaly".

    Returns:
        A 4-tuple ``(label, confidence, spectrogram_path, raw_probs)``:
        the predicted label, the confidence formatted as a percentage, the
        path of the saved spectrogram image, and the stringified list of
        class probabilities. On failure the tuple is
        ``("Error: <message>", "", None, "")``.
    """
    try:
        if audio_array is None:
            raise ValueError("no audio provided")

        audio = _prepare_waveform(audio_array)
        if audio.size == 0:
            raise ValueError("audio recording is empty")

        # Extract features
        inputs = feature_extractor(
            audio,
            sampling_rate=SAMPLING_RATE,
            return_tensors="pt",
            padding=True,
            return_attention_mask=True,
        )

        # Run inference without tracking gradients
        with torch.no_grad():
            logits = model(**inputs).logits
        probs = torch.softmax(logits, dim=-1)

        # NOTE(review): the decision rests entirely on class index 0. The
        # checkpoint name suggests a general AudioSet classifier, so index 0
        # is presumably not a "normal machine" class -- verify against
        # model.config.id2label before trusting these labels.
        normal_prob = probs[0][0].item()
        if normal_prob > threshold:
            predicted_class = "Normal"
            confidence = normal_prob
        else:
            predicted_class = "Anomaly"
            confidence = 1 - normal_prob

        spec_path = _save_spectrogram(audio)

        return (
            predicted_class,
            f"{confidence:.1%}",
            spec_path,
            str(probs.tolist()[0]),
        )

    except Exception as e:
        # UI boundary: report the problem in the result field instead of
        # crashing the request. The image slot gets None rather than "" so
        # the image output is cleared instead of being asked to load an
        # empty file path.
        return f"Error: {str(e)}", "", None, ""
84
+
85
# ---------------------------------------------------------------------------
# Gradio interface: inputs on the left, analysis results on the right.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(), title="Industrial Audio Analyzer") as demo:
    gr.Markdown("""
    # 🏭 Industrial Equipment Sound Analyzer
    ### Powered by Audio Spectrogram Transformer (AST)
    """)

    with gr.Row():
        # Left column: recording upload, threshold control and trigger button.
        with gr.Column():
            # NOTE(review): `source=` is the Gradio 3.x spelling; newer
            # releases expect `sources=[...]` -- confirm against the Gradio
            # version this app is deployed with.
            audio_input = gr.Audio(
                source="upload",
                type="numpy",
                label="Upload Equipment Audio Recording",
                show_download_button=True,
            )
            threshold = gr.Slider(
                label="Anomaly Detection Threshold",
                info="Higher values reduce false positives but may miss subtle anomalies",
                value=DEFAULT_THRESHOLD,
                minimum=0.5,
                maximum=0.95,
                step=0.05,
            )
            analyze_btn = gr.Button("🔍 Analyze Sound", variant="primary")

            # Bundled sample recordings that can be loaded into the audio input.
            gr.Examples(
                label="Sample Recordings",
                inputs=audio_input,
                examples=[
                    "examples/normal_machine.wav",
                    "examples/anomalous_machine.wav",
                ],
            )

        # Right column: the four outputs of analyze_audio(), in return order.
        with gr.Column():
            result_label = gr.Label(label="Detection Result")
            confidence = gr.Textbox(label="Confidence Score")
            spectrogram = gr.Image(label="Spectrogram Visualization")
            # Hidden field that receives the raw probability list.
            raw_probs = gr.Textbox(
                visible=False,
                label="Model Output Probabilities",
            )

    # Clicking the button runs the analysis and fills the right-hand column.
    analyze_btn.click(
        analyze_audio,
        inputs=[audio_input, threshold],
        outputs=[result_label, confidence, spectrogram, raw_probs],
    )

    gr.Markdown("""
    ## How It Works
    - Upload audio recordings from industrial equipment
    - The AI analyzes sound patterns using spectrogram analysis
    - Detects anomalies indicating potential equipment issues

    **Tip**: For best results, use 5-10 second recordings of steady operation
    """)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()