import gradio as gr
import numpy as np
import torch
import librosa
import librosa.display  # explicit import needed for specshow on older librosa versions
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
# Constants
SAMPLING_RATE = 16000
MODEL_NAME = "MIT/ast-finetuned-audioset-10-10-0.4593"
DEFAULT_THRESHOLD = 0.7
# Load model and feature extractor
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME)
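# Note: this checkpoint is a general-purpose AudioSet classifier (527 sound-event
# labels, see model.config.id2label), not a dedicated anomaly detector; the
# normal/anomaly decision below is a heuristic layered on top of its outputs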
def analyze_audio(audio_array, threshold=DEFAULT_THRESHOLD):
"""
Process audio and detect anomalies
Returns:
- classification result
- confidence score
- spectrogram visualization
"""
    try:
        # Handle different audio input formats
        if isinstance(audio_array, tuple):
            sr, audio = audio_array
        else:
            sr, audio = SAMPLING_RATE, audio_array
        # Gradio delivers integer PCM; librosa and the feature extractor expect
        # float32 roughly in [-1, 1]
        audio = np.asarray(audio)
        if np.issubdtype(audio.dtype, np.integer):
            audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
        else:
            audio = audio.astype(np.float32)
        # Down-mix stereo before resampling (Gradio uses shape (samples, channels))
        if audio.ndim > 1:
            audio = audio.mean(axis=1)
        if sr != SAMPLING_RATE:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=SAMPLING_RATE)
# Extract features
inputs = feature_extractor(
audio,
sampling_rate=SAMPLING_RATE,
return_tensors="pt",
padding=True,
return_attention_mask=True
)
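        # The AST feature extractor converts the waveform into a fixed-size log-mel
        # spectrogram (1024 frames x 128 mel bins by default) and pads/truncates
        # internally, so `padding` and `return_attention_mask` above are effectively
        # no-ops for this extractor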
# Run inference
with torch.no_grad():
outputs = model(**inputs)
logits = outputs.logits
probs = torch.softmax(logits, dim=-1)
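        # Note: AudioSet models are typically trained multi-label (per-class sigmoid),
        # so a single softmax over all logits is a simplifying assumption here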
        # Get predicted class and confidence, treating the probability of class 0 as
        # a "normal" score proxy (the checkpoint has no dedicated normal/anomaly head)
        normal_score = probs[0][0].item()
        predicted_class = "Normal" if normal_score > threshold else "Anomaly"
        confidence = normal_score if predicted_class == "Normal" else 1 - normal_score
# Create spectrogram visualization
spectrogram = librosa.feature.melspectrogram(
y=audio,
sr=SAMPLING_RATE,
n_mels=64, # Reduced from 128 to avoid warning
fmax=8000
)
db_spec = librosa.power_to_db(spectrogram, ref=np.max)
fig, ax = plt.subplots(figsize=(10, 4))
img = librosa.display.specshow(
db_spec,
x_axis='time',
y_axis='mel',
sr=SAMPLING_RATE,
fmax=8000,
ax=ax
)
fig.colorbar(img, ax=ax, format='%+2.0f dB')
ax.set(title='Mel Spectrogram')
plt.tight_layout()
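        # Assumption: single-user demo; a fixed output filename can collide under
        # concurrent requests, so a tempfile.NamedTemporaryFile path would be safer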
plt.savefig('spec.png', bbox_inches='tight')
plt.close()
return (
predicted_class,
f"{confidence:.1%}",
'spec.png',
str(probs.tolist()[0])
)
    except Exception as e:
        # Return None for the image slot so gr.Image clears instead of choking on ""
        return f"Error: {e}", "", None, ""
# Gradio interface
with gr.Blocks(title="Industrial Audio Analyzer", theme=gr.themes.Soft()) as demo:
gr.Markdown("""
    # Industrial Equipment Sound Analyzer
### Powered by Audio Spectrogram Transformer (AST)
""")
with gr.Row():
with gr.Column():
audio_input = gr.Audio(
label="Upload Equipment Audio Recording",
type="numpy"
)
threshold = gr.Slider(
minimum=0.5,
maximum=0.95,
step=0.05,
value=DEFAULT_THRESHOLD,
label="Anomaly Detection Threshold",
info="Higher values reduce false positives but may miss subtle anomalies"
)
            analyze_btn = gr.Button("Analyze Sound", variant="primary")
with gr.Column():
result_label = gr.Label(label="Detection Result")
confidence = gr.Textbox(label="Confidence Score")
spectrogram = gr.Image(label="Spectrogram Visualization")
raw_probs = gr.Textbox(
label="Model Output Probabilities",
visible=False
)
analyze_btn.click(
fn=analyze_audio,
inputs=[audio_input, threshold],
outputs=[result_label, confidence, spectrogram, raw_probs]
)
gr.Markdown("""
## How It Works
- Upload audio recordings from industrial equipment
- The AI analyzes sound patterns using spectrogram analysis
- Detects anomalies indicating potential equipment issues
**Tip**: For best results, use 5-10 second recordings of steady operation
""")
if __name__ == "__main__":
demo.launch()
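# To run locally (assumed dependencies, not pinned in this file):
#   pip install gradio torch transformers librosa matplotlib
#   python app.py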