Spaces:

jc180
/

practice

Running

jc180 committed 6 days ago

Commit

fa8a5a0

1 Parent(s): 35ebfd2

c

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,8 +1,46 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
 demo.launch()

 import gradio as gr
+import torch
+import torchaudio
+from transformers import AutoFeatureExtractor, ASTForAudioClassification
+model_name = "MIT/ast-finetuned-audioset-10-10-0.4593"
+model = ASTForAudioClassification.from_pretrained(model_name)
+feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
+device = torch.device("cpu")
+model.to(device)
+def classify_sound(file_path):
+    wv, sr = torchaudio.load(file_path)
+    # Convert to mono
+    if waveform.shape[0] > 1:
+        waveform = waveform.mean(dim=0, keepdim=True)
+    inputs = feature_extractor(
+        wv.squeeze().numpy(), sampling_rate=44100, return_tensors="pt"
+    )
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    probs = torch.softmax(logits, dim=-1)[0]
+    top5 = torch.topk(probs, k=5)
+    res = [
+        (model.config.id2label[idx.item()], round(prob.item(), 4))
+        for idx, prob in zip(top5.indices, top5.values)
+    ]
+    return dict(res)
+demo = gr.Interface(
+    fn=classify_sound,
+    inputs=gr.audio(source="upload", type="filepath"),
+    outputs=gr.Label(num_top_classes=5),
+    title="Audio Classification with AST",
+    description="Upload an audio clip (speech, music, ambient sound, etc.). Model: MIT AST fine-tuned on AudioSet (10 classes).",
+    live=False,
+)
 demo.launch()