import os

import gradio as gr
import joblib
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import load_model
from transformers import AutoTokenizer, TFAutoModel


# Classifier used by predict_raga, loaded from the saved Keras .h5 file.
model = load_model("raga_predictor_model.h5")

# Persisted preprocessing objects: the scaler is applied to the audio
# features and the encoder maps a predicted class index back to a label.
scaler, encoder = (
    joblib.load(artifact) for artifact in ("scaler.pkl", "label_encoder.pkl")
)

# Tokenizer and TensorFlow model from the same IndicBERT checkpoint; both are
# used by tokenize_description to embed description text.
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicBERTv2-MLM-only")
bert_model = TFAutoModel.from_pretrained("ai4bharat/IndicBERTv2-MLM-only")

# Lookup from the "raga" column to the "description" column of the metadata CSV.
meta = pd.read_csv("raga_metadata.csv")
raga_descriptions = dict(zip(meta["raga"], meta["description"]))


def extract_features(file_path):
    """Summarise an audio file as a one-row DataFrame of averaged features.

    The clip is loaded with librosa at 22050 Hz. The returned frame has the
    columns ``chroma_stft`` and ``spec_cent`` (overall means of the chroma
    STFT and the spectral centroid) followed by ``mfcc1`` .. ``mfcc18``
    (the mean of each of the 18 requested MFCC rows).

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        A ``pandas.DataFrame`` with exactly one row and 20 columns.
    """
    signal, sample_rate = librosa.load(file_path, sr=22050)

    chroma = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
    centroid = librosa.feature.spectral_centroid(y=signal, sr=sample_rate)
    row = {
        "chroma_stft": np.mean(chroma),
        "spec_cent": np.mean(centroid),
    }

    # One column per MFCC row, numbered from 1 to match the column naming.
    mfcc_rows = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=18)
    for number, coefficients in enumerate(mfcc_rows, start=1):
        row[f"mfcc{number}"] = np.mean(coefficients)

    return pd.DataFrame([row])


def tokenize_description(description_text):
    """Embed description text with the module-level tokenizer and BERT model.

    The text is tokenized into TensorFlow tensors (padded, truncated to at
    most 64 tokens) and run through ``bert_model``. For every input sequence
    the vector at position 0 of the model's first output is returned.

    Args:
        description_text: Text (or list of texts) handed to ``tokenizer``.

    Returns:
        The ``[:, 0, :]`` slice of the model's first output.
    """
    encoded = tokenizer(
        description_text,
        padding=True,
        truncation=True,
        max_length=64,
        return_tensors="tf",
    )
    outputs = bert_model(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
    )
    first_output = outputs[0]
    # Position 0 is presumably the [CLS] token embedding -- TODO confirm.
    return first_output[:, 0, :]


def predict_raga(audio_file):
    """Predict the raga of an uploaded clip and return a display string.

    Args:
        audio_file: The value Gradio passes for the audio input. Either a
            filesystem path (``str`` or ``os.PathLike``) or a file-like
            object exposing its path as ``.name``. ``None`` means that
            nothing was uploaded.

    Returns:
        A text block with the predicted raga and its description from
        ``raga_descriptions`` ("No description available." when the raga is
        not in the lookup), or a prompt to upload a file when ``audio_file``
        is ``None``.
    """
    if audio_file is None:
        # Submitting without an upload would otherwise fail on attribute access.
        return "Please upload an audio file first."

    # Accept both the path form ("filepath" audio type) and the legacy
    # temp-file object form, so the function works with either wiring.
    if isinstance(audio_file, (str, os.PathLike)):
        audio_path = os.fspath(audio_file)
    else:
        audio_path = audio_file.name

    features = extract_features(audio_path)
    scaled = scaler.transform(features)
    # Reshape the single scaled row to (1, 1, n_features) for the model's audio input.
    audio_lstm_input = scaled.reshape((1, 1, scaled.shape[1]))

    # No description is known at prediction time, so the text input is the
    # embedding of an empty string.
    desc_embed = tokenize_description([""])

    pred = model.predict([audio_lstm_input, desc_embed])
    raga_pred = encoder.inverse_transform([np.argmax(pred)])[0]

    # Named raga_text so it does not shadow the module-level `description`.
    raga_text = raga_descriptions.get(raga_pred, "No description available.")

    # NOTE: the emoji were mojibake in the source. The first is restored from
    # its surviving bytes; the second was unrecoverable and the book emoji is
    # a stand-in.
    return f"🎵 Predicted Raga: {raga_pred}\n\n📖 Description:\n{raga_text}"


title = "🎶 Raga Prediction App"
description = "Upload an Indian classical music clip, and I will predict the Raga for you!"

interface = gr.Interface(
    fn=predict_raga,
    # "filepath" passes predict_raga a path string to a temporary copy of the upload.
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs="text",
    title=title,
    description=description,
)

interface.launch()