Spaces:

Har3ish
/

raga

Runtime error

App Files Files Community

raga / app.py

Har3ish

Update app.py

8f0ca83 verified 4 months ago

raw

history blame

2.61 kB

	# app.py
	import gradio as gr
	import pandas as pd
	import numpy as np
	import librosa
	import joblib
	import tensorflow as tf
	from keras.models import load_model
	from transformers import AutoTokenizer, TFAutoModel

	# ====================
	# 1. Load Model and Assets
	# ====================
	# Trained classifier plus the preprocessing objects saved alongside it.
	model = load_model("raga_predictor_model.h5")
	scaler = joblib.load("scaler.pkl")
	encoder = joblib.load("label_encoder.pkl")

	# Tokenizer and BERT encoder come from the same Hugging Face checkpoint,
	# so the checkpoint name is spelled out once and shared by both loads.
	BERT_CHECKPOINT = "ai4bharat/IndicBERTv2-MLM-only"
	tokenizer = AutoTokenizer.from_pretrained(BERT_CHECKPOINT)
	bert_model = TFAutoModel.from_pretrained(BERT_CHECKPOINT)

	# Raga metadata: lookup table from the 'raga' column to the 'description' column.
	meta = pd.read_csv("raga_metadata.csv")
	raga_descriptions = {
	    raga: text for raga, text in zip(meta['raga'], meta['description'])
	}

	# ====================
	# 2. Define Utility Functions
	# ====================
	def extract_features(file_path):
	    """Summarise an audio file as a single row of averaged features.

	    The clip is loaded at 22050 Hz and reduced to 20 scalar values: the
	    mean chroma STFT, the mean spectral centroid, and the mean of each of
	    the 18 MFCC rows.

	    Args:
	        file_path: Path of the audio file handed to ``librosa.load``.

	    Returns:
	        A one-row ``pandas.DataFrame`` whose columns are, in order,
	        ``chroma_stft``, ``spec_cent``, ``mfcc1`` ... ``mfcc18``.
	    """
	    signal, rate = librosa.load(file_path, sr=22050)

	    chroma = librosa.feature.chroma_stft(y=signal, sr=rate)
	    centroid = librosa.feature.spectral_centroid(y=signal, sr=rate)
	    row = {
	        "chroma_stft": np.mean(chroma),
	        "spec_cent": np.mean(centroid),
	    }

	    # One column per MFCC coefficient, numbered from 1.
	    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=18)
	    for number, coefficient in enumerate(coefficients, start=1):
	        row[f"mfcc{number}"] = np.mean(coefficient)

	    return pd.DataFrame([row])

	def tokenize_description(description_text):
	    """Embed description text with the module-level tokenizer and BERT model.

	    Args:
	        description_text: Text (or list of texts) passed straight to the
	            tokenizer; inputs are padded and truncated to at most 64 tokens.

	    Returns:
	        The slice ``[:, 0, :]`` of the first element of the BERT output,
	        i.e. the vector at the first token position for every input text
	        (presumably the [CLS] embedding of the last hidden state -- confirm
	        against the checkpoint's output layout).
	    """
	    encoded = tokenizer(
	        description_text,
	        padding=True,
	        truncation=True,
	        max_length=64,
	        return_tensors="tf",
	    )
	    outputs = bert_model(
	        encoded['input_ids'],
	        attention_mask=encoded['attention_mask'],
	    )
	    first_output = outputs[0]
	    return first_output[:, 0, :]

	def predict_raga(audio_file):
	    """Predict the raga of an uploaded audio clip and format the result.

	    Args:
	        audio_file: Either the path of the clip as a string (what
	            ``gr.Audio(type="filepath")`` passes to the callback) or a
	            file-like object exposing that path as ``.name``. ``None``
	            means nothing was uploaded.

	    Returns:
	        A display string with the predicted raga name and its description
	        from ``raga_descriptions`` (or a fallback when none is recorded).
	        When no audio was provided, a short prompt asking for an upload.
	    """
	    # Gradio calls the function with None when the user submits without audio.
	    if audio_file is None:
	        return "⚠️ Please upload an audio file first."

	    # Accept both a plain path string and a tempfile-style wrapper so the
	    # callback works regardless of how the audio component delivers the file.
	    if isinstance(audio_file, str):
	        audio_path = audio_file
	    else:
	        audio_path = audio_file.name

	    # Extract features and shape them as (batch=1, timesteps=1, n_features).
	    audio_df = extract_features(audio_path)
	    audio_scaled = scaler.transform(audio_df)
	    audio_lstm_input = audio_scaled.reshape((1, 1, audio_scaled.shape[1]))

	    # The model takes a text input as well; the app has no description
	    # from the user, so an empty placeholder string is embedded instead.
	    description_text = ""
	    desc_embed = tokenize_description([description_text])

	    # Predict and map the highest-scoring class index back to its label.
	    pred = model.predict([audio_lstm_input, desc_embed])
	    raga_pred = encoder.inverse_transform([np.argmax(pred)])[0]

	    # Look up the human-readable description for the predicted raga.
	    raga_description = raga_descriptions.get(raga_pred, "No description available.")

	    return f"🎵 Predicted Raga: {raga_pred}\n\n📝 Description:\n{raga_description}"

	# ====================
	# 3. Gradio Interface
	# ====================
	title = "🎶 Raga Prediction App"
	description = "Upload an Indian classical music clip, and I will predict the Raga for you!"

	interface = gr.Interface(
	    fn=predict_raga,
	    # type="file" is rejected by current Gradio releases (only "numpy" and
	    # "filepath" are accepted); "filepath" hands predict_raga a path string.
	    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
	    outputs="text",
	    title=title,
	    description=description,
	)

	interface.launch()