Har3ish committed on
Commit
8f0ca83
·
verified ·
1 Parent(s): d8647ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -74
app.py CHANGED
@@ -1,74 +1,80 @@
1
- # ======================
2
- # app.py
3
- # ======================
4
-
5
- from flask import Flask, request, jsonify
6
- import tensorflow as tf
7
- import numpy as np
8
- import pandas as pd
9
- import librosa
10
- from transformers import AutoTokenizer, TFAutoModel
11
-
12
# --- Artifacts saved at training time ---------------------------------------
model = tf.keras.models.load_model("model.h5")
# NOTE(review): read_pickle unpickles the file, so these must be trusted files.
scaler = pd.read_pickle("scaler.pkl")
encoder = pd.read_pickle("label_encoder.pkl")
meta = pd.read_excel("raga_metadata.xlsx")

# --- IndicBERT tokenizer and encoder ----------------------------------------
# Both objects come from the same checkpoint; from_pt=True asks transformers
# to build the TensorFlow model from the checkpoint's PyTorch weights.
_INDIC_BERT_CHECKPOINT = "ai4bharat/IndicBERTv2-MLM-only"
tokenizer = AutoTokenizer.from_pretrained(_INDIC_BERT_CHECKPOINT)
bert_model = TFAutoModel.from_pretrained(_INDIC_BERT_CHECKPOINT, from_pt=True)

# Flask application object that the route decorators attach to.
app = Flask(__name__)
23
-
24
def extract_features(file_path):
    """Summarise an audio file as a one-row DataFrame of mean features.

    The file is loaded with librosa at 22050 Hz. The returned row holds the
    mean chroma STFT ("chroma_stft"), the mean spectral centroid
    ("spec_cent") and the mean of each of the 18 MFCC rows
    ("mfcc1" .. "mfcc18"), in that column order.
    """
    signal, rate = librosa.load(file_path, sr=22050)

    chroma = librosa.feature.chroma_stft(y=signal, sr=rate)
    centroid = librosa.feature.spectral_centroid(y=signal, sr=rate)
    row = {"chroma_stft": np.mean(chroma), "spec_cent": np.mean(centroid)}

    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=18)
    row.update({f"mfcc{k + 1}": np.mean(coefficients[k]) for k in range(18)})

    return pd.DataFrame([row])
34
-
35
def predict_raga(audio_df, raga_name):
    """Predict the raga label for one clip given its features and a raga name.

    Args:
        audio_df: One-row feature frame, as produced by ``extract_features``.
        raga_name: Name used to look up a description in ``meta``; the
            description text is embedded and fed to the model alongside the
            audio features.

    Returns:
        The decoded label of the highest-scoring class.
    """
    audio_scaled = scaler.transform(audio_df)
    # The model takes a (batch, timesteps, features) tensor; this is a single
    # clip with a single timestep.
    audio_lstm_input = audio_scaled.reshape((1, 1, audio_scaled.shape[1]))

    # Always build a plain one-element list of text. The previous code
    # replaced an empty lookup with a list and then called ``.tolist()`` on
    # it, which raised AttributeError for every unknown raga. Using only the
    # first match also keeps the text batch size equal to the audio batch (1).
    matches = meta.loc[meta["raga"] == raga_name, "description"].dropna()
    description_text = [str(matches.iloc[0])] if len(matches) else [""]

    desc_tok = tokenizer(
        description_text,
        padding=True,
        truncation=True,
        max_length=64,
        return_tensors="tf",
    )
    bert_out = bert_model(
        desc_tok["input_ids"], attention_mask=desc_tok["attention_mask"]
    )
    # First output, position 0 of every sequence: the leading-token embedding.
    desc_embed = bert_out[0][:, 0, :]

    pred = model.predict([audio_lstm_input, desc_embed])
    return encoder.inverse_transform([np.argmax(pred)])[0]
49
-
50
@app.route("/")
def home():
    """Landing route: returns a plain-text banner showing the API is up."""
    banner = "🎶 Raga Prediction API is Live!"
    return banner
53
-
54
@app.route("/predict", methods=["POST"])
def predict():
    """Handle an upload and return the predicted raga as JSON.

    Expects a multipart form with an ``audio`` file and a ``raga_name`` field.
    Responds with ``{"predicted_raga": ...}`` on success. On failure responds
    with ``{"error": ...}`` and a 400 status for missing inputs or a 500
    status for processing errors.
    """
    import os
    import tempfile

    audio_file = request.files.get("audio")
    raga_name = request.form.get("raga_name")
    if audio_file is None or raga_name is None:
        return jsonify({"error": "Both 'audio' and 'raga_name' are required."}), 400

    # A unique temp file per request: a fixed name would be overwritten by
    # concurrent requests and was never cleaned up.
    fd, temp_audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio_file.save(temp_audio_path)
        features = extract_features(temp_audio_path)
        predicted_raga = predict_raga(features, raga_name)
        return jsonify({"predicted_raga": predicted_raga})
    except Exception as e:  # top-level request boundary: report, don't crash
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup; the file may already be gone.
        try:
            os.remove(temp_audio_path)
        except OSError:
            pass
72
-
73
# Script entry point: serve the Flask app on all interfaces, port 7860.
if __name__ == "__main__":
    listen_on = {"host": "0.0.0.0", "port": 7860}
    app.run(**listen_on)
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import numpy as np
5
+ import librosa
6
+ import joblib
7
+ import tensorflow as tf
8
+ from keras.models import load_model
9
+ from transformers import AutoTokenizer, TFAutoModel
10
+
11
+ # ====================
12
+ # 1. Load Model and Assets
13
+ # ====================
14
# Trained classifier plus the fitted scaler and label encoder it depends on.
model = load_model("raga_predictor_model.h5")
scaler = joblib.load("scaler.pkl")
encoder = joblib.load("label_encoder.pkl")

# Tokenizer and TensorFlow encoder, both fetched from the same Hugging Face
# checkpoint.
# NOTE(review): an earlier revision passed from_pt=True here. If the
# checkpoint publishes only PyTorch weights this call will fail - confirm.
_INDIC_BERT_CHECKPOINT = "ai4bharat/IndicBERTv2-MLM-only"
tokenizer = AutoTokenizer.from_pretrained(_INDIC_BERT_CHECKPOINT)
bert_model = TFAutoModel.from_pretrained(_INDIC_BERT_CHECKPOINT)

# Raga metadata, plus a raga -> description lookup built from two columns.
meta = pd.read_csv("raga_metadata.csv")
raga_descriptions = {
    raga: text for raga, text in zip(meta["raga"], meta["description"])
}
25
+
26
+ # ====================
27
+ # 2. Define Utility Functions
28
+ # ====================
29
def extract_features(file_path):
    """Build the one-row feature frame for an audio file.

    Audio is loaded at 22050 Hz. The columns, in order, are the mean chroma
    STFT ("chroma_stft"), the mean spectral centroid ("spec_cent") and the
    per-coefficient means of 18 MFCCs ("mfcc1" .. "mfcc18").
    """
    waveform, sample_rate = librosa.load(file_path, sr=22050)

    summary = {}
    summary["chroma_stft"] = np.mean(
        librosa.feature.chroma_stft(y=waveform, sr=sample_rate)
    )
    summary["spec_cent"] = np.mean(
        librosa.feature.spectral_centroid(y=waveform, sr=sample_rate)
    )

    # One row per coefficient; each row is averaged over time.
    mfcc_rows = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=18)
    for number, coefficient_row in enumerate(mfcc_rows, start=1):
        summary[f"mfcc{number}"] = np.mean(coefficient_row)

    return pd.DataFrame([summary])
39
+
40
def tokenize_description(description_text):
    """Embed description text with the module-level tokenizer and BERT model.

    The text is tokenized (padded, truncated to 64 tokens, TensorFlow
    tensors) and run through ``bert_model``. The result is the slice at
    sequence position 0 of the model's first output, one vector per input.
    """
    encoded = tokenizer(
        description_text,
        padding=True,
        truncation=True,
        max_length=64,
        return_tensors="tf",
    )
    model_outputs = bert_model(
        encoded["input_ids"], attention_mask=encoded["attention_mask"]
    )
    token_states = model_outputs[0]
    return token_states[:, 0, :]
44
+
45
def predict_raga(audio_file):
    """Predict the raga of an uploaded clip and format the result as text.

    Args:
        audio_file: Either a filesystem path (what ``gr.Audio`` delivers with
            ``type="filepath"``) or a file-like object exposing ``.name``
            (what the legacy ``type="file"`` delivered). ``None`` means
            nothing was uploaded.

    Returns:
        A message with the predicted raga and its description from
        ``raga_descriptions``, or a prompt to upload a file when the input
        is ``None``.
    """
    if audio_file is None:
        # Submitting without a file would otherwise raise AttributeError.
        return "⚠️ Please upload an audio file first."

    # Accept both the path string and the legacy temp-file wrapper.
    audio_path = audio_file if isinstance(audio_file, str) else audio_file.name

    # Extract features and shape them as (batch, timesteps, features).
    audio_df = extract_features(audio_path)
    audio_scaled = scaler.transform(audio_df)
    audio_lstm_input = audio_scaled.reshape((1, 1, audio_scaled.shape[1]))

    # The raga is unknown at inference time, so the text branch of the model
    # is fed the embedding of an empty description.
    desc_embed = tokenize_description([""])

    pred = model.predict([audio_lstm_input, desc_embed])
    raga_pred = encoder.inverse_transform([np.argmax(pred)])[0]

    raga_text = raga_descriptions.get(raga_pred, "No description available.")
    return f"🎵 Predicted Raga: {raga_pred}\n\n📝 Description:\n{raga_text}"


# ====================
# 3. Gradio Interface
# ====================
title = "🎶 Raga Prediction App"
description = "Upload an Indian classical music clip, and I will predict the Raga for you!"

interface = gr.Interface(
    fn=predict_raga,
    # type="file" is deprecated in Gradio 3.x and rejected by Gradio 4.x;
    # "filepath" hands predict_raga the path of the uploaded file as a string.
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs="text",
    title=title,
    description=description,
)

interface.launch()