Har3ish committed on
Commit
df24f56
·
verified ·
1 Parent(s): 528312b

Upload 6 files

Browse files
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ======================
2
+ # app.py
3
+ # ======================
4
+
5
+ from flask import Flask, request, jsonify
6
+ import tensorflow as tf
7
+ import numpy as np
8
+ import pandas as pd
9
+ import librosa
10
+ from transformers import AutoTokenizer, TFAutoModel
11
+
12
# Load the artifacts saved at training time.
# The model file uploaded next to this app is "raga_predictor_model.h5";
# the previously referenced "model.h5" matches none of the uploaded files.
model = tf.keras.models.load_model("raga_predictor_model.h5")
# NOTE: read_pickle unpickles arbitrary objects -- only ship trusted files.
scaler = pd.read_pickle("scaler.pkl")
encoder = pd.read_pickle("label_encoder.pkl")
# Looked up by its 'raga' and 'description' columns in predict_raga().
meta = pd.read_excel("raga_metadata.xlsx")

# Load IndicBERT model
# NOTE(review): from_pt=True converts a PyTorch checkpoint and therefore needs
# PyTorch installed at load time; requirements.txt does not list torch --
# confirm the deployment image provides it.
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicBERTv2-MLM-only")
bert_model = TFAutoModel.from_pretrained("ai4bharat/IndicBERTv2-MLM-only", from_pt=True)

app = Flask(__name__)
23
+
24
def extract_features(file_path):
    """Summarise an audio file as a single-row feature table.

    The audio is loaded with ``librosa.load(..., sr=22050)``. The returned
    frame has one row and, in this order, the columns ``chroma_stft``,
    ``spec_cent`` and ``mfcc1`` .. ``mfcc18``; each value is the mean of the
    corresponding librosa feature.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A one-row ``pandas.DataFrame`` of the averaged features.
    """
    signal, rate = librosa.load(file_path, sr=22050)

    # Global means: each feature matrix collapses to a single scalar.
    chroma = librosa.feature.chroma_stft(y=signal, sr=rate)
    centroid = librosa.feature.spectral_centroid(y=signal, sr=rate)
    row = {
        "chroma_stft": np.mean(chroma),
        "spec_cent": np.mean(centroid),
    }

    # One mean per MFCC coefficient; columns are numbered from 1.
    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=18)
    row.update(
        (f"mfcc{idx}", np.mean(coeffs))
        for idx, coeffs in enumerate(mfcc_matrix, start=1)
    )

    return pd.DataFrame([row])
34
+
35
def predict_raga(audio_df, raga_name):
    """Predict a raga label from audio features plus a raga description.

    Args:
        audio_df: One-row feature table as produced by ``extract_features``.
        raga_name: Raga whose textual description is looked up in the
            ``raga`` / ``description`` columns of the metadata sheet.

    Returns:
        The label obtained by decoding the arg-max of the model output with
        the fitted label encoder.
    """
    audio_scaled = scaler.transform(audio_df)
    # Shape (1, 1, n_features): one sample, one timestep.
    audio_lstm_input = audio_scaled.reshape((1, 1, audio_scaled.shape[1]))

    # Get description for this raga.
    # Exactly one string is always used: previously an unknown raga replaced
    # the array with a plain list and the following `.tolist()` raised
    # AttributeError, and several matching rows would have produced a text
    # batch larger than the single audio sample. Missing (NaN) descriptions
    # are skipped so the tokenizer never receives a float.
    matches = meta.loc[meta["raga"] == raga_name, "description"].dropna()
    description = "" if matches.empty else str(matches.iloc[0])

    desc_tok = tokenizer(
        [description],
        padding=True,
        truncation=True,
        max_length=64,
        return_tensors="tf",
    )
    # First model output, position 0 of every sequence
    # (presumably the [CLS] embedding -- TODO confirm against training code).
    desc_embed = bert_model(
        desc_tok["input_ids"],
        attention_mask=desc_tok["attention_mask"],
    )[0][:, 0, :]

    pred = model.predict([audio_lstm_input, desc_embed])
    return encoder.inverse_transform([np.argmax(pred)])[0]
49
+
50
@app.route("/")
def home():
    """Root endpoint: return a fixed banner showing the service is up."""
    banner = "🎶 Raga Prediction API is Live!"
    return banner
53
+
54
@app.route("/predict", methods=["POST"])
def predict():
    """Handle a raga prediction request.

    Expects a multipart form with an ``audio`` file and a ``raga_name``
    field.

    Returns:
        ``{"predicted_raga": ...}`` on success, or ``{"error": ...}`` with
        HTTP 400 when a required field is missing and HTTP 500 when feature
        extraction or inference fails.
    """
    # Local imports keep this handler self-contained.
    import os
    import tempfile

    audio_file = request.files.get("audio")
    raga_name = request.form.get("raga_name")
    if audio_file is None or raga_name is None:
        return jsonify({
            "error": "Both 'audio' (file) and 'raga_name' (form field) are required."
        }), 400

    # A unique temp file per request: a fixed "temp_audio.wav" would be
    # overwritten by concurrent requests and was never cleaned up.
    fd, temp_audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio_file.save(temp_audio_path)

        features = extract_features(temp_audio_path)
        predicted_raga = predict_raga(features, raga_name)

        return jsonify({
            "predicted_raga": predicted_raga
        })
    except Exception as e:
        # Top-level boundary: log the traceback, keep the JSON error shape,
        # but signal the failure through the status code as well.
        app.logger.exception("Raga prediction failed")
        return jsonify({"error": str(e)}), 500
    finally:
        try:
            os.remove(temp_audio_path)
        except OSError:
            # Best-effort cleanup; never mask the real response.
            pass
72
+
73
# Script entry point: start Flask's built-in server on every interface,
# port 7860, when this file is executed directly.
if __name__ == "__main__":
    app.run(port=7860, host="0.0.0.0")
label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9527a080b81de4f169669257ca18c6a7240a235f5e18c0a1c5d09414b89f1afa
3
+ size 1035
raga_metadata.xlsx ADDED
Binary file (21.1 kB). View file
 
raga_predictor_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12cfb239bef9460e0c13f0a1466b36f058fa3685d640d4b8a0aa2b863fa20ad5
3
+ size 3708664
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ tensorflow
2
+ keras
3
+ transformers
4
+ scikit-learn
5
+ librosa
6
+ lime
7
+ pandas
8
+ numpy
9
+ matplotlib
10
+ openpyxl
11
+ flask
scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cb12ac67332836dd372b4ddff48648ac39e9e46217da55e32ee10c63d0ad2e1
3
+ size 1655