Spaces:
Sleeping
Sleeping
File size: 1,881 Bytes
c9d400e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import opensmile
import joblib
import wave
import datetime
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from IPython.display import Javascript, Audio
from base64 import b64decode
import gradio as gr
# File name of the serialized model, resolved relative to the current working
# directory. NOTE(review): the name suggests a random-forest classifier trained
# on 20 emobase features - confirm against the training code.
model_path = "RF_emobase_20_model_top1_score0.6863_20231207_1537.joblib"
# Deserialize the model once at import time; main() reuses this module-level
# object for every prediction.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so the file
# must come from a trusted source.
model = joblib.load(model_path)
# Names of the emobase functionals that are kept and passed on to the model.
_SELECTED_FEATURES = (
    'F0env_sma_de_amean',
    'lspFreq_sma_de[5]_linregc1',
    'mfcc_sma[3]_linregc1',
    'lspFreq_sma[6]_quartile1',
    'lspFreq_sma_de[6]_linregerrQ',
    'lspFreq_sma_de[6]_maxPos',
    'lspFreq_sma_de[6]_iqr2-3',
    'lspFreq_sma_de[7]_minPos',
    'lspFreq_sma_de[4]_linregc1',
    'lspFreq_sma_de[6]_linregerrA',
    'lspFreq_sma_de[6]_linregc2',
    'lspFreq_sma[5]_amean',
    'lspFreq_sma_de[6]_iqr1-2',
    'mfcc_sma[1]_minPos',
    'mfcc_sma[4]_linregc1',
    'mfcc_sma[9]_iqr2-3',
    'lspFreq_sma[5]_kurtosis',
    'lspFreq_sma_de[3]_skewness',
    'mfcc_sma[3]_minPos',
    'mfcc_sma[12]_linregc1',
)


def extract_features(audio_path, scaler=None):
    """Extract the selected emobase functionals for the first given audio file.

    Args:
        audio_path: Audio file path(s) handed to
            ``opensmile.Smile.process_files``. Only the first resulting row
            is used.
        scaler: Optional, already-fitted scaler exposing ``transform`` (for
            example the ``StandardScaler`` fitted on the training data). When
            omitted, the legacy behaviour is kept: a fresh ``StandardScaler``
            is fitted on this single sample, which maps every feature to 0.0.

    Returns:
        A 2-D array with one row holding the scaled selected features, in
        the column order produced by openSMILE.

    Raises:
        ValueError: If openSMILE returned no rows, or if any of the selected
            features is missing from its output.
    """
    import warnings

    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.emobase,
        feature_level=opensmile.FeatureLevel.Functionals,
    )
    feature_df = smile.process_files(audio_path)
    if feature_df.empty:
        raise ValueError("openSMILE returned no features for the given input")

    # One-column frame indexed by feature name, so selection is by name.
    df = pd.DataFrame(feature_df.values[0], index=feature_df.columns)
    df = df[df.index.isin(_SELECTED_FEATURES)]

    # Fail here with a readable message rather than later inside the model
    # with an opaque shape mismatch.
    missing = sorted(set(_SELECTED_FEATURES) - set(df.index))
    if missing:
        raise ValueError(f"openSMILE output lacks expected features: {missing}")

    df = df.T  # one row (the sample), one column per selected feature
    print(df.shape)

    if scaler is not None:
        # Apply the statistics learned at training time.
        return scaler.transform(df)

    # Fitting on a single row gives mean == row and zero variance, so the
    # result is all zeros regardless of the audio. Kept only for backward
    # compatibility; callers should pass the training-time scaler.
    warnings.warn(
        "extract_features: no fitted scaler supplied; fitting StandardScaler "
        "on a single sample yields an all-zero feature vector",
        RuntimeWarning,
        stacklevel=2,
    )
    return StandardScaler().fit_transform(df)
def main(input):
    """Classify one recorded or uploaded audio clip.

    Args:
        input: Path of the audio file supplied by the Gradio audio component,
            or a falsy value (``None`` / empty string) when no audio is
            present. The name shadows the ``input`` builtin but is kept so
            the signature stays unchanged.

    Returns:
        Whatever ``model.predict`` returns for the extracted feature vector,
        or an empty string when there is no audio to classify.
    """
    # The interface runs with live=True, so this callback can fire while the
    # audio widget is empty; bail out instead of handing None to openSMILE.
    if not input:
        return ""

    # Extract features with openSMILE.
    feature_vector = extract_features([input])
    # Run inference with the loaded model.
    prediction = model.predict(feature_vector)
    return prediction
# Build the web UI: one audio input (microphone or file upload, delivered to
# the callback as a file path) and one text output showing the prediction.
# live=True re-runs the callback whenever the input changes.
demo = gr.Interface(
    fn=main,
    inputs=[gr.Audio(sources=["microphone", "upload"], type="filepath")],
    outputs=["textbox"],
    title='Question Classifier Model',
    live=True,
)

# Start the server; debug=True is passed through to Gradio unchanged.
demo.launch(debug=True)