# File-viewer header residue: ttt-tkmr · "Update app.py" · commit cbbe902 · 1.83 kB
import opensmile
import joblib
import wave
import datetime
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from base64 import b64decode
import gradio as gr
# File name of the serialized estimator; the relative path is resolved against
# the process's current working directory.
model_path = "RF_emobase_20_model_top1_score0.6863_20231207_1537.joblib"
# Deserialized once at import time; main() calls model.predict() on this object.
model = joblib.load(model_path)
def extract_features(audio_path, scaler=None):
    """Extract the selected emobase functionals for the first audio file.

    Args:
        audio_path: File path(s) handed to ``opensmile.Smile.process_files``
            (``main`` passes a one-element list). Only the first resulting
            row is used.
        scaler: Optional, already-fitted scaler exposing ``transform``
            (for example the ``StandardScaler`` fitted on the training
            data). When given, it is applied instead of fitting a new
            scaler on the single sample.

    Returns:
        A 2-D array with one row holding the scaled values of the selected
        features, in the column order produced by openSMILE.

    Raises:
        ValueError: If openSMILE did not produce every expected feature.
        IndexError: If openSMILE returned no rows at all.
    """
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.emobase,
        feature_level=opensmile.FeatureLevel.Functionals,
    )
    feature_df = smile.process_files(audio_path)

    output_features = [
        'F0env_sma_de_amean',
        'lspFreq_sma_de[5]_linregc1',
        'mfcc_sma[3]_linregc1',
        'lspFreq_sma[6]_quartile1',
        'lspFreq_sma_de[6]_linregerrQ',
        'lspFreq_sma_de[6]_maxPos',
        'lspFreq_sma_de[6]_iqr2-3',
        'lspFreq_sma_de[7]_minPos',
        'lspFreq_sma_de[4]_linregc1',
        'lspFreq_sma_de[6]_linregerrA',
        'lspFreq_sma_de[6]_linregc2',
        'lspFreq_sma[5]_amean',
        'lspFreq_sma_de[6]_iqr1-2',
        'mfcc_sma[1]_minPos',
        'mfcc_sma[4]_linregc1',
        'mfcc_sma[9]_iqr2-3',
        'lspFreq_sma[5]_kurtosis',
        'lspFreq_sma_de[3]_skewness',
        'mfcc_sma[3]_minPos',
        'mfcc_sma[12]_linregc1',
    ]

    # Keep the first file's row only, as a one-row frame, and restrict it to
    # the selected columns. The openSMILE column order is preserved.
    first_row = feature_df.iloc[[0]]
    df = first_row.loc[:, first_row.columns.isin(output_features)]

    # Fail early with a readable message instead of a shape error from the
    # estimator further down the line.
    missing = [name for name in output_features if name not in df.columns]
    if missing:
        raise ValueError(f"openSMILE output is missing features: {missing}")

    if scaler is None:
        # NOTE(review): fitting a StandardScaler on a single row makes every
        # column's mean equal to its only value, so the result is all zeros
        # for any recording. Kept as the default for backward compatibility;
        # pass the scaler fitted at training time via ``scaler`` to get
        # input-dependent features.
        feature = StandardScaler().fit_transform(df)
    else:
        feature = scaler.transform(df)

    print(df.shape)
    return feature
def main(input):
    """Classify one audio recording and return the model's prediction.

    Args:
        input: Path to the audio file, as delivered by the Gradio ``Audio``
            component configured with ``type="filepath"``. The name shadows
            the builtin but is kept so the callback signature is unchanged.

    Returns:
        Whatever ``model.predict`` returns for the extracted feature vector,
        or ``None`` when no audio was supplied.
    """
    # Gradio passes None when no audio is present (e.g. the live interface
    # firing after the recording is cleared); skip inference instead of
    # crashing inside openSMILE.
    if not input:
        return None

    # Extract features with openSMILE.
    feature_vector = extract_features([input])
    # Run inference with the loaded model.
    prediction = model.predict(feature_vector)
    return prediction
# Audio can come from the microphone or an uploaded file; the callback
# receives it as a path on disk.
audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

# Wire the classifier callback to a single audio input and a text output.
# live=True re-runs the callback whenever the input changes.
demo = gr.Interface(
    fn=main,
    inputs=[audio_input],
    outputs=["textbox"],
    title='Question Classifier Model',
    live=True,
)

# Start the web server as soon as the script is executed.
demo.launch(debug=True)