|
import torch |
|
import joblib |
|
import librosa |
|
import numpy as np |
|
from torch import nn |
|
from transformers import AutoModel |
|
|
|
class VoiceRecognitionModel(nn.Module):
    """Convolutional classifier over single-channel 2-D feature maps.

    Expects input of shape (batch, 1, H, W) — presumably MFCC matrices
    produced by ``extract_features`` (TODO confirm against training code) —
    and returns per-class logits of shape (batch, num_classes).
    """

    def __init__(self, num_classes):
        super().__init__()
        # Single conv stage; adaptive pooling makes the classifier head
        # independent of the input's spatial size.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(32, num_classes)

    def forward(self, x):
        # NOTE(review): the original forward returned its input unchanged and
        # used neither conv1 nor num_classes — it could not classify. This is
        # a minimal working head; verify it matches the architecture the
        # saved checkpoint was trained with before loading weights strictly.
        x = torch.relu(self.conv1(x))
        x = self.pool(x).flatten(1)
        return self.fc(x)
|
|
|
def extract_features(file_path, max_pad_len=174):
    """Load an audio file and return a fixed-width MFCC feature matrix.

    Parameters
    ----------
    file_path : str
        Path to an audio file readable by ``librosa.load``.
    max_pad_len : int
        Target number of time frames; shorter clips are zero-padded on the
        right, longer ones are truncated.

    Returns
    -------
    numpy.ndarray
        MFCC matrix of shape (40, max_pad_len).
    """
    # NOTE(review): the original body was `pass`; this is the standard
    # MFCC pad/truncate pipeline. 40 coefficients is an assumption — confirm
    # it matches the feature count the checkpointed model was trained on.
    audio, sample_rate = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    pad_width = max_pad_len - mfccs.shape[1]
    if pad_width > 0:
        # Zero-pad the time axis out to max_pad_len.
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode="constant")
    else:
        # Clip clips that are longer than the target width.
        mfccs = mfccs[:, :max_pad_len]
    return mfccs
|
|
|
def pipeline():
    """Load the inference artifacts: model, label encoder, feature params.

    Returns
    -------
    tuple
        ``(model, label_encoder, feature_params)`` where ``model`` is a
        ``VoiceRecognitionModel`` in eval mode with checkpoint weights
        loaded, and the other two are whatever objects were serialized to
        the joblib files (presumably a sklearn LabelEncoder and a dict of
        feature-extraction settings — TODO confirm).

    Raises
    ------
    FileNotFoundError
        If any of the three artifact files is missing from the CWD.
    """
    model = VoiceRecognitionModel(num_classes=7)
    # map_location keeps loading working on CPU-only hosts even when the
    # checkpoint was saved from a GPU run (torch.load otherwise tries to
    # restore tensors onto the original CUDA device).
    model.load_state_dict(
        torch.load("voice_recognition_model.pth", map_location="cpu")
    )
    model.eval()

    # NOTE(review): joblib.load unpickles arbitrary code — only load these
    # files from a trusted source.
    label_encoder = joblib.load("label_encoder.joblib")
    feature_params = joblib.load("feature_params.joblib")

    return model, label_encoder, feature_params