数据加载
数据提取方式,利用前面的文章,提取MFCC,作为特征结果,并且取定长的语音长度为特征,保证输入的长度一致
如果需要提取不定长度的可以直接使用PS 或者其他工具直接提取MFCC
这里提一下:time=300 表示取 300 帧。帧移为 10ms、窗长为 25ms 时,对应的定长语音为 (300-1)×10 + 25 = 3015ms,即约 3s+15ms。
def mfcc_extract(file_name):
    """Extract a fixed-length MFCC feature matrix from an audio file.

    The log mel filterbank is computed for the whole utterance, then the
    frame axis is normalized to exactly 300 frames (zero-padding short
    utterances, truncating long ones) so every sample has the same shape.
    With a 10 ms hop and 25 ms window, 300 frames cover
    (300-1)*10 + 25 = 3015 ms of audio.

    Parameters
    ----------
    file_name : str
        Path to the audio file, read via the project helper
        ``mfcc_read_file``.

    Returns
    -------
    numpy.ndarray
        MFCC features of shape (300, 13).
    """
    # The second return value (duration) is unused here; the original code
    # bound it to `time` and immediately overwrote it.
    data, _, rate = mfcc_read_file(filename=file_name)
    # 40-filter log mel filterbank; MFCCs are its DCT-II (first 13 coeffs).
    mel_spec = ps.logfbank(data, rate, nfilt=40)
    n_frames = mel_spec.shape[0]
    if n_frames < 300:
        # Zero-pad short utterances up to the fixed 300-frame length.
        mel_spec = np.pad(mel_spec, ((0, 300 - n_frames), (0, 0)),
                          'constant', constant_values=0)
    # Truncate long utterances; a no-op when n_frames <= 300.  A single
    # DCT call replaces the duplicated per-branch calls of the original.
    part = mel_spec[:300, :]
    return dct(part, type=2, axis=1, norm='ortho')[:, :13]
开始加载存好的feature
加载后的固定格式特征,是否需要转成 tensor 取决于后续使用的模型(SVM 不需要,神经网络需要),依情况而定。
# -*- coding:utf-8 -*-
# @Time : 2022/2/28 21:12
# @Author: R
# @File : IEMPCAP_dataset.py
import torch
import torch.utils.data.dataset as Dataset
import torch.utils.data.dataloader as DataLoader
import numpy as np
import pickle
import joblib
# Pre-extracted MFCC features + labels for the four-emotion task,
# serialized with joblib (pickle-compatible).  Loaded once at import time.
file_name = 'four_emotion/four_label_mfcc.pkl'
with open(file_name, 'rb') as file:
    # Echo the path so training logs record which feature file was used.
    print(file_name)
    iemocap_data = joblib.load(file)
def str_to_float(ensor):
    """Convert an iterable of string/numeric values to a list of floats.

    The parameter name ``ensor`` (sic) is kept for backward compatibility
    with any keyword-argument callers.

    Parameters
    ----------
    ensor : array-like
        Values convertible to float (e.g. strings read from a pickle).

    Returns
    -------
    list[float]
    """
    # List comprehension replaces the original manual append loop.
    return [float(v) for v in np.array(ensor)]
class IEMOCAP_Dataset(Dataset.Dataset):
    """Torch dataset wrapping a (features, labels) pair.

    ``data_Label[0]`` holds the per-sample feature arrays and
    ``data_Label[1]`` the matching labels; the two sequences are parallel.
    """

    def __init__(self, data_Label):
        # Unpack the (features, labels) pair into parallel attributes.
        self.Data, self.Label = data_Label[0], data_Label[1]

    def __len__(self):
        # Dataset size is the number of feature entries.
        return len(self.Data)

    def __getitem__(self, index):
        # Features are converted to a float tensor; the label is passed
        # through unchanged.
        return torch.Tensor(self.Data[index]), self.Label[index]
def get_data(source_data):
    """Split raw (features, label) records into parallel data/label lists.

    Records labeled ``'fru'`` (frustration) are discarded; the remaining
    four emotion labels are mapped to integer class ids.

    Parameters
    ----------
    source_data : iterable
        Records where ``record[0]`` is the feature sequence and
        ``record[1]`` is the emotion label string.

    Returns
    -------
    tuple[list[numpy.ndarray], list[int]]
        Feature arrays and their integer class labels.
    """
    # Hoisted out of the loop: this mapping is constant, so there is no
    # reason to rebuild the dict on every iteration as the original did.
    label_index_mapping = {'hap': 0, 'sad': 1, 'ang': 2, 'neu': 3}
    data_set = []
    data_label = []
    for record in source_data:
        features, label = record[0], record[1]
        if label == 'fru':
            # Frustration is excluded from the four-class task; skipping
            # early also avoids a pointless float conversion.
            continue
        data_set.append(np.array(str_to_float(features)))
        data_label.append(label_index_mapping[label])
    return data_set, data_label
def ge_dataset():
    """Build the full dataset from the module-level pickled features.

    Returns
    -------
    tuple[numpy.ndarray, numpy.ndarray]
        Feature array and integer label array.
    """
    features, labels = get_data(iemocap_data)
    return np.array(features), np.array(labels)
模型训练
模型训练,将加载的数据直接放入模型中
# Load fixed-length MFCC features and integer emotion labels.
data_mfcc, data_label = IEMPCAP_dataset.ge_dataset()
# BUG FIX: the normalized features were computed but never used -- the
# original split consumed the raw data_mfcc, silently discarding the
# Normalizer output.  Split the normalized array instead.
# NOTE(review): Normalizer expects 2-D input; ge_dataset appears to return
# (N, 300, 13) -- confirm the features are flattened upstream.
array_data_mfcc = Normalizer().fit_transform(data_mfcc)
X_train, X_test, Y_train, Y_test = train_test_split(
    array_data_mfcc, data_label, test_size=0.2, shuffle=True)
# One-vs-one RBF-kernel SVM for the four-class emotion task.
clf = svm.SVC(decision_function_shape='ovo', kernel='rbf')
clf.fit(X_train, Y_train)
预测结果
# Report accuracy on both splits to gauge overfitting.
pre_ = clf.predict(X_test)
train_pre = clf.predict(X_train)
train_acc = accuracy_score(train_pre, Y_train)
test_acc = accuracy_score(pre_, Y_test)
print("训练准确率:", train_acc)
print("测试准确率:", test_acc)
SVM的主要的功能,是将数据都输入到一个超平面,然后利用超向量来进行分割,来实现分类,得到结果,所以,这里的数据在它看来都是一个样子,但是在神经网络当中,这些可能是不同的,比如Attention,可能会注意到某个模块,有着不一样的权重数值,都是可能的
文章出处登录后可见!
已经登录?立即刷新