GMM高斯混合模型及EM算法(matlab实现)

单元

%绘制男女生身高的GMM
clc
clear all
%男女生共取2000人,女生平均身高163,男声平均身高180
male=180+sqrt(10)*randn(1,1000);
%产生均值为180,方差为10的一个1*1000的随机数
female=163+sqrt(10)*randn(1,1000);
h=[female male];
%Step 1.首先根据经验来分别对男女生的均值、方差和权值进行初始化
mu1_first=170;sigma1_first=10;w1_first=0.7;%男生的
mu2_first=160;sigma2_first=10;w2_first=0.3;%以我们学校理工院校为例
outcome=[mu1_first,sigma1_first,w1_first,mu2_first,sigma2_first,w2_first];
max_iteration=1000;
for i_iter=1:max_iteration
    %Step 2. 
    %计算男的身高在男分布中的响应R1i
    %计算女的身高在男分布中的响应R2i
    h=[female male];%混合
    N=2000;
    R1i=zeros(1,N);
    R2i=zeros(1,N);
    for i=1:N
        p1=w1_first*pdf('norm',h(i),mu1_first,sigma1_first);
        p2=w2_first*pdf('norm',h(i),mu2_first,sigma2_first);
        %p1,p2权重*男女生的后验概率
       R1i(i)=p1/(p1+p2);
       R2i(i)=p2/(p1+p2);
    end
    %Step 3.
    %更新男、女生身高分布的期望mu
    s1=0;
    s2=0;
    for i=1:N
        s1=s1+R1i(i)*h(i);
        s2=s2+R2i(i)*h(i);
    end
    s11=sum(R1i);
    s22=sum(R2i);
    mu1_last=s1/s11;
    mu2_last=s2/s22;
    %Step 4.
    %更新男、女生身高分布的标准差sigma(一维)
    t1=0;
    t2=0;
    for i=1:N
        t1=t1+R1i(i)*(h(i)-mu1_last)^2;
        t2=t2+R2i(i)*(h(i)-mu2_last)^2;
    end
    t11=sum(R1i);
    t22=sum(R2i);
    sigma1_last=sqrt(t1/t11);
    sigma2_last=sqrt(t2/t22);
    %Step 5.
    %更新权值
    w1_last=s11/N;
    w2_last=s22/N;
    % 终止条件
    outcome_=[mu1_last,sigma1_last,w1_last,mu2_last,sigma2_last,w2_last];
    if norm(outcome_-outcome(end,:))<0.01
        break;
    end
    [mu1_first]=outcome_(1,1);
    [sigma1_first]=outcome_(1,2);
    [w1_first]=outcome_(1,3);
    [mu2_first]=outcome_(1,4);
    [sigma2_first]=outcome_(1,5);
    [w2_first]=outcome_(1,6);

    outcome=[outcome;outcome_];
end



%画出男女生的权重迭代历史
figure(3);
iteration=size(outcome,1);
x1=1:0.5:iteration;
y1=interp1(outcome(:,3),x1,'spline');
%interp1用法参考https://www.cnblogs.com/jiahuiyu/articles/4978005.html   
plot(y1,'linewidth',1.5);%画出男生的权重迭代历史
hold on;
grid on;
y2=interp1(outcome(:,6),x1,'spline');
plot(y2,'r','linewidth',1.5);%画出女生权重迭代历史
legend('男生权重变化','女生权重变化','location','northeast');%坐标轴设置,将标识框放置在图的左上角
title('Changes in weights of boys and girls with the number of iterations');
xlabel('Number of iterations');
ylabel('Weights');
axis([1 iteration 0 1]);
figure(4);
hold on;
%男生女生以及混合后身高的概率密度曲线
t=linspace(140,220,550);%500个
%女生的概率密度函数
yy2=normpdf(t,mu2_last,sigma2_last);
plot(t,yy2,'m','linewidth',1.5);
%男生的概率密度函数
yy1=normpdf(t,mu1_last,sigma1_last);
plot(t,yy1,'linewidth',1.5);
y3=w1_last*yy1+w2_last*yy2;
plot(t,y3,'k','linewidth',1.5);
legend('女生','男生','混合');
title('男生女生以及混合后身高的概率密度曲线');
xlabel('身高/cm');ylabel('概率');hold off;%坐标轴设置
hold off;
%画高斯混合模型的随迭代次数的变化而变化图形
figure(5)
plot(t,yy2,'--g','linewidth',1.5);
hold on;
plot(t,yy1,'linewidth',1.5);
title('高斯混合模型的随迭代次数的变化图形');
xlabel('身高/cm');
ylabel('概率密度');
grid on;
weights1=outcome(:,3);
weights2=outcome(:,6);
%
%将字符串基本信息显示在右上角
text(180,0.13,'当前男生权重W1: ','FontSize',14,'FontWeight','demi');%图中显示权重
text(180,0.117,'当前女生权重W2: ','FontSize',14,'FontWeight','demi');
c=colormap(lines(iteration));%定义times条不同颜色的线条
for i=1:iteration
      pause(0.2);
    %绘制迭代i次的图像,以不同颜色
    y4=weights1(i)*yy1+weights2(i)*yy2;%对两个高斯概率密度图像进行加权
    plt=plot(t,y4,'color',c(i,:),'linewidth',1.5);%绘画加权后的图像
    %weights里面的数值转换为字符串
    str1=num2str(weights1(i));
    str2=num2str(weights2(i));
    %权重跟着i变换
    tex1=text(210,0.13,str1,'FontSize',14,'FontWeight','demi','color','r');%显示当前权重值
    tex2=text(210,0.117,str2,'FontSize',14,'FontWeight','demi','color','r');
    %删除原来的,动态显示
    pause(0.4);
    if(iteration>i)
        delete(tex1);
        delete(tex2);
        delete(plt);
    end
end
hold off;

多元高斯

使用库


X = [mvnrnd(mu1,Sigma1,1000);mvnrnd(mu2,Sigma2,1000)]; % 2000 x 2
GMModel = fitgmdist(X,2); %fit GMM distribution

figure;
y = [zeros(1000,1);ones(1000,1)];
hold on;
fsurf (@(x1,x2)pdf(GMModel,[x1 x2]),[150 200 150 200]);
title('{\bf 3-D Fitted Gaussian Mixture}');
hold off;

figure;
y = [zeros(1000,1);ones(1000,1)];
h = gscatter(X(:,1),X(:,2),y);
hold on;
fcontour (@(x1,x2)pdf(GMModel,[x1 x2]),[150 200 150 200]);
axis([150 200 150 200]);
title('{\bf Fitted Gaussian Mixture Contours}');
legend(h,'Model 0','Model1')
hold off;

自己写

%绘制男女生身高的GMM
clc
clear all
close all

mu1_true = [180 180]; % 均指向量
sigma1_true = [16 0; 0 16]; % 协方差矩阵
x1 = mvnrnd(mu1_true, sigma1_true, 1000);

mu2_true = [163 163]; % 均指向量
sigma2_true = [25 0; 0 25]; % 协方差矩阵
x2 = mvnrnd(mu2_true, sigma2_true, 1000);

% 绘制二元正态分布散点图
x=[x1;x2];
scatter(x(:,1),x(:,2),'filled');
grid on;
%%
%Step 1.首先根据经验来分别对男女生的均值、方差和权值进行初始化
mu1_first=170;sigma1_first=10;w1_first=0.7;%男生的
mu2_first=160;sigma2_first=10;w2_first=0.3;%以我们学校理工院校为例
outcome=[mu1_first,sigma1_first,w1_first,mu2_first,sigma2_first,w2_first];
max_iteration=1000;
for i_iter=1:max_iteration
    %Step 2. 
    %计算男的身高在男分布中的响应R1i
    %计算女的身高在男分布中的响应R2i
    N=2000;
    R1i=zeros(1,N);
    R2i=zeros(1,N);
    for i=1:N
        p1=w1_first*mvnpdf(x(i,:),[mu1_first,mu1_first],[sigma1_first,0;0,sigma1_first]);
        p2=w2_first*mvnpdf(x(i,:),[mu2_first,mu2_first],[sigma2_first,0;0,sigma2_first]);
        %p1,p2权重*男女生的后验概率
       R1i(i)=p1/(p1+p2);
       R2i(i)=p2/(p1+p2);
    end
    %Step 3.
    %更新男、女生身高分布的期望mu
    s1=0;
    s2=0;
    for i=1:N
        s1=s1+R1i(i)*(x(i,1)+x(i,2));
        s2=s2+R2i(i)*(x(i,1)+x(i,2));
    end
    s11=sum(R1i);
    s22=sum(R2i);
    mu1_last=s1/s11/2;
    mu2_last=s2/s22/2;
    %Step 4.
    %更新男、女生身高分布的标准差sigma(一维)
    t1=0;
    t2=0;
    for i=1:N
        t1=t1+R1i(i)*((x(i,1)-mu1_last)^2+(x(i,2)-mu1_last)^2);
        t2=t2+R2i(i)*((x(i,1)-mu2_last)^2+(x(i,2)-mu2_last)^2);
    end
    t11=sum(R1i);
    t22=sum(R2i);
    sigma1_last=sqrt(t1/t11/2);
    sigma2_last=sqrt(t2/t22/2);
    %Step 5.
    %更新权值
    w1_last=s11/N;
    w2_last=s22/N;
    % 终止条件
    outcome_=[mu1_last,sigma1_last,w1_last,mu2_last,sigma2_last,w2_last];
    if norm(outcome_-outcome(end,:))<0.01
        break;
    end
    [mu1_first]=outcome_(1,1);
    [sigma1_first]=outcome_(1,2);
    [w1_first]=outcome_(1,3);
    [mu2_first]=outcome_(1,4);
    [sigma2_first]=outcome_(1,5);
    [w2_first]=outcome_(1,6);

    outcome=[outcome;outcome_];
end
mu1=[outcome(end,1) outcome(end,1)];
Sigma1=[outcome(end,2),0;0,outcome(end,2)];
mu2=[outcome(end,4) outcome(end,4)];
Sigma2=[outcome(end,5),0;0,outcome(end,5)];
figure
[X1,X2] = meshgrid(linspace(150,200,100)',linspace(150,200,100)');
X = [X1(:) X2(:)];
y = mvnpdf(X,mu1,Sigma1);
y = reshape(y,length(X2),length(X1));
contour(X1,X2,y)
hold on
y = mvnpdf(X,mu2,Sigma2);
y = reshape(y,length(X2),length(X1));
contour(X1,X2,y)
xlabel('x')
ylabel('y')

clc
clear all
close all
[X, L_true, ~,~,~]=getReal10RoomData(1);
% X=X(L_true<=2);
% L_true=L_true(L_true<=2);
K=max(L_true);
X = zscore(X);% 按照列标准化
N=size(X,1);
W=zeros(N,N);
x=[];
for i=1:N
    for j=1:N
%         W(i,j)=(X(i,:)*X(j,:)'/(norm(X(i,:))*norm(X(j,:))))^10;
%         if  isnan(W(i,j))
%             W(i,j)=0;
%         end
        W(i,j)=exp(-norm(X(i,:)-X(j,:),2));
%         if i==j
%             W(i,j)=0;
%         end
        if W(i,j)>0.5
            x=[x;[i,j]];
        end
    end
end



% mu1_true = [180 180]; % 均指向量
% sigma1_true = [16 0; 0 16]; % 协方差矩阵
% x1 = mvnrnd(mu1_true, sigma1_true, 1000);
% 
% mu2_true = [163 163]; % 均指向量
% sigma2_true = [25 0; 0 25]; % 协方差矩阵
% x2 = mvnrnd(mu2_true, sigma2_true, 1000);
% 
% % 绘制二元正态分布散点图
% x=[x1;x2];
% scatter(x(:,1),x(:,2),'filled');
% grid on;
%%
%Step 1.首先根据经验来分别对男女生的均值、方差和权值进行初始化

mu1_first=60;sigma1_first=10;w1_first=0.5;

mu2_first=70;sigma2_first=10;w2_first=0.5;%以我们学校理工院校为例
outcome=[mu1_first,sigma1_first,w1_first,mu2_first,sigma2_first,w2_first];
max_iteration=1000;
N=size(x,1);
for i_iter=1:max_iteration
    %Step 2. 
    %计算男的身高在男分布中的响应R1i
    %计算女的身高在男分布中的响应R2i
    
    R1i=zeros(1,N);
    R2i=zeros(1,N);
    for i=1:N
        p1=w1_first*mvnpdf(x(i,:),[mu1_first,mu1_first],[sigma1_first,0;0,sigma1_first]);
        p2=w2_first*mvnpdf(x(i,:),[mu2_first,mu2_first],[sigma2_first,0;0,sigma2_first]);
        %p1,p2权重*男女生的后验概率
       R1i(i)=p1/(p1+p2);
       R2i(i)=p2/(p1+p2);
    end
    %Step 3.
    %更新男、女生身高分布的期望mu
    s1=0;
    s2=0;
    for i=1:N
        s1=s1+R1i(i)*(x(i,1)+x(i,2));
        s2=s2+R2i(i)*(x(i,1)+x(i,2));
    end
    s11=sum(R1i);
    s22=sum(R2i);
    mu1_last=s1/s11/2;
    mu2_last=s2/s22/2;
    %Step 4.
    %更新男、女生身高分布的标准差sigma(一维)
    t1=0;
    t2=0;
    for i=1:N
        t1=t1+R1i(i)*((x(i,1)-mu1_last)^2+(x(i,2)-mu1_last)^2);
        t2=t2+R2i(i)*((x(i,1)-mu2_last)^2+(x(i,2)-mu2_last)^2);
    end
    t11=sum(R1i);
    t22=sum(R2i);
    sigma1_last=sqrt(t1/t11/2);
    sigma2_last=sqrt(t2/t22/2);
    %Step 5.
    %更新权值
    w1_last=s11/N;
    w2_last=s22/N;
    % 终止条件
    outcome_=[mu1_last,sigma1_last,w1_last,mu2_last,sigma2_last,w2_last];
    if norm(outcome_-outcome(end,:))<0.001
        break;
    end
    [mu1_first]=outcome_(1,1);
    [sigma1_first]=outcome_(1,2);
    [w1_first]=outcome_(1,3);
    [mu2_first]=outcome_(1,4);
    [sigma2_first]=outcome_(1,5);
    [w2_first]=outcome_(1,6);

    outcome=[outcome;outcome_];
end
mu1=[outcome(end,1) outcome(end,1)];
Sigma1=[outcome(end,2),0;0,outcome(end,2)];
mu2=[outcome(end,4) outcome(end,4)];
Sigma2=[outcome(end,5),0;0,outcome(end,5)];
figure
[X1,X2] = meshgrid(linspace(mu1(1)-100,mu1(1)+100,100)',linspace(mu1(1)-100,mu1(1)+100,100)');
X = [X1(:) X2(:)];
y = mvnpdf(X,mu1,Sigma1);
y = reshape(y,length(X2),length(X1));
contour(X1,X2,y)
hold on
y = mvnpdf(X,mu2,Sigma2);
y = reshape(y,length(X2),length(X1));
contour(X1,X2,y)
xlabel('x')
ylabel('y')
axis([0,size(W,1),0,size(W,1)]);

版权声明:本文为博主北海北_CrazyZheng原创文章,版权归属原作者,如果侵权,请联系我们删除!

原文链接:https://blog.csdn.net/lusics/article/details/123202422

共计人评分,平均

到目前为止还没有投票!成为第一位评论此文章。

(0)
扎眼的阳光的头像扎眼的阳光普通用户
上一篇 2022年3月2日 下午4:28
下一篇 2022年3月2日 下午4:43

相关推荐