# Gaussian mixture modeling with EM

# true parameters
priorp <- 0.6
m1 <- 0
m2 <- 2
s1 <- 1
s2 <- 0.707

# generate some data with 2 classes
N <- 1000
cl <- rbinom(N, 1, priorp)
x <- cl * rnorm(N, m1, s1) + (1 - cl) * rnorm(N, m2, s2)

# the E-step: compute posterior class probabilities from the
# mixing proportions and the component densities
estep <- function(x, theta, K = 2) {
  d <- matrix(0.0, length(x), K)
  for (k in 1:K) {
    d[, k] <- theta$p[k] * dnorm(x, theta$m[k], theta$s[k])
  }
  d / rowSums(d)  # normalize each row to sum to 1
}

# the M-step: re-estimate mixing proportions, means, and standard
# deviations using the posterior probabilities as weights
mstep <- function(x, postp, K = 2) {
  p <- numeric(K)
  m <- numeric(K)
  s <- numeric(K)
  for (k in 1:K) {
    p[k] <- mean(postp[, k])
    m[k] <- weighted.mean(x, w = postp[, k])
    s[k] <- sqrt(weighted.mean((x - m[k])^2, w = postp[, k]))
  }
  list(p = p, m = m, s = s)
}

# initial values
theta <- list(
  p = c(0.5, 0.5),
  m = c(0, 2),
  s = c(0.5, 0.5)
)

# run EM for a fixed number of iterations
for (i in 1:100) {
  postp <- estep(x, theta, K = 2)
  theta <- mstep(x, postp, K = 2)
}

# plot: the fitted mixture density over a histogram of the data
dmix <- function(x, theta) {
  d <- 0
  for (k in seq_along(theta$p)) {
    d <- d + theta$p[k] * dnorm(x, theta$m[k], theta$s[k])
  }
  d
}

hist(x, freq = FALSE, breaks = "FD")
curve(dmix(x, theta), add = TRUE)
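
# A minimal sketch of running EM to convergence instead of a fixed
# iteration count: EM never decreases the observed-data log-likelihood,
# so we can iterate until it stops improving. The loglik() helper and
# the 1e-8 tolerance are illustrative choices, not part of the script
# above; it reuses estep(), mstep(), and dmix() as defined there.
loglik <- function(x, theta) {
  sum(log(dmix(x, theta)))
}

# re-initialize and iterate until the log-likelihood stabilizes
theta <- list(p = c(0.5, 0.5), m = c(0, 2), s = c(0.5, 0.5))
ll_old <- -Inf
repeat {
  postp <- estep(x, theta, K = 2)
  theta <- mstep(x, postp, K = 2)
  ll_new <- loglik(x, theta)
  if (ll_new - ll_old < 1e-8) break
  ll_old <- ll_new
}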