poniedziałek, 4 lipca 2011

Plotting PCA results in ggplot2

Default PCA plots in R are disgusting. ggplot2 comes to the rescue.


First, let's write some data generating functions that will be useful for PCA:
getVector <- function(n = 20, dims = 2) {
  out <- vector()
  breaks <- round((n/dims)*(1:dims))
  start <- 1  
  for(i in 1:dims) {
    num <- rnorm(1, 10, 1)
    for(j in start:(breaks[i])) {
      out[j] <- num + rnorm(1, 0, 0.2)
    }
    start <- breaks[i] + 1
  } 
  out
}

getData <- function(rows = 10, cols = 20, groups = 2, dims = 2, distort = 0.5) {
  out <- matrix(data = 10, nrow = rows, ncol = cols)
  ### make groups ###
  breaks <- round((rows/groups)*(1:groups))
  start <- 1  
  for(i in 1:groups) {
      vector <- getVector(n = cols, dim = 2)
    for(j in start:(breaks[i])) {
      out[j,] <- vector
    }
    start <- breaks[i] + 1
  }
  ### make error ###
  for(i in 1:rows) {
    out[i,] <- out[i,] + rnorm(n = cols, mean = 0, sd = distort)
  }
  ### return ###
  rownames(out) <- paste("Row ", 1:rows, sep = "")
  colnames(out) <- paste("Col ", 1:cols, sep = "")
  out
}

getGroup <- function(rows = 10, groups = 2) {
  out <- vector()
  breaks <- round((rows/groups)*(1:groups))
  start <- 1  
  for(i in 1:groups) {
    for(j in start:(breaks[i])) {
      out[j] <- i
    }
    start <- breaks[i] + 1
  }
  out <- as.factor(out)
  out
}

Let's write PCA plotting function:
makePcaPlot <- function(x = getData(), group = getGroup(), title = "") {
  require(ggplot2)
  require(RColorBrewer)
  data <- x
  data <- t(apply(data, 1, scale))
  rownames(data) <- rownames(x)
  colnames(data) <- colnames(x)
  mydata <- t(data)  
  groupFactor <- group
  mydata.pca <- prcomp(mydata, retx=TRUE, center=TRUE, scale.=TRUE)  

  percent <- round((((mydata.pca$sdev)^2 / sum(mydata.pca$sdev^2))*100)[1:2])
  loadings <- mydata.pca$rotation
  rownames(loadings) <- colnames(mydata)
  scores <- mydata.pca$x
  cex = 3
  group <- as.character(groupFactor)
  what <- groupFactor == "1"; group[what] <- "One"
  what <- groupFactor == "2"; group[what] <- "Two"
  group <- factor(group)
  group <- factor(group, levels=c("Two", "One"), ordered=TRUE)
  colors = sample(1:11, size = length(levels(group)))


  PCA1 <- scores[,1]
  PCA2 <- scores[,2]
  base_size = 10

  qplot(PCA2, PCA1, geom="blank", main = title, xlab = paste("PCA2 (", percent[2], "%)", sep = ""), ylab = paste("PCA1 (", percent[1], "%)", sep = "")) + geom_point(aes(colour = group), size = 6, alpha = 3/4) + scale_colour_manual(values=brewer.pal(11, "RdYlGn")[11:1][colors]) + opts(
      axis.text.x = theme_text(size = base_size * 1.3 , lineheight = 0.9, colour = "grey50", hjust = 1, angle = 90),
      axis.text.y =       theme_text(size = base_size * 1.3, lineheight = 0.9, colour = "grey50", hjust = 1)
      )
}
Finally, let's generate plot:
makePcaPlot(getData(30,4,2,distort = 0.7), getGroup(30,2))

Brak komentarzy:

Prześlij komentarz