niedziela, 31 lipca 2011

Initialization of github repository

# Install Git together with its GUI and documentation packages.
sudo apt-get install git-core git-gui git-doc
# Check that SSH access to GitHub works (-T: no terminal is allocated).
ssh -T git@github.com
# Global identity and GitHub settings; the values below are placeholders.
git config --global user.name "John Doe"
git config --global user.email "doe.john@gmail.com"
git config --global github.user jodoe
git config --global github.token 12345yourtokenabcdef
# Create the repository in the current directory and commit everything in it.
git init
git add .
git commit -m 'Initial commit'
# Register the GitHub remote and push, setting origin/master as upstream.
git remote add origin git@github.com:jodoe/jodoeproject.git
git push -u origin master

[solved] Cassandra TApplicationError type: 6

Today I got a strange error while using Cassandra:

me.prettyprint.hector.api.exceptions.HCassandraInternalException:
Cassandra encountered an internal error processing this request:
TApplicationError type: 6 message:Internal error processing
batch_mutate

It happened because I was using OrderPreservingPartitioner with IntegerType as the key validator.
Trying to insert a key greater than 127 gives the above error.
This partitioner should be used with one of the String types (e.g. UTF8).

Solution: ByteOrderedPartitioner.

środa, 6 lipca 2011

[solved] Missing artifact hector-core

Another trap...

While trying to add the hector-core dependency through m2eclipse, I got this error:

Missing artifact me.prettyprint:hector-core:bundle:0.8.0-1:compile

Hint: this is not a bundle...

40 minutes lost...

wtorek, 5 lipca 2011

Plotting ROC curves in ggplot2

Default ROC curves in R are disgusting. ggplot2 comes to the rescue.


First, let's write a data-generating function that will be useful for ROC:
# Simulate scored observations for a two-class ROC demonstration.
#
# The first round(len / 2) observations are labelled "Disease" and start from
# a base score of 1; the remaining ones are labelled "Control" and start from
# -1. Independent normal noise with standard deviation `distort` is added to
# every score.
#
# Args:
#   len:     total number of observations (non-negative).
#   distort: standard deviation of the noise added to the base scores.
#
# Returns:
#   A data frame sorted by ascending score with columns `pos` (the score) and
#   `group` (a factor holding the "Disease"/"Control" labels).
getExampleDataForROC <- function(len = 100, distort = 1) {
  stopifnot(is.numeric(len), length(len) == 1, len >= 0)

  half <- round(len / 2)
  class_sizes <- c(half, len - half)

  # rep() with explicit sizes replaces the 1:half / (half + 1):len ranges,
  # which count backwards (and create phantom rows) when a class is empty.
  group <- as.factor(rep(c("Disease", "Control"), times = class_sizes))
  base_score <- rep(c(1, -1), times = class_sizes)

  # One vectorised draw consumes the random stream exactly like the former
  # element-by-element loop, so seeded results are unchanged.
  score <- base_score + rnorm(length(base_score), mean = 0, sd = distort)

  ranking <- order(score)
  data.frame(pos = score[ranking], group = group[ranking])
}

Let's write ROC plotting functions:
# Compute the points of an empirical ROC curve for simulated data.
#
# Data come from getExampleDataForROC(len, distort), which returns rows sorted
# by ascending score. For every cut position i in 1..(n - 1), rows 1..i are
# treated as predicted "Control" and rows (i + 1)..n as predicted "Disease".
#
# Args:
#   len:     number of simulated observations.
#   distort: noise standard deviation forwarded to getExampleDataForROC().
#
# Returns:
#   A numeric matrix with columns `fpr` and `tpr`, one row per cut position
#   ordered from the strictest cut to the most permissive one, followed by a
#   closing (1, 1) row. The matrix has as many rows as the simulated data.
getPointsForROC <- function(len = 100, distort = 1) {
  basal <- getExampleDataForROC(len, distort)
  n <- nrow(basal)
  is_disease <- basal[, 2] == "Disease"
  is_control <- basal[, 2] == "Control"

  # The former `1:len-1` parsed as `(1:len) - 1` and only worked because
  # assignments to index 0 are silently ignored; seq_len() states the intended
  # 1..(n - 1) range and is also safe for a single-row input.
  cuts <- seq_len(max(n - 1, 0))

  # Cumulative counts give, for every cut, the cases at or below it ...
  fn <- cumsum(is_disease)[cuts]
  tn <- cumsum(is_control)[cuts]
  # ... and the class totals minus those give the cases above it.
  tp <- sum(is_disease) - fn
  fp <- sum(is_control) - tn

  tpr <- tp / (tp + fn)
  fpr <- fp / (fp + tn)

  cbind(fpr = c(rev(fpr), 1), tpr = c(rev(tpr), 1))
}


# Draw ROC curves for simulated data at four noise levels on one plot.
#
# For each distortion in 1, 1.5, 2 and 2.5, getPointsForROC(len, distortion)
# supplies a matrix whose first column is the false positive rate and whose
# second column is the true positive rate. The curves are drawn as coloured
# lines. The ggplot2 and RColorBrewer packages must be installed; they are
# called through their namespaces, so they do not have to be attached.
#
# Args:
#   len: number of simulated observations per curve.
#
# Returns:
#   A ggplot object.
plotROC <- function(len = 100) {
  distortions <- c(1, 1.5, 2, 2.5)

  # Build one labelled data frame per curve. The label is repeated to the
  # actual number of points rather than assuming it equals `len`.
  curves <- lapply(distortions, function(distort) {
    pts <- getPointsForROC(len, distort)
    data.frame(
      TPR = pts[, 2],
      FPR = pts[, 1],
      GeneSet = rep(paste("Distort =", distort), nrow(pts))
    )
  })
  data <- do.call(rbind, curves)

  colors <- RColorBrewer::brewer.pal(5, "YlOrRd")[2:5]

  # `data` is a layer argument, not an aesthetic, so it is no longer passed
  # inside aes(); the layer inherits the data frame given to qplot().
  ggplot2::qplot(
    FPR, TPR,
    data = data, geom = "blank", main = "ROC curve",
    xlab = "False Positive Rate (1-Specificity)",
    ylab = "True Positive Rate (Sensitivity)"
  ) +
    ggplot2::geom_line(
      ggplot2::aes(x = FPR, y = TPR, colour = GeneSet),
      size = 2, alpha = 0.7
    ) +
    ggplot2::scale_colour_manual(values = colors)
}
Finally, let's generate the plot:
# Draw the ROC comparison using 1000 simulated observations per curve.
plotROC(1000)

poniedziałek, 4 lipca 2011

Plotting PCA results in ggplot2

Default PCA plots in R are disgusting. ggplot2 comes to the rescue.


First, let's write some data generating functions that will be useful for PCA:
# Generate a numeric vector made of `dims` consecutive blocks.
#
# The n positions are split into `dims` blocks at round((n / dims) * k). Each
# block gets its own level drawn from N(10, 1), and every element of the block
# is that level plus N(0, 0.2) noise.
#
# Args:
#   n:    length of the vector (non-negative).
#   dims: number of blocks.
#
# Returns:
#   A numeric vector of length round(n).
getVector <- function(n = 20, dims = 2) {
  stopifnot(is.numeric(n), length(n) == 1, n >= 0)

  breaks <- round((n / dims) * seq_len(dims))
  # Block sizes replace the start:breaks[i] ranges, which ran backwards for an
  # empty block and produced extra elements (e.g. n = 1, dims = 3 gave 2).
  block_sizes <- diff(c(0, breaks))

  # lapply() visits the blocks in order, so the random draws happen in the
  # same sequence as in a nested loop: block level first, then element noise.
  blocks <- lapply(block_sizes, function(size) {
    level <- rnorm(1, mean = 10, sd = 1)
    level + rnorm(size, mean = 0, sd = 0.2)
  })

  as.numeric(unlist(blocks))
}

# Generate a rows x cols matrix of simulated measurements with row groups.
#
# Rows are split into `groups` consecutive groups at
# round((rows / groups) * k). All rows of a group share one profile produced
# by getVector(n = cols, dims = dims); independent N(0, distort) noise is then
# added to every cell.
#
# Args:
#   rows:    number of rows.
#   cols:    number of columns.
#   groups:  number of row groups.
#   dims:    number of blocks in each group profile, forwarded to getVector().
#   distort: standard deviation of the cell-wise noise.
#
# Returns:
#   A numeric matrix with row names "Row 1", "Row 2", ... and column names
#   "Col 1", "Col 2", ...
getData <- function(rows = 10, cols = 20, groups = 2, dims = 2, distort = 0.5) {
  out <- matrix(data = 10, nrow = rows, ncol = cols)

  ### make groups ###
  breaks <- round((rows / groups) * seq_len(groups))
  group_sizes <- diff(c(0, breaks))
  first_row <- 1
  for (size in group_sizes) {
    # `dims` is forwarded as given; it used to be ignored in favour of a
    # hard-coded, partially matched `dim = 2`.
    profile <- getVector(n = cols, dims = dims)
    # Skipping empty groups avoids the backwards start:breaks[i] range that
    # wrote outside the matrix when rows < groups.
    if (size > 0) {
      members <- seq(from = first_row, length.out = size)
      out[members, ] <- matrix(profile, nrow = size, ncol = cols, byrow = TRUE)
    }
    first_row <- first_row + size
  }

  ### make error ###
  # byrow = TRUE keeps the draw order of the former row-by-row loop.
  noise <- rnorm(n = rows * cols, mean = 0, sd = distort)
  out <- out + matrix(noise, nrow = rows, ncol = cols, byrow = TRUE)

  ### return ###
  rownames(out) <- sprintf("Row %d", seq_len(rows))
  colnames(out) <- sprintf("Col %d", seq_len(cols))
  out
}

# Build the group label of every row for data laid out in consecutive groups.
#
# Rows are split at round((rows / groups) * k), the same break points that
# getData() uses, and each row receives the index of its group.
#
# Args:
#   rows:   number of rows to label.
#   groups: number of groups.
#
# Returns:
#   A factor of length round(rows) whose values are the group indices.
getGroup <- function(rows = 10, groups = 2) {
  breaks <- round((rows / groups) * seq_len(groups))
  # Explicit group sizes replace the start:breaks[i] ranges, which ran
  # backwards for an empty group and produced extra labels
  # (e.g. rows = 1, groups = 3 gave two labels).
  group_sizes <- diff(c(0, breaks))
  as.factor(rep(seq_len(groups), times = group_sizes))
}

Let's write the PCA plotting function:
# Plot the first two principal components of a matrix, coloured by group.
#
# Every row of `x` is standardised with scale(), the result is transposed and
# passed to prcomp() (centred and scaled), and the scores of the first two
# components are drawn with PCA2 on the x axis and PCA1 on the y axis. Axis
# labels show the rounded percentage of variance of each component. Point
# colours are a random pick from the reversed "RdYlGn" Brewer palette.
#
# NOTE(review): because PCA runs on t(x), there is one point per COLUMN of x,
# while the default `group = getGroup()` yields one label per ROW of the
# default `x = getData()`. Confirm the intended orientation; it is left
# unchanged here.
#
# Args:
#   x:     numeric matrix.
#   group: factor (or vector) of group labels for the plotted points. Labels
#          "1" and "2" are shown as "One" and "Two"; any other label is kept
#          as is.
#   title: plot title.
#
# Returns:
#   A ggplot object.
makePcaPlot <- function(x = getData(), group = getGroup(), title = "") {
  # library() instead of require(): a missing package stops here with a clear
  # error rather than failing later at the first plotting call.
  library(ggplot2)
  library(RColorBrewer)

  # Standardise each row, then re-attach the dimnames of x.
  scaled <- t(apply(x, 1, scale))
  rownames(scaled) <- rownames(x)
  colnames(scaled) <- colnames(x)

  pca <- prcomp(t(scaled), retx = TRUE, center = TRUE, scale. = TRUE)
  variance <- pca$sdev^2
  percent <- round((variance / sum(variance) * 100)[1:2])
  scores <- pca$x

  # Relabel groups "1"/"2". Further labels are appended as extra levels so
  # they are no longer turned into NA.
  labels <- as.character(group)
  labels[group %in% "1"] <- "One"
  labels[group %in% "2"] <- "Two"
  extra_levels <- setdiff(unique(labels), c("Two", "One", NA))
  group <- factor(labels, levels = c("Two", "One", extra_levels), ordered = TRUE)
  colors <- sample(1:11, size = length(levels(group)))

  PCA1 <- scores[, 1]
  PCA2 <- scores[, 2]
  base_size <- 10

  # theme()/element_text() replace opts()/theme_text(), which are no longer
  # available in current ggplot2.
  qplot(
    PCA2, PCA1,
    geom = "blank", main = title,
    xlab = paste("PCA2 (", percent[2], "%)", sep = ""),
    ylab = paste("PCA1 (", percent[1], "%)", sep = "")
  ) +
    geom_point(aes(colour = group), size = 6, alpha = 3/4) +
    scale_colour_manual(values = brewer.pal(11, "RdYlGn")[11:1][colors]) +
    theme(
      axis.text.x = element_text(size = base_size * 1.3, lineheight = 0.9,
                                 colour = "grey50", hjust = 1, angle = 90),
      axis.text.y = element_text(size = base_size * 1.3, lineheight = 0.9,
                                 colour = "grey50", hjust = 1)
    )
}
Finally, let's generate the plot:
# Plot a simulated 30 x 4 data set (two row groups, noise sd 0.7) with labels
# from getGroup(30, 2).
# NOTE(review): makePcaPlot() runs prcomp() on t(x), which gives one point per
# column of x (4 here), while getGroup(30, 2) returns 30 labels - confirm the
# intended orientation.
makePcaPlot(getData(30,4,2,distort = 0.7), getGroup(30,2))

sobota, 2 lipca 2011

Spring Tool Suite (STS) Maven ‘Add Dependency’ does not work

There is a small but annoying issue in Spring Tool Suite: after right-clicking a project and selecting "Add Dependency", no search results are found.

1. Preferences -> Maven -> Download repository index on startup
2. Restart STS
3. Wait for index download

Or add the following to pom.xml:
<repositories>
  <!-- Maven Central, releases only. The URL uses HTTPS because Maven Central
       no longer accepts plain-HTTP requests. -->
  <repository>
    <id>central</id>
    <name>Maven Repository Switchboard</name>
    <layout>default</layout>
    <url>https://repo1.maven.org/maven2</url>
    <snapshots>
      <enabled>false</enabled>
    </snapshots>
  </repository>
</repositories>