expressionStats.R 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. # parse parameter ---------------------------------------------------------
  2. library(argparser, quietly=TRUE)
  3. # Create a parser
  4. p <- arg_parser("expression matrix statistics")
  5. # Add command line arguments
  6. p <- add_argument(p, "--dat", help="expression matrix", type="character")
  7. p <- add_argument(p, "--out", help="output prefix", type="character")
  8. # Parse the command line arguments
  9. argv <- parse_args(p)
  10. dat<-argv$dat
  11. outprefix<-argv$out
  12. library(ggplot2)
  13. library(tidyverse)
  14. library(formattable)
  15. ## read data and melt
  16. expr <- read_delim(dat,
  17. "\t", escape_double = FALSE, trim_ws = TRUE)
  18. expr <- rename(expr, featureId = X1)
  19. expr_tidy<-gather(expr, key = "sample", value = "expression", -1)
  20. # density
  21. f<-paste(outprefix, "density.pdf", sep = ".")
  22. p<-qplot(log10(expression), data=expr_tidy, geom="density", xlim=c(-2,6), colour=sample, fill=sample, alpha=I(1/10), main = "Density distribution of transcript expression") +
  23. theme_bw() +
  24. scale_y_continuous(expand = c(0,0))
  25. pdf(file=f)
  26. p
  27. dev.off()
  28. # boxplot
  29. p<-ggplot(expr_tidy, aes(x=sample, y=log10(expression), fill=sample)) + geom_boxplot() +
  30. theme_bw() +
  31. theme(axis.text.x=element_text(hjust=1,angle=60)) + scale_y_continuous(expand = c(0,0))
  32. f<-paste(outprefix, "boxplot.pdf", sep = ".")
  33. pdf(file=f)
  34. p
  35. dev.off()
  36. # expressed gene
  37. total_number = length(expr$featureId)
  38. expressed_tidy <- expr_tidy %>% filter(expression > 1) %>%
  39. group_by(sample) %>%
  40. summarise(expressed = n()) %>%
  41. mutate(percentage = percent(expressed/total_number))
  42. f<-paste(outprefix, "stat.txt", sep = ".")
  43. write.table(expressed_tidy, file = f, quote = F, row.names = F)