#!/usr/bin/env Rscript
# run-featurecounts.R
#
# Summarise the reads of one BAM file per gene with Rsubread::featureCounts
# and report the raw counts together with FPKM, TPM and CPM.
#
# Outputs (both tab-separated):
#   <output>.log    featureCounts assignment statistics, no header
#   <output>.count  columns: gene_id, counts, fpkm, tpm, cpm
#
# Example:
#   Rscript run-featurecounts.R --bam s1.bam --gtf genes.gtf --output s1

# parse parameter ---------------------------------------------------------
library(argparser, quietly = TRUE)

# Create a parser
p <- arg_parser("run featureCounts and calculate FPKM/TPM")

# Add command line arguments
p <- add_argument(p, "--bam", help = "input: bam file", type = "character")
p <- add_argument(p, "--gtf", help = "input: gtf file", type = "character")
p <- add_argument(
  p, "--featureType",
  help = paste0(
    "a character string or a vector of character strings giving the ",
    "feature type or types used to select rows in the GTF annotation ",
    "which will be used for read summarization"
  ),
  type = "character", default = "exon"
)
p <- add_argument(
  p, "--attrType",
  help = paste0(
    "a character string giving the attribute type in the GTF annotation ",
    "which will be used to group features (eg. exons) into meta-features"
  ),
  type = "character", default = "gene_id"
)
p <- add_argument(
  p, "--isPairedEnd",
  help = "indicating whether libraries contain paired-end reads or not",
  type = "logical", default = TRUE
)
p <- add_argument(
  p, "--strandSpecific",
  help = "0 (unstranded), 1 (stranded) and 2 (reversely stranded)",
  type = "numeric", default = 0
)
p <- add_argument(p, "--output", help = "output prefix", type = "character")
# Optional; the default keeps the previous single-threaded behaviour.
p <- add_argument(
  p, "--nthreads",
  help = "number of threads used by featureCounts",
  type = "numeric", default = 1
)

# Parse the command line arguments
argv <- parse_args(p)

# validate parameter ------------------------------------------------------
# Options without a default are assumed to come back as NA when omitted
# (argparser behaviour -- TODO confirm). Stop before any output path is
# built from them, otherwise the results would land in "NA.log"/"NA.count".
required_args <- c("bam", "gtf", "output")
is_missing <- vapply(
  required_args,
  function(arg_name) {
    value <- argv[[arg_name]]
    is.null(value) || length(value) != 1L || is.na(value)
  },
  logical(1)
)
if (any(is_missing)) {
  stop(
    "missing required argument(s): ",
    paste0("--", required_args[is_missing], collapse = ", "),
    call. = FALSE
  )
}

bam_file <- argv$bam
gtf_file <- argv$gtf
for (input_path in c(bam_file, gtf_file)) {
  if (!file.exists(input_path)) {
    stop("input file not found: ", input_path, call. = FALSE)
  }
}

strand_specific <- as.integer(argv$strandSpecific)
if (!strand_specific %in% 0:2) {
  stop("--strandSpecific must be 0, 1 or 2", call. = FALSE)
}

n_threads <- as.integer(argv$nthreads)
if (is.na(n_threads) || n_threads < 1L) {
  stop("--nthreads must be a positive integer", call. = FALSE)
}

out_prefix <- argv$output
out_stats_path <- paste0(out_prefix, ".log")
out_counts_path <- paste0(out_prefix, ".count")

# count reads -------------------------------------------------------------
library(Rsubread)
library(limma)
library(edgeR)

fc_result <- featureCounts(
  bam_file,
  annot.ext = gtf_file,
  isGTFAnnotationFile = TRUE,
  nthreads = n_threads,
  GTF.featureType = argv$featureType,
  GTF.attrType = argv$attrType,
  isPairedEnd = argv$isPairedEnd,
  strandSpecific = strand_specific
)

# normalise ---------------------------------------------------------------
dge <- DGEList(counts = fc_result$counts, genes = fc_result$annotation)

# Result names deliberately differ from the cpm()/featureCounts() functions
# so those functions are not shadowed by data objects.
cpm_values <- cpm(dge)
fpkm_values <- rpkm(dge, gene.length = dge$genes$Length)
# TPM rescales FPKM so that the sample sums to one million.
tpm_values <- fpkm_values / sum(fpkm_values) * 1e6

# write results -----------------------------------------------------------
write.table(
  fc_result$stat, out_stats_path,
  sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
)

# A data.frame keeps the numeric columns numeric; cbind() with the gene ids
# would coerce the whole table to a character matrix. Only one BAM file is
# processed, so the first (only) column of each matrix is taken.
count_table <- data.frame(
  gene_id = fc_result$annotation[[1L]],
  counts = as.vector(fc_result$counts[, 1L]),
  fpkm = as.vector(fpkm_values[, 1L]),
  tpm = as.vector(tpm_values[, 1L]),
  cpm = as.vector(cpm_values[, 1L]),
  stringsAsFactors = FALSE
)
write.table(
  count_table, out_counts_path,
  sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE
)