123456789101112131415161718192021222324252627282930313233343536 |
- #!/usr/bin/env Rscript
- # parse parameter ---------------------------------------------------------
- library(argparser, quietly=TRUE)
- # Create a parser
- p <- arg_parser("run featureCounts and calculate FPKM/TPM")
- # Add command line arguments
- p <- add_argument(p, "--bam", help="input: bam file", type="character")
- p <- add_argument(p, "--gtf", help="input: gtf file", type="character")
- p <- add_argument(p, "--output", help="output prefix", type="character")
- # Parse the command line arguments
- argv <- parse_args(p)
- library(Rsubread)
- library(limma)
- library(edgeR)
- bamFile <- argv$bam
- gtfFile <- argv$gtf
- nthreads <- 1
- outFilePref <- argv$output
- outStatsFilePath <- paste(outFilePref, '.log', sep = '');
- outCountsFilePath <- paste(outFilePref, '.count', sep = '');
- fCountsList = featureCounts(bamFile, annot.ext=gtfFile, isGTFAnnotationFile=TRUE, nthreads=nthreads, isPairedEnd=TRUE)
- dgeList = DGEList(counts=fCountsList$counts, genes=fCountsList$annotation)
- fpkm = rpkm(dgeList, dgeList$genes$Length)
- tpm = exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
- write.table(fCountsList$stat, outStatsFilePath, sep="\t", col.names=FALSE, row.names=FALSE, quote=FALSE)
- featureCounts = cbind(fCountsList$annotation[,1], fCountsList$counts, fpkm, tpm)
- colnames(featureCounts) = c('gene_id', 'counts', 'fpkm','tpm')
- write.table(featureCounts, outCountsFilePath, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
|