|
@@ -1,41 +1,61 @@
|
|
|
#!/usr/bin/env Rscript
|
|
|
-# parse parameter ---------------------------------------------------------
|
|
|
+
|
|
|
+# 导入必要的库
|
|
|
library(argparser, quietly=TRUE)
|
|
|
-# Create a parser
|
|
|
-p <- arg_parser("run featureCounts and calculate FPKM/TPM")
|
|
|
+library(Rsubread)
|
|
|
+library(limma)
|
|
|
+library(edgeR)
|
|
|
|
|
|
-# Add command line arguments
|
|
|
+# 创建参数解析器
|
|
|
+p <- arg_parser("Run featureCounts and calculate FPKM/TPM")
|
|
|
+
|
|
|
+# 添加参数
|
|
|
p <- add_argument(p, "--bam", help="input: bam file", type="character")
|
|
|
-p <- add_argument(p, "--gtf", help="input: gtf file", type="character")
|
|
|
-p <- add_argument(p, "--featureType", help="a character string or a vector of character strings giving the feature type or types used to select rows in the GTF annotation which will be used for read summarization", type="character", default="exon")
|
|
|
-p <- add_argument(p, "--attrType", help="a character string giving the attribute type in the GTF annotation which will be used to group features (eg. exons) into meta-features", type="character", default="gene_id")
|
|
|
-p <- add_argument(p, "--isPairedEnd", help="indicating whether libraries contain paired-end reads or not", type="logical", default=TRUE)
|
|
|
-p <- add_argument(p, "--strandSpecific", help="0 (unstranded), 1 (stranded) and 2 (reversely stranded)", type="numeric", default=0)
|
|
|
-p <- add_argument(p, "--output", help="output prefix", type="character")
|
|
|
-
|
|
|
-# Parse the command line arguments
|
|
|
+p <- add_argument(p, "--gtf", help="input: gtf file", type="character", default=NA)
|
|
|
+p <- add_argument(p, "--saf", help="input: saf file", type="character", default=NA)
|
|
|
+p <- add_argument(p, "--isPairedEnd", help="Paired-end reads (default: TRUE)", type="logical", default=TRUE)
|
|
|
+p <- add_argument(p, "--strandSpecific", help="Strand specificity (0, 1, 2)", type="numeric", default=0)
|
|
|
+p <- add_argument(p, "--output", help="Output prefix", type="character")
|
|
|
+
|
|
|
+# 解析命令行参数
|
|
|
argv <- parse_args(p)
|
|
|
|
|
|
-library(Rsubread)
|
|
|
-library(limma)
|
|
|
-library(edgeR)
|
|
|
+# 检查 gtf 和 saf 参数是否互斥
|
|
|
+if (!is.na(argv$gtf) && !is.na(argv$saf)) {
|
|
|
+ stop("Cannot use both --gtf and --saf. Choose one.")
|
|
|
+}
|
|
|
|
|
|
+# 主要计算流程
|
|
|
bamFile <- argv$bam
|
|
|
-gtfFile <- argv$gtf
|
|
|
-nthreads <- 1
|
|
|
+annotFile <- if (!is.na(argv$gtf)) argv$gtf else argv$saf
|
|
|
+isGTF <- !is.na(argv$gtf)
|
|
|
outFilePref <- argv$output
|
|
|
|
|
|
-outStatsFilePath <- paste(outFilePref, '.log', sep = '');
|
|
|
-outCountsFilePath <- paste(outFilePref, '.count', sep = '');
|
|
|
-
|
|
|
-fCountsList = featureCounts(bamFile, annot.ext=gtfFile, isGTFAnnotationFile=TRUE, nthreads=nthreads, GTF.featureType=argv$featureType, GTF.attrType=argv$attrType, isPairedEnd=argv$isPairedEnd, strandSpecific=argv$strandSpecific)
|
|
|
-dgeList = DGEList(counts=fCountsList$counts, genes=fCountsList$annotation)
|
|
|
-cpm = cpm(dgeList)
|
|
|
-fpkm = rpkm(dgeList, dgeList$genes$Length)
|
|
|
-tpm = exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
|
|
|
-
|
|
|
+# 输出文件路径
|
|
|
+outStatsFilePath <- paste0(outFilePref, '.log')
|
|
|
+outCountsFilePath <- paste0(outFilePref, '.count')
|
|
|
+
|
|
|
+# 运行 featureCounts
|
|
|
+fCountsList <- featureCounts(
|
|
|
+ files = bamFile,
|
|
|
+ annot.ext = annotFile,
|
|
|
+ isGTFAnnotationFile = isGTF,
|
|
|
+ nthreads = 1,
|
|
|
+ isPairedEnd = argv$isPairedEnd,
|
|
|
+ strandSpecific = argv$strandSpecific
|
|
|
+)
|
|
|
+
|
|
|
+# 创建 DGEList
|
|
|
+dgeList <- DGEList(counts = fCountsList$counts, genes = fCountsList$annotation)
|
|
|
+
|
|
|
+# 计算表达量
|
|
|
+cpm <- cpm(dgeList)
|
|
|
+fpkm <- rpkm(dgeList, dgeList$genes$Length)
|
|
|
+tpm <- exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
|
|
|
+
|
|
|
+# 写入输出文件
|
|
|
write.table(fCountsList$stat, outStatsFilePath, sep="\t", col.names=FALSE, row.names=FALSE, quote=FALSE)
|
|
|
-
|
|
|
-featureCounts = cbind(fCountsList$annotation[,1], fCountsList$counts, fpkm, tpm, cpm)
|
|
|
-colnames(featureCounts) = c('gene_id', 'counts', 'fpkm','tpm', 'cpm')
|
|
|
+featureCounts <- cbind(fCountsList$annotation[,1], fCountsList$counts, fpkm, tpm, cpm)
|
|
|
+colnames(featureCounts) <- c('gene_id', 'counts', 'fpkm','tpm', 'cpm')
|
|
|
write.table(featureCounts, outCountsFilePath, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
|
|
|
+
|