2 years ago · 05b2977565
--- a/README.md
+++ b/README.md
@@ -9,6 +9,12 @@ git clone http://git.genek.cn:3333/zhxd2/RunFeatureCounts.git
 
				 
			
 
				 # 二.使用
			
 
				 
			
 
				+对 GTF 定量
			
 
				 ```
			
 
				-Rscript ../11.software/RunFeatureCounts/run-featurecounts.R -b ../21.Mapping/BLO_S1_LD1.bam -g ../12.ref/genes.gtf -f exon -a gene_id -i FALSE -s 2 -o BLO_S1_LD1
			
 
				+Rscript RunFeatureCounts/run-featurecounts.R -b ../21.Mapping/BLO_S1_LD1.bam -g ../12.ref/genes.gtf -f exon -a gene_id -i FALSE -s 2 -o BLO_S1_LD1
			
 
				+```
			
 
				+
			
 
				+对 SAF 定量
			
 
				+```
			
 
				+Rscript RunFeatureCounts/run-featurecounts.R -b aligned/FoxA1_E2_rep1_sorted_filtered.bam --saf merged_peaks.saf --isPairedEnd FALSE -o FoxA1_E2_rep1
			
 
				 ```
			
--- a/run-featurecounts.R
+++ b/run-featurecounts.R
@@ -1,41 +1,61 @@
 
				 #!/usr/bin/env Rscript
			
 
				-# parse parameter ---------------------------------------------------------
			
 
				+
			
 
				+# 导入必要的库
			
 
				 library(argparser, quietly=TRUE)
			
 
				-# Create a parser
			
 
				-p <- arg_parser("run featureCounts and calculate FPKM/TPM")
			
 
				+library(Rsubread)
			
 
				+library(limma)
			
 
				+library(edgeR)
			
 
				 
			
 
				-# Add command line arguments
			
 
				+# 创建参数解析器
			
 
				+p <- arg_parser("Run featureCounts and calculate FPKM/TPM")
			
 
				+
			
 
				+# 添加参数
			
 
				 p <- add_argument(p, "--bam", help="input: bam file", type="character")
			
 
				-p <- add_argument(p, "--gtf", help="input: gtf file", type="character")
			
 
				-p <- add_argument(p, "--featureType", help="a character string or a vector of character strings giving the feature type or types used to select rows in the GTF annotation which will be used for read summarization", type="character", default="exon")
			
 
				-p <- add_argument(p, "--attrType", help="a character string giving the attribute type in the GTF annotation which will be used to group features (eg. exons) into meta-features", type="character", default="gene_id")
			
 
				-p <- add_argument(p, "--isPairedEnd", help="indicating whether libraries contain paired-end reads or not", type="logical", default=TRUE)
			
 
				-p <- add_argument(p, "--strandSpecific", help="0 (unstranded), 1 (stranded) and 2 (reversely stranded)", type="numeric", default=0)
			
 
				-p <- add_argument(p, "--output", help="output prefix", type="character")
			
 
				-
			
 
				-# Parse the command line arguments
			
 
				+p <- add_argument(p, "--gtf", help="input: gtf file", type="character", default=NA)
			
 
				+p <- add_argument(p, "--saf", help="input: saf file", type="character", default=NA)
			
 
				+p <- add_argument(p, "--isPairedEnd", help="Paired-end reads (default: TRUE)", type="logical", default=TRUE)
			
 
				+p <- add_argument(p, "--strandSpecific", help="Strand specificity (0, 1, 2)", type="numeric", default=0)
			
 
				+p <- add_argument(p, "--output", help="Output prefix", type="character")
			
 
				+
			
 
				+# 解析命令行参数
			
 
				 argv <- parse_args(p)
			
 
				 
			
 
				-library(Rsubread)
			
 
				-library(limma)
			
 
				-library(edgeR)
			
 
				+# 检查 gtf 和 saf 参数是否互斥
			
 
				+if (!is.na(argv$gtf) && !is.na(argv$saf)) {
			
 
				+  stop("Cannot use both --gtf and --saf. Choose one.")
			
 
				+}
			
 
				 
			
 
				+# 主要计算流程
			
 
				 bamFile <- argv$bam
			
 
				-gtfFile <- argv$gtf
			
 
				-nthreads <- 1
			
 
				+annotFile <- if (!is.na(argv$gtf)) argv$gtf else argv$saf
			
 
				+isGTF <- !is.na(argv$gtf)
			
 
				 outFilePref <- argv$output
			
 
				 
			
 
				-outStatsFilePath  <- paste(outFilePref, '.log',  sep = ''); 
			
 
				-outCountsFilePath <- paste(outFilePref, '.count', sep = ''); 
			
 
				-
			
 
				-fCountsList = featureCounts(bamFile, annot.ext=gtfFile, isGTFAnnotationFile=TRUE, nthreads=nthreads, GTF.featureType=argv$featureType, GTF.attrType=argv$attrType, isPairedEnd=argv$isPairedEnd, strandSpecific=argv$strandSpecific)
			
 
				-dgeList = DGEList(counts=fCountsList$counts, genes=fCountsList$annotation)
			
 
				-cpm = cpm(dgeList)
			
 
				-fpkm = rpkm(dgeList, dgeList$genes$Length)
			
 
				-tpm = exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
			
 
				-
			
 
				+# 输出文件路径
			
 
				+outStatsFilePath  <- paste0(outFilePref, '.log')
			
 
				+outCountsFilePath <- paste0(outFilePref, '.count')
			
 
				+
			
 
				+# 运行 featureCounts
			
 
				+fCountsList <- featureCounts(
			
 
				+  files = bamFile,
			
 
				+  annot.ext = annotFile,
			
 
				+  isGTFAnnotationFile = isGTF,
			
 
				+  nthreads = 1,
			
 
				+  isPairedEnd = argv$isPairedEnd,
			
 
				+  strandSpecific = argv$strandSpecific
			
 
				+)
			
 
				+
			
 
				+# 创建 DGEList
			
 
				+dgeList <- DGEList(counts = fCountsList$counts, genes = fCountsList$annotation)
			
 
				+
			
 
				+# 计算表达量
			
 
				+cpm <- cpm(dgeList)
			
 
				+fpkm <- rpkm(dgeList, dgeList$genes$Length)
			
 
				+tpm <- exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
			
 
				+
			
 
				+# 写入输出文件
			
 
				 write.table(fCountsList$stat, outStatsFilePath, sep="\t", col.names=FALSE, row.names=FALSE, quote=FALSE)
			
 
				-
			
 
				-featureCounts = cbind(fCountsList$annotation[,1], fCountsList$counts, fpkm, tpm, cpm)
			
 
				-colnames(featureCounts) = c('gene_id', 'counts', 'fpkm','tpm', 'cpm')
			
 
				+featureCounts <- cbind(fCountsList$annotation[,1], fCountsList$counts, fpkm, tpm, cpm)
			
 
				+colnames(featureCounts) <- c('gene_id', 'counts', 'fpkm','tpm', 'cpm')
			
 
				 write.table(featureCounts, outCountsFilePath, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
			
 
				+