zhangxudong hace 8 meses
padre
commit
20c01a01a2
Se han modificado 2 ficheros con 25 adiciones y 5 borrados
  1. 6 1
      README.md
  2. 19 4
      run-featurecounts.R

+ 6 - 1
README.md

@@ -9,10 +9,15 @@ git clone http://git.genek.cn:3333/zhxd2/RunFeatureCounts.git
 
 # 二.使用
 
-对 GTF 定量
+对 GTF 中的 GENE 定量
 ```
 Rscript RunFeatureCounts/run-featurecounts.R -b ../21.Mapping/BLO_S1_LD1.bam -g ../12.ref/genes.gtf -f exon -a gene_id -i FALSE -s 2 -o BLO_S1_LD1
 ```
+对 GTF 中的 ISOFORM 定量
+```
+Rscript RunFeatureCounts/run-featurecounts.R -b ../21.Mapping/BLO_S1_LD1.bam -g ../12.ref/genes.gtf -f exon -a transcript_id -i FALSE -s 2 -o BLO_S1_LD1
+```
+
 
 对 SAF 定量
 ```

+ 19 - 4
run-featurecounts.R

@@ -13,8 +13,10 @@ p <- arg_parser("Run featureCounts and calculate FPKM/TPM")
 p <- add_argument(p, "--bam", help="input: bam file", type="character")
 p <- add_argument(p, "--gtf", help="input: gtf file", type="character", default=NA)
 p <- add_argument(p, "--saf", help="input: saf file", type="character", default=NA)
-p <- add_argument(p, "--isPairedEnd", help="Paired-end reads (default: TRUE)", type="logical", default=TRUE)
+p <- add_argument(p, "--isPairedEnd", help="Paired-end reads, TRUE or FALSE", type="logical", default=TRUE)
 p <- add_argument(p, "--strandSpecific", help="Strand specificity (0, 1, 2)", type="numeric", default=0)
+p <- add_argument(p, "--featureType", help="Feature type in GTF annotation (default: exon)", type="character", default="exon")
+p <- add_argument(p, "--attrType", help="Attribute type in GTF annotation (default: gene_id)", type="character", default="gene_id")
 p <- add_argument(p, "--output", help="Output prefix", type="character")
 
 # 解析命令行参数
@@ -36,15 +38,24 @@ outStatsFilePath  <- paste0(outFilePref, '.log')
 outCountsFilePath <- paste0(outFilePref, '.count')
 
 # 运行 featureCounts
-fCountsList <- featureCounts(
+fCountsArgs <- list(
   files = bamFile,
   annot.ext = annotFile,
-  isGTFAnnotationFile = isGTF,
   nthreads = 1,
   isPairedEnd = argv$isPairedEnd,
   strandSpecific = argv$strandSpecific
 )
 
+if (isGTF) {
+  fCountsArgs$isGTFAnnotationFile <- TRUE
+  fCountsArgs$GTF.featureType <- argv$featureType
+  fCountsArgs$GTF.attrType <- argv$attrType
+} else {
+  fCountsArgs$isGTFAnnotationFile <- FALSE
+}
+
+fCountsList <- do.call(featureCounts, fCountsArgs)
+
 # 创建 DGEList
 dgeList <- DGEList(counts = fCountsList$counts, genes = fCountsList$annotation)
 
@@ -55,7 +66,11 @@ tpm <- exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
 
 # 写入输出文件
 write.table(fCountsList$stat, outStatsFilePath, sep="\t", col.names=FALSE, row.names=FALSE, quote=FALSE)
+
+# 使用 --attrType 参数的值作为第一列的列名
+firstColName <- argv$attrType
 featureCounts <- cbind(fCountsList$annotation[,1], fCountsList$counts, fpkm, tpm, cpm)
-colnames(featureCounts) <- c('gene_id', 'counts', 'fpkm','tpm', 'cpm')
+colnames(featureCounts) <- c(firstColName, 'counts', 'fpkm', 'tpm', 'cpm')
+
 write.table(featureCounts, outCountsFilePath, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)