run-featurecounts.R 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. #!/usr/bin/env Rscript
  2. # 导入必要的库
  3. library(argparser, quietly=TRUE)
  4. library(Rsubread)
  5. library(limma)
  6. library(edgeR)
  7. # 创建参数解析器
  8. p <- arg_parser("Run featureCounts and calculate FPKM/TPM")
  9. # 添加参数
  10. p <- add_argument(p, "--bam", help="input: bam file", type="character")
  11. p <- add_argument(p, "--gtf", help="input: gtf file", type="character", default=NA)
  12. p <- add_argument(p, "--saf", help="input: saf file", type="character", default=NA)
  13. p <- add_argument(p, "--isPairedEnd", help="Paired-end reads (default: TRUE)", type="logical", default=TRUE)
  14. p <- add_argument(p, "--strandSpecific", help="Strand specificity (0, 1, 2)", type="numeric", default=0)
  15. p <- add_argument(p, "--output", help="Output prefix", type="character")
  16. # 解析命令行参数
  17. argv <- parse_args(p)
  18. # 检查 gtf 和 saf 参数是否互斥
  19. if (!is.na(argv$gtf) && !is.na(argv$saf)) {
  20. stop("Cannot use both --gtf and --saf. Choose one.")
  21. }
  22. # 主要计算流程
  23. bamFile <- argv$bam
  24. annotFile <- if (!is.na(argv$gtf)) argv$gtf else argv$saf
  25. isGTF <- !is.na(argv$gtf)
  26. outFilePref <- argv$output
  27. # 输出文件路径
  28. outStatsFilePath <- paste0(outFilePref, '.log')
  29. outCountsFilePath <- paste0(outFilePref, '.count')
  30. # 运行 featureCounts
  31. fCountsList <- featureCounts(
  32. files = bamFile,
  33. annot.ext = annotFile,
  34. isGTFAnnotationFile = isGTF,
  35. nthreads = 1,
  36. isPairedEnd = argv$isPairedEnd,
  37. strandSpecific = argv$strandSpecific
  38. )
  39. # 创建 DGEList
  40. dgeList <- DGEList(counts = fCountsList$counts, genes = fCountsList$annotation)
  41. # 计算表达量
  42. cpm <- cpm(dgeList)
  43. fpkm <- rpkm(dgeList, dgeList$genes$Length)
  44. tpm <- exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
  45. # 写入输出文件
  46. write.table(fCountsList$stat, outStatsFilePath, sep="\t", col.names=FALSE, row.names=FALSE, quote=FALSE)
  47. featureCounts <- cbind(fCountsList$annotation[,1], fCountsList$counts, fpkm, tpm, cpm)
  48. colnames(featureCounts) <- c('gene_id', 'counts', 'fpkm','tpm', 'cpm')
  49. write.table(featureCounts, outCountsFilePath, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)