123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
- #!/usr/bin/env Rscript
- ###############################################
- # parse parameter
- ###############################################
- library(argparser, quietly=TRUE)
- p <- arg_parser("make OrgDB from emapper")
- p <- add_argument(p, "emapper_anno", help="emapper annotation result", type="character")
- p <- add_argument(p, "proteins", help="proteins in fasta format", type="character")
- p <- add_argument(p, "ontology", help="GO ontology type (MF, BP, or CC)",
- default="BP", type="character")
- argv <- parse_args(p)
- # set script dir
- script_dir <- dirname(strsplit(commandArgs(trailingOnly = FALSE)[4],"=")[[1]][2])
- ###############################################
- # test input
- ###############################################
- # argv <- list()
- # argv$emapper_anno <- 'out.emapper.annotations'
- # argv$proteins <- 'proteins.fa'
- # script_dir <- 'emcp'
- library(tidyverse, quietly = TRUE)
- library(formattable, quietly = TRUE)
- library(AnnotationForge, quietly = TRUE)
- library(seqinr, quietly = TRUE)
- library(clusterProfiler, quietly = TRUE)
- library(GO.db, quietly = TRUE)
- ###############################################
- # parse parameter
- ###############################################
- emapper <- read_delim(argv$emapper_anno,
- "\t", escape_double = FALSE, col_names = FALSE,
- comment = "#", trim_ws = TRUE) %>%
- dplyr::select(GID = X1,
- COG = X7,
- Gene_Name = X8,
- Gene_Symbol = X9,
- GO = X10,
- KO = X12,
- Pathway = X13
- )
- ###############################################
- # make OrgDB
- ###############################################
- # gene name
- gene_info <- dplyr::select(emapper, GID, Gene_Name) %>%
- dplyr::filter(!is.na(Gene_Name)) %>%
- dplyr::filter(Gene_Name != '-')
- eggnog_anno = length(gene_info$GID)
- # gene to gene ontology
- gene2go <- dplyr::select(emapper, GID, GO) %>%
- separate_rows(GO, sep = ',', convert = F) %>%
- filter(!is.na(GO)) %>%
- filter(str_detect(GO, '^GO')) %>%
- mutate(EVIDENCE = 'IEA')
- go_terms <- AnnotationDbi::select(GO.db,
- keys = unique(gene2go$GO),
- columns = "ONTOLOGY",
- keytype = "GOID")
- # 分别处理三种GO类型
- message("拆分GO数据...")
- gene2go <- gene2go %>%
- left_join(go_terms, by = c("GO" = "GOID")) %>%
- filter(ONTOLOGY == argv$ontology) %>%
- dplyr::select(GID, GO, EVIDENCE) %>%
- distinct()
-
- # make org package
- makeOrgPackage(gene_info=gene_info,
- go=gene2go,
- maintainer='zhangsan <zhangsan@genek.cn>',
- author='zhangsan',
- outputDir="./",
- tax_id=0000,
- genus='M',
- species='y',
- goTable="go",
- version="1.0")
- # build package
- pkgbuild::build('.//org.My.eg.db', dest_path = ".")
- # install package
- dir.create('R_Library', recursive = T)
- install.packages('org.My.eg.db_1.0.tar.gz',
- repos = NULL, #从本地安装
- lib = 'R_Library') # 安装文件夹
- library(org.My.eg.db, lib = 'R_Library')
|