#!/usr/bin/env Rscript ############################################### # parse parameter ############################################### library(argparser, quietly=TRUE) p <- arg_parser("make OrgDB from emapper") p <- add_argument(p, "emapper_anno", help="emapper annotation result", type="character") p <- add_argument(p, "proteins", help="proteins in fasta format", type="character") p <- add_argument(p, "ontology", help="GO ontology type (MF, BP, or CC)", default="BP", type="character") argv <- parse_args(p) # set script dir script_dir <- dirname(strsplit(commandArgs(trailingOnly = FALSE)[4],"=")[[1]][2]) ############################################### # test input ############################################### # argv <- list() # argv$emapper_anno <- 'out.emapper.annotations' # argv$proteins <- 'proteins.fa' # script_dir <- 'emcp' library(tidyverse, quietly = TRUE) library(formattable, quietly = TRUE) library(AnnotationForge, quietly = TRUE) library(seqinr, quietly = TRUE) library(clusterProfiler, quietly = TRUE) library(GO.db, quietly = TRUE) ############################################### # parse parameter ############################################### emapper <- read_delim(argv$emapper_anno, "\t", escape_double = FALSE, col_names = FALSE, comment = "#", trim_ws = TRUE) %>% dplyr::select(GID = X1, COG = X7, Gene_Name = X8, Gene_Symbol = X9, GO = X10, KO = X12, Pathway = X13 ) ############################################### # make OrgDB ############################################### # gene name gene_info <- dplyr::select(emapper, GID, Gene_Name) %>% dplyr::filter(!is.na(Gene_Name)) %>% dplyr::filter(Gene_Name != '-') eggnog_anno = length(gene_info$GID) # gene to gene ontology gene2go <- dplyr::select(emapper, GID, GO) %>% separate_rows(GO, sep = ',', convert = F) %>% filter(!is.na(GO)) %>% filter(str_detect(GO, '^GO')) %>% mutate(EVIDENCE = 'IEA') go_terms <- AnnotationDbi::select(GO.db, keys = unique(gene2go$GO), columns = "ONTOLOGY", keytype = "GOID") # 分别处理三种GO类型 message("拆分GO数据...") gene2go <- gene2go %>% left_join(go_terms, by = c("GO" = "GOID")) %>% filter(ONTOLOGY == argv$ontology) %>% dplyr::select(GID, GO, EVIDENCE) %>% distinct() # make org package makeOrgPackage(gene_info=gene_info, go=gene2go, maintainer='zhangsan ', author='zhangsan', outputDir="./", tax_id=0000, genus='M', species='y', goTable="go", version="1.0") # build package pkgbuild::build('.//org.My.eg.db', dest_path = ".") # install package dir.create('R_Library', recursive = T) install.packages('org.My.eg.db_1.0.tar.gz', repos = NULL, #从本地安装 lib = 'R_Library') # 安装文件夹 library(org.My.eg.db, lib = 'R_Library')