# Annotation.snake.bak (1.8 KB)
  # makeOrgDB is declared as a local rule.
  localrules: makeOrgDB

  ######################################################
  # Annotation with eggnog-mapper
  ######################################################

  # Paths and tool locations, all read from the workflow configuration.
  Annotation_Input = config["Annotation_Input"]
  Annotation_Dir = config["Annotation_Dir"]
  EMAPPER_HOME = config["EMAPPER_HOME"]
  diamond_nr_db = config["diamond_nr_db"]

  # Nucleotide input ("nucl") selects blastx for diamond and adds --translate
  # to the emapper command line; any other data_type uses blastp and adds
  # no extra emapper flag.
  blast_method, emapper_method = (
      ("blastx", " --translate ")
      if config["data_type"] == "nucl"
      else ("blastp", " ")
  )
  # Run emapper.py in diamond mode with --no_annot on the annotation input.
  # The declared output is "<Annotation_Dir>/my.emapper.seed_orthologs".
  rule emapper_search:
      input:
          Annotation_Input
      output:
          Annotation_Dir + "/my.emapper.seed_orthologs"
      threads: 32
      shell:
          # Put the emapper bin directory first on PATH before calling emapper.py.
          "export PATH={EMAPPER_HOME}/bin:$PATH;"
          "python2 {EMAPPER_HOME}/emapper.py"
          " -m diamond"
          " -i {input}"
          " -o {Annotation_Dir}/my"
          " --cpu {threads}"
          " --no_annot --no_file_comments"
          # emapper_method is " --translate " for nucleotide input, otherwise " ".
          " {emapper_method}"
  # Run emapper.py with --annotate_hits_table on the seed-ortholog table.
  # The declared output is "<Annotation_Dir>/my.emapper.annotations".
  rule emapper_annot:
      input:
          Annotation_Dir + "/my.emapper.seed_orthologs"
      output:
          Annotation_Dir + "/my.emapper.annotations"
      threads: 32
      shell:
          # Put the emapper bin directory first on PATH before calling emapper.py.
          "export PATH={EMAPPER_HOME}/bin:$PATH;"
          "python2 {EMAPPER_HOME}/emapper.py"
          " --annotate_hits_table {input}"
          " -o {Annotation_Dir}/my"
          " --cpu {threads} "
  # Run the two emcp R scripts from inside Annotation_Dir:
  #   1. makeOrgPackageFromEmapper.R on my.emapper.annotations
  #   2. AnnoStat.R on the original annotation input
  # The declared output is "<Annotation_Dir>/anno_stat.txt".
  # NOTE(review): presumably anno_stat.txt is written by AnnoStat.R into the
  # current directory -- confirm against the script.
  rule makeOrgDB:
      input:
          emapper = Annotation_Dir + "/my.emapper.annotations",
          Annotation_Input = Annotation_Input
      output:
          Annotation_Dir + "/anno_stat.txt"
      shell:
          # NOTE(review): the "../" prefixes assume Annotation_Dir is a direct
          # child of the working directory and that Annotation_Input is a
          # relative path; a nested Annotation_Dir or an absolute input path
          # would break them.
          #
          # Steps are chained with "&&" so a failed cd or a failed first script
          # stops the command. No trailing "cd .." is needed: the command runs
          # in its own shell process, so the directory change does not persist.
          "cd {Annotation_Dir} && "
          "Rscript ../tools/emcp/makeOrgPackageFromEmapper.R my.emapper.annotations && "
          "Rscript ../tools/emcp/AnnoStat.R ../{input.Annotation_Input}"
  # Search the annotation input against the diamond NR database.
  # The declared output is "<Annotation_Dir>/my.nr.diamond.tab".
  rule nr:
      input:
          Annotation_Input
      output:
          Annotation_Dir + "/my.nr.diamond.tab"
      threads: 20
      shell:
          # blast_method is "blastx" for nucleotide input, otherwise "blastp".
          "diamond {blast_method}"
          " --threads {threads}"
          " --evalue 1e-10"
          " --outfmt 6"
          " -o {output}"
          " --query {input}"
          " --db {diamond_nr_db}"
          " --max-target-seqs 5"