CALDER_main.R 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. CALDER_v2 = function(contact_tab_straw=NULL, contact_file_straw=NULL, contact_file_hic=NULL, chrs, bin_size, save_dir, save_intermediate_data=FALSE, swap_AB=0, ref_genome=c("hg19", 'hg38', "mm10", 'dm6')[1], annotation_track=NULL, black_list_bins=NULL, n_cores=1, sub_domains=FALSE, single_binsize_only=FALSE)
  2. {
  3. ########################### https://stackoverflow.com/questions/30216613/how-to-use-dopar-when-only-import-foreach-in-description-of-a-package
  4. `%dopar%` <- foreach::`%dopar%`
  5. `%do%` <- foreach::`%do%`
  6. ###########################
  7. n_chrs = length(chrs)
  8. if(n_chrs > 1) ## when computing for multiple chrs, contact_tab_straw or contact_file_straw should be a list with elements matching to that of chrs
  9. {
  10. if(!is.null(contact_tab_straw))
  11. {
  12. if(class(contact_tab_straw)!='list' | length(contact_tab_straw)!=n_chrs) stop('contact_tab_straw should be a list of contact table with its length equal to the length of chrs you specified\n')
  13. }
  14. if(!is.null(contact_file_straw))
  15. {
  16. if(class(contact_file_straw)!='list' | length(contact_file_straw)!=n_chrs) stop('contact_file_straw should be a list of contact table with its length equal to the length of chrs you specified\n')
  17. }
  18. }
  19. ########################### try multiple bin_size parameters and choose the one that generates reliable compartments
  20. if(bin_size==5E3) bin_sizes = c(5E3, 10E3, 50E3, 100E3)
  21. if(bin_size==10E3) bin_sizes = c(10E3, 50E3, 100E3)
  22. if(bin_size==20E3) bin_sizes = c(20E3, 40E3, 100E3)
  23. if(bin_size==25E3) bin_sizes = c(25E3, 50E3, 100E3)
  24. if(bin_size==40E3) bin_sizes = c(40E3, 80E3)
  25. if(bin_size==50E3) bin_sizes = c(50E3, 100E3)
  26. if(!(bin_size %in% c(5E3, 10E3, 20E3, 25E3, 40E3, 50E3))) bin_sizes = bin_size
  27. if(is.null(ref_genome)) single_binsize_only = TRUE
  28. if(single_binsize_only) bin_sizes = bin_size ## do not try multiple bin_sizes
  29. ## choose the already computed good compartment as reference to decide correctly the A/B compartment direction
  30. if(!is.null(ref_genome)) ref_compartment_file = system.file("extdata", sprintf('%s_all_sub_compartments.bed', ref_genome), package = 'CALDER')
  31. if(is.null(ref_genome)) ref_compartment_file = NULL
  32. ###########################
  33. doParallel::registerDoParallel(cores=n_cores)
  34. for(bin_size2look in bin_sizes)
  35. {
  36. bin_size_kb = sprintf("%skb", bin_size2look/1E3)
  37. save_dir_binsize = file.path(save_dir, 'intermediate_data/sub_compartments', bin_size_kb)
  38. dir.create(save_dir_binsize, recursive=TRUE, showWarnings=FALSE)
  39. }
  40. para_tab = data.table::data.table(expand.grid(bin_sizes, chrs))
  41. for(i in 1:ncol(para_tab)) para_tab[[i]] = as.vector(para_tab[[i]])
  42. colnames(para_tab) = c('bin_size', 'chr')
  43. ########################### compute compartment for each bin_size and chr
  44. silent_out = foreach::foreach(i=1:nrow(para_tab)) %dopar%
  45. {
  46. bin_size2look = para_tab$bin_size[i]
  47. chr = para_tab$chr[i]
  48. bin_size_kb = sprintf("%skb", bin_size2look/1E3)
  49. save_dir_binsize = file.path(save_dir, 'intermediate_data/sub_compartments', bin_size_kb)
  50. save_intermediate_data_tmp = save_intermediate_data*(bin_size2look==bin_size)
  51. CALDER_CD_hierarchy_v2(contact_tab_straw=contact_tab_straw, contact_file_straw=contact_file_straw, contact_file_hic=contact_file_hic, chr=chr, bin_size_input=bin_size, bin_size2look=bin_size2look, save_dir=save_dir_binsize, save_intermediate_data=save_intermediate_data_tmp, swap_AB=swap_AB, ref_compartment_file=ref_compartment_file, annotation_track=annotation_track, black_list_bins=black_list_bins)
  52. }
  53. ########################### build optimal compartment from multiple resolutions
  54. try(build_comp_table_opt(save_dir=save_dir, chrs=chrs, bin_sizes=bin_sizes, with_ref=!is.null(ref_genome)))
  55. ########################### if computing sub-domains
  56. if(sub_domains==TRUE)
  57. {
  58. silence_out = foreach::foreach(chr=chrs) %do% ## use one core instead of multi-cores
  59. {
  60. chr_num = gsub('chr', '', chr, ignore.case=TRUE)
  61. intermediate_data_file = sprintf('%s/%skb/chr%s_intermediate_data.Rds', file.path(save_dir, 'intermediate_data/sub_compartments') , bin_size/1E3, chr_num)
  62. save_dir_sub_domains = file.path(save_dir, 'intermediate_data/sub_domains')
  63. dir.create(save_dir_sub_domains, showWarnings = FALSE)
  64. try(CALDER_sub_domains(intermediate_data_file=intermediate_data_file, chr=chr, save_dir=save_dir_sub_domains, bin_size=bin_size))
  65. }
  66. save_dir_opt = file.path(save_dir, "sub_domains")
  67. dir.create(save_dir_opt, recursive=TRUE, showWarnings=FALSE)
  68. save_opt_file = file.path(save_dir_opt, sprintf("all_nested_boundaries.bed"))
  69. opt_sub_domain_beds = sprintf('%s/chr%s_nested_boundaries.bed', file.path(save_dir, 'intermediate_data/sub_domains') , gsub('chr', '', chrs, ignore.case=TRUE))
  70. cmd_opt = sprintf("cat %s > %s", paste0(opt_sub_domain_beds, collapse=" "), save_opt_file)
  71. system(cmd_opt)
  72. }
  73. }