123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- CALDER = function(contact_tab_dump=NULL, contact_file_dump=NULL, contact_file_hic=NULL, chrs, bin_size, save_dir, save_intermediate_data=FALSE, swap_AB=0, genome='others', feature_track=NULL, black_list_bins=NULL, n_cores=1, sub_domains=FALSE, single_binsize_only=FALSE)
- {
- ########################### https://stackoverflow.com/questions/30216613/how-to-use-dopar-when-only-import-foreach-in-description-of-a-package
- `%dopar%` <- foreach::`%dopar%`
- `%do%` <- foreach::`%do%`
- ###########################
- chrs = as.character(chrs)
- n_chrs = length(chrs)
- if(n_chrs > 1) ## when computing for multiple chrs, contact_tab_dump or contact_file_dump should be a list with elements matching to that of chrs
- {
- if(!is.null(contact_tab_dump))
- {
- if(class(contact_tab_dump)!='list' | length(contact_tab_dump)!=n_chrs) stop('contact_tab_dump should be a list of contact table with its length equal to the length of chrs you specified\n')
- names(contact_tab_dump) = chrs
- }
- if(!is.null(contact_file_dump))
- {
- if(class(contact_file_dump)!='list' | length(contact_file_dump)!=n_chrs) stop('contact_file_dump should be a list of contact table with its length equal to the length of chrs you specified\n')
- names(contact_file_dump) = chrs
- }
- }
- ########################### try multiple bin_size parameters and choose the one that generates reliable compartments
- if(bin_size==5E3) bin_sizes = c(5E3, 10E3, 50E3, 100E3)
- if(bin_size==10E3) bin_sizes = c(10E3, 50E3, 100E3)
- if(bin_size==20E3) bin_sizes = c(20E3, 40E3, 100E3)
- if(bin_size==25E3) bin_sizes = c(25E3, 50E3, 100E3)
- if(bin_size==40E3) bin_sizes = c(40E3, 80E3)
- if(bin_size==50E3) bin_sizes = c(50E3, 100E3)
- if(!(bin_size %in% c(5E3, 10E3, 20E3, 25E3, 40E3, 50E3))) bin_sizes = bin_size
- if(genome=='others') single_binsize_only = TRUE
- if(single_binsize_only) bin_sizes = bin_size ## do not try multiple bin_sizes
- ## choose the already computed good compartment as reference to decide correctly the A/B compartment direction
- if(genome!='others') ref_compartment_file = system.file("extdata", sprintf('%s_all_sub_compartments.bed', genome), package = 'CALDER')
- if(genome=='others') ref_compartment_file = NULL
- ###########################
- if(is.null(ref_compartment_file) & is.null(feature_track)) stop('Should either specify genome in one of hg19, hg38, mm9, mm10, or provide a feature_track\n')
- ###########################
- doParallel::registerDoParallel(cores=n_cores)
- for(bin_size2look in bin_sizes)
- {
- bin_size_kb = sprintf("%skb", bin_size2look/1E3)
- save_dir_binsize = file.path(save_dir, 'intermediate_data/sub_compartments', bin_size_kb)
- dir.create(save_dir_binsize, recursive=TRUE, showWarnings=FALSE)
- }
- para_tab = data.table::data.table(expand.grid(bin_sizes, chrs))
- for(i in 1:ncol(para_tab)) para_tab[[i]] = as.vector(para_tab[[i]])
- colnames(para_tab) = c('bin_size', 'chr')
- ########################### compute compartment for each bin_size and chr
- silent_out = foreach::foreach(i=1:nrow(para_tab)) %dopar%
- {
- bin_size2look = para_tab$bin_size[i]
- chr = para_tab$chr[i]
- bin_size_kb = sprintf("%skb", bin_size2look/1E3)
- save_dir_binsize = file.path(save_dir, 'intermediate_data/sub_compartments', bin_size_kb)
- save_intermediate_data_tmp = save_intermediate_data*(bin_size2look==bin_size)
- CALDER_CD_hierarchy_v2(contact_tab_dump=contact_tab_dump[[chr]], contact_file_dump=contact_file_dump[[chr]], contact_file_hic=contact_file_hic, chr=chr, bin_size_input=bin_size, bin_size2look=bin_size2look, save_dir=save_dir_binsize, save_intermediate_data=save_intermediate_data_tmp, swap_AB=swap_AB, ref_compartment_file=ref_compartment_file, feature_track=feature_track, black_list_bins=black_list_bins)
- }
- ########################### build optimal compartment from multiple resolutions
- try(build_comp_table_opt(save_dir=save_dir, chrs=chrs, bin_sizes=bin_sizes, with_ref=!is.null(genome)))
- ########################### if computing sub-domains
- if(sub_domains==TRUE)
- {
- silence_out = foreach::foreach(chr=chrs) %do% ## use one core instead of multi-cores
- {
- chr_num = gsub('chr', '', chr, ignore.case=TRUE)
- intermediate_data_file = sprintf('%s/%skb/chr%s_intermediate_data.Rds', file.path(save_dir, 'intermediate_data/sub_compartments') , bin_size/1E3, chr_num)
- save_dir_sub_domains = file.path(save_dir, 'intermediate_data/sub_domains')
- dir.create(save_dir_sub_domains, showWarnings = FALSE)
- try(CALDER_sub_domains(intermediate_data_file=intermediate_data_file, chr=chr, save_dir=save_dir_sub_domains, bin_size=bin_size))
- }
- save_dir_opt = file.path(save_dir, "sub_domains")
- dir.create(save_dir_opt, recursive=TRUE, showWarnings=FALSE)
- save_opt_file = file.path(save_dir_opt, sprintf("all_nested_boundaries.bed"))
- opt_sub_domain_beds = sprintf('%s/chr%s_nested_boundaries.bed', file.path(save_dir, 'intermediate_data/sub_domains') , gsub('chr', '', chrs, ignore.case=TRUE))
- cmd_opt = sprintf("cat %s > %s", paste0(opt_sub_domain_beds, collapse=" "), save_opt_file)
- system(cmd_opt)
- }
- }
-
|