123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990 |
-
-
-
-
-
-
-
-
-
-
- HRG_zigzag_compartment_domain_main_fun <- function(A, res_dir, compartment_segs, allowed_max_nbins_seq=NULL, max_nbins_fine=NULL, min_n_bins=2)
- {
- `%dopar%` <- foreach::`%dopar%`
- `%do%` <- foreach::`%do%`
-
- arg_list = as.list(environment())
-
- res_folder = file.path(res_dir)
- dir.create(res_folder, recursive=TRUE, showWarnings = FALSE)
-
- total_execution_time_file = file.path(res_dir, 'total_execution.time')
- time_begin = Sys.time()
- cat('Execution begins:', as.character(time_begin), '\n', file=total_execution_time_file, append=TRUE)
-
-
-
-
- A_sym = Matrix::forceSymmetric(A, uplo='U')
- tol = 100 * .Machine$double.eps
- max_diff = max(abs(A_sym - A))
- notSym_flag = max_diff > tol
- if( notSym_flag ) warning('Your input contact matrix is not symmetric. The maximum difference between symmetric values is: ', max_diff, '\nBy default, the contact profile used for downstream analysis is taken from the upper triangular part. To use the lower triangular part you can transpose the input contact matrix first')
-
- if( is.null(rownames(A)) | is.null(colnames(A))) stop('A should be named by the bin indices')
-
-
- pA_sym = as.matrix(remove_blank_cols(A_sym, sparse=TRUE, ratio=0))
- n_zero_rows = nrow(A_sym) - nrow(pA_sym)
- zero_rows_flag = n_zero_rows > 0
- if( zero_rows_flag )
- {
- warning('There are ', n_zero_rows, ' rows/columns in your input contact matrix that have all their values being 0. These rows/columns are removed for downstream analysis')
- original_row_names = rownames(A_sym)
- kept_row_names = rownames(pA_sym)
- }
-
-
-
-
-
-
-
-
-
-
-
-
- res_inner = foreach::foreach(i=1:nrow(compartment_segs)) %do%
- {
- seg = compartment_segs[i,1]:compartment_segs[i,2]
- cat('\r', sprintf('Find sub-domains in %d of %d CDs | length of current CD: %d bins\n', i, nrow(compartment_segs), length(seg)))
- A_seg = pA_sym[seg, seg]
- res_zigzag = zigzag_loglik_ancestors_v4_5(A_seg, nrow(A_seg), min_n_bins=min_n_bins)
- res_outer = list(A=A_seg, L=res_zigzag$L, ancestors=res_zigzag$ancestors, min_n_bins=min_n_bins)
- res_outer
-
- }
- cat('\n')
-
- segmentss = compartment_segs
- res_info = list( arg_list=arg_list, pA_sym=pA_sym, A_final=pA_sym, segmentss=segmentss, res_inner=res_inner )
-
-
-
-
- time_finish = Sys.time()
- cat('Execution finishes:', as.character(time_finish), '\n', file=total_execution_time_file, append=TRUE)
- cat('Total execution time:', capture.output( time_finish - time_begin ), '\n', file=total_execution_time_file, append=TRUE)
-
- return( res_info )
- }
|