123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208 |
- trim_tree_adaptive_top_down_v2 = function( tree, wilcox_p_thresh, mean_diff_thresh )
- {
- # leaves = get_leaves(tree)
- if(igraph::vcount(tree)==1) return(tree)
- # cat('I am in trim_tree_adaptive_top_down_v2\n')
- nsig_nodes = union( igraph::V(tree)[which(igraph::V(tree)$wilcox_p > wilcox_p_thresh)]$name, igraph::V(tree)[which(igraph::V(tree)$mean_diff > mean_diff_thresh)]$name )
- children_of_nsig = names(unlist(igraph::ego(tree, order=1, node=nsig_nodes, mode='out', mindist=1)))
- if(length(children_of_nsig)!=0) trimmed_tree = tree - children_of_nsig
- if(length(children_of_nsig)==0) trimmed_tree = tree
- comps = igraph::decompose(trimmed_tree)
- root_index = which(sapply( comps, function(comp) igraph::V(tree)[1]$name %in% igraph::V(comp)$name )==1)
- trimmed_tree = comps[[root_index]]
- if(!is_binary_tree( trimmed_tree )) stop("trim_tree_adaptive_top_down, the resulted tree is not a binary tree")
- return( trimmed_tree )
- }
- prunning = function(branches, p0, to_correct=FALSE, width_thresh=-Inf, width_thresh_CD=5, boundary_signal_thresh=-Inf, return_which='TADs', top_down=FALSE, all_levels=FALSE, CD_border_adj=FALSE, peak_thresh=NULL, mean_diff_thresh)
- {
- size2correct = sum(sapply(branches, igraph::vcount)) - sum(sapply(branches, function(v) length(get_leaves(v))))
- p_thresh = p0/size2correct
- if(to_correct==FALSE) p_thresh=p0
- # if(!is.null(p0)) p_thresh = p0
-
- if(top_down==FALSE)
- {
- trimmed_branches = lapply( branches, trim_tree_adaptive, max_imp_p=p_thresh, max_nimp_p=Inf, width_thresh=width_thresh, boundary_signal_thresh=boundary_signal_thresh, peak_thresh=peak_thresh )
- # size2correct = sum(sapply(trimmed_branches, igraph::vcount)) - sum(sapply(trimmed_branches, function(v) length(get_leaves(v))))
-
- # for( i in 1:length( trimmed_branches ) )
- # {
- # trimmed_branch = trimmed_branches[[i]]
- # if(igraph::vcount(trimmed_branch) > 1) trimmed_branches[[i]] = lapply( trimmed_branches[i], trim_tree_adaptive, max_imp_p=p_thresh, max_nimp_p=Inf, width_thresh=width_thresh, boundary_signal_thresh=-1 )[[1]]
- # }
-
- }
-
- # if(top_down==TRUE) trimmed_branches = lapply( branches, trim_tree_adaptive_top_down, max_imp_p=p_thresh, max_nimp_p=Inf, width_thresh=width_thresh, boundary_signal_thresh=boundary_signal_thresh )
- if(top_down==TRUE) trimmed_branches = lapply( branches, function(branch) trim_tree_adaptive_top_down_v2(wilcox_p_thresh=p_thresh, mean_diff_thresh=mean_diff_thresh, tree=branch ))
- if(CD_border_adj==TRUE)
- {
- all_tads = get_adjusted_nested_TADs( trimmed_branches, width_thresh_CD, all_levels )
- return( all_tads )
- }
-
- ## get all nested TADs in trimmed_branches
- if(all_levels==TRUE)
- {
- all_tads = data.frame(start_pos=numeric(), end_pos=numeric())
- widths = c(0, sapply(trimmed_branches, function(v) igraph::V(v)[1]$width))
- for(i in 1:length(trimmed_branches))
- {
- all_tads_i = get_all_tads_in_a_trimmed_branch(trimmed_branches[[i]], pos_shift=sum(widths[1:i]))
- all_tads = rbind(all_tads, all_tads_i)
- }
- return( all_tads )
- }
-
- if( return_which=='trimmed_branches' ) return( trimmed_branches )
-
- tad_sizes_ind = lapply( trimmed_branches, function(v) get_leaves(v, 'igraph')$width )
- tad_sizes = unlist(tad_sizes_ind)
- # tads = split(1:sum(tad_sizes), rep(seq_along(tad_sizes), tad_sizes))
- end_pos = cumsum(tad_sizes)
- start_pos = c(1, 1 + end_pos[-length(end_pos)])
- tads = data.frame(start_pos=start_pos, end_pos=end_pos)
- return( tads )
- }
- ## This function combines prunning with branches of only one node
- prunning_hybrid <- function(branches, ...)
- {
- names(branches) = as.character(1:length(branches))
- normal_branches = branches[sapply( branches, function(v) class(v)=='igraph' )]
- unnormal_branches = branches[sapply( branches, function(v) class(v)!='igraph' )] ## that is reprsented as bin_start:bin_end
- trimmed_branches = prunning(normal_branches, return_which='trimmed_branches', ...)
- normal_tad_sizes_ind = lapply( trimmed_branches, function(v) get_leaves(v, 'igraph')$width )
- unormal_tad_sizes_ind = unnormal_branches
- tad_sizes_ind = c(normal_tad_sizes_ind, unormal_tad_sizes_ind)
- tad_sizes_ind = tad_sizes_ind[names(branches)]
- tad_sizes = unlist(tad_sizes_ind)
- # tads = split(1:sum(tad_sizes), rep(seq_along(tad_sizes), tad_sizes))
- end_pos = cumsum(tad_sizes)
- start_pos = c(1, 1 + end_pos[-length(end_pos)])
- tads = data.frame(start_pos=start_pos, end_pos=end_pos)
- return( tads )
- }
- get_all_tads_in_a_trimmed_branch <- function(trimmed_branch, pos_shift)
- {
- res = data.frame( start_pos=igraph::V(trimmed_branch)$left + pos_shift, end_pos=igraph::V(trimmed_branch)$right + pos_shift )
- res = res[order(res[,1], res[,2]), ]
- return(res)
- }
-
- prunning_bottom_up <- function(branches, p0=NULL, width_thresh)
- {
- size2correct = sum(sapply(branches, igraph::vcount)) - sum(sapply(branches, function(v) length(get_leaves(v))))
-
- p_thresh = 0.05/size2correct
- if(!is.null(p0)) p_thresh = p0
-
- trimmed_branches = lapply( branches, trim_tree_adaptive, max_imp_p=p_thresh, max_nimp_p=Inf, width_thresh=width_thresh )
- tad_sizes_ind = lapply( trimmed_branches, function(v) get_leaves(v, 'igraph')$width )
- tad_sizes = unlist(tad_sizes_ind)
- # tads = split(1:sum(tad_sizes), rep(seq_along(tad_sizes), tad_sizes))
- end_pos = cumsum(tad_sizes)
- start_pos = c(1, 1 + end_pos[-length(end_pos)])
- tads = data.frame(start_pos=start_pos, end_pos=end_pos)
- return( tads )
- }
-
-
- trim_tree_adaptive_bottom_up <- function( tree, which_p='imp_p' )
- {
- if(which_p=='imp_p') ps = sort(unique(igraph::V(tree)$imp_p), decreasing=TRUE)
- # if(which_p=='nimp_p') ps = sort(unique(igraph::V(tree)$nimp_p), decreasing=TRUE)
- # if(which_p=='both') ps = sort(unique(pmin(igraph::V(tree)$nimp_p, igraph::V(tree)$imp_p)), decreasing=TRUE)
-
- trimed_tree_current = tree
- trimmed_branch_bottom_up = vector('list', length(ps))
- for(i in 1:length(ps))
- {
- trimed_tree_current = trim_tree_adaptive( tree, L_diff_thresh=-Inf, max_imp_p=ps[i], max_nimp_p=Inf, width_thresh=-Inf )
- trimmed_branch_bottom_up[[i]] = trimed_tree_current
- }
- igraph::vcounts = sapply(trimmed_branch_bottom_up, igraph::vcount)
- ps = ps[!duplicated(igraph::vcounts)]
- trimmed_branch_bottom_up = trimmed_branch_bottom_up[!duplicated(igraph::vcounts)]
- res = list(ps=ps, trimmed_branch_bottom_up=trimmed_branch_bottom_up)
- return( res )
- }
-
-
- ## get adjusted nested TADs
- get_adjusted_nested_TADs <- function( trimmed_branches, width_thresh_CD, all_levels )
- {
- widths = c(0, sapply(trimmed_branches, function(v) igraph::V(v)[1]$width))
- all_tads_i_list = lapply( 1:length(trimmed_branches), function(i) get_all_tads_in_a_trimmed_branch(trimmed_branches[[i]], pos_shift=sum(widths[1:i])))
- for(i in 1:length(trimmed_branches))
- {
- all_tads_i = all_tads_i_list[[i]]
- if( nrow(all_tads_i) <= 1 ) next
- ## move the left-most border a little bit right if needed
- left_borders = unique(all_tads_i[,1])
- min_diff_left = left_borders[2] - left_borders[1]
- if( min_diff_left <= width_thresh_CD )
- {
- all_tads_i[ all_tads_i==left_borders[1] ] = left_borders[2]
- all_tads_i = all_tads_i[ all_tads_i[,2] > all_tads_i[,1], ] ## remove "negative" TADs
- all_tads_i = unique(all_tads_i[order(all_tads_i[,1], all_tads_i[,2]), ]) ## reorder the TADs
- all_tads_i_list[[i]] = all_tads_i
- ## need to modify the right border of nested TADs in previous CD if the left border of this CD is modified
- if(i > 1)
- {
- ## replace the max value of [i-1], i.e., the right most border, as the min of [i]-1, i.e., the left most border of [i]
- all_tads_i_list[[i-1]][ all_tads_i_list[[i-1]]==max(all_tads_i_list[[i-1]]) ] = min(all_tads_i_list[[i]]) - 1
- }
- }
- if( nrow(all_tads_i) <= 1 ) next
- ## move the right-most border a little bit left if needed
- right_borders = unique(rev(all_tads_i[,2]))
- min_diff_right = right_borders[1] - right_borders[2]
- if( min_diff_right <= width_thresh_CD )
- {
- all_tads_i[ all_tads_i==right_borders[1] ] = right_borders[2]
- all_tads_i = all_tads_i[ all_tads_i[,2] > all_tads_i[,1], ]
- all_tads_i = unique(all_tads_i[order(all_tads_i[,1], all_tads_i[,2]), ]) ## reorder the TADs
-
- all_tads_i_list[[i]] = all_tads_i
- if(i < length(trimmed_branches))
- {
- ## replace the max value of [i-1], i.e., the right most border, as the min of [i]-1, i.e., the left most border of [i]
- all_tads_i_list[[i+1]][ all_tads_i_list[[i+1]]==min(all_tads_i_list[[i+1]]) ] = max(all_tads_i_list[[i]]) + 1
- }
- }
- }
- if(!all_levels) all_tads_i_list = lapply( all_tads_i_list, function(v) data.frame(start_pos=head(v[,1],1), end_pos=tail(v[,2],1)) )
- all_tads = do.call(rbind, all_tads_i_list)
- colnames(all_tads) = c('start_pos', 'end_pos')
- return( all_tads )
- }
-
-
-
-
|