123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177 |
- ## These functions try to aggregate bin-wise contact matrix into a domain-wise contact matrix
- HighResolution2Low_k_complete <-function( A, max_nbins )
- {
- nbins = nrow( A )
-
- max_nbins_bp = max_nbins
- max_nbinss = max_nbins + (-5:20)
- expand_fold_info = t(sapply(max_nbinss, fold_expand_rate, K=nbins))
- index = max(which(expand_fold_info[,'rescale_rate']==min(expand_fold_info[,'rescale_rate'])))
- max_nbins = max_nbinss[ index ]
- n2one_head = expand_fold_info[index, 'n2one_head']
- n2one_last = expand_fold_info[index, 'n2one_last']
-
- if(n2one_head==1) return(list())
- rescale_rate = n2one_last / n2one_head
- if(max_nbins!=max_nbins_bp) warning('Value of max_nbins has been adjusted from ', max_nbins_bp, ' to ', max_nbins, '. This results in rescale_rate as: ', rescale_rate, '\n')
-
- if(rescale_rate!=1) ## only do when rescale is not 1
- {
- aa_end = n2one_head*(max_nbins-1)
- bb_end = nrow(A)
- A_aa = A[1:aa_end, 1:aa_end]
- A_bb = A[(aa_end+1):bb_end, (aa_end+1):bb_end]
- A_ab = A[1:aa_end, (aa_end+1):bb_end, drop=FALSE]
- A_aa_low = HighResolution2Low_k(mat=A_aa, k=max_nbins-1)
- A_bb_low = sum( A_bb )
-
- ## https://stackoverflow.com/questions/15265512/summing-every-n-points-in-r
- v = apply(A_ab, 1, sum)
- A_ab_low = unname(tapply(v, (seq_along(v)-1) %/% n2one_head, sum))
-
- A_ab_low_new = A_ab_low / n2one_last*n2one_head
- A_bb_low_new = A_bb_low / n2one_last^2*n2one_head^2
- A_low = rbind(cbind(A_aa_low, A_ab_low_new), c( A_ab_low_new, A_bb_low_new ))
- colnames(A_low) = NULL
- }
-
- if(rescale_rate==1) A_low = HighResolution2Low_k(mat=A, k=max_nbins) ## k is the dimension of A_final_low
- ## A_low has no row/col name
- res = list(A_low=A_low, n2one_head=n2one_head, n2one_last=n2one_last)
- return(res)
- }
-
- split_info <-function( K, max_nbins )
- {
- x = floor( K/max_nbins ) + 1
- n2 = max_nbins*x - K
- n1 = max_nbins - n2
- vec = c(rep(x, n1), rep(x-1, n2))
- split_vec = split(1:K, rep(1:max_nbins, vec))
- expand_fold_ratio = min(n1/n2, n2/n1)
- res = list(vec=vec, expand_fold_ratio=expand_fold_ratio, split_vec=split_vec)
- return(res)
- }
-
-
- ## diagonal values are summed twice
- HighResolution2Low <-function( A, rescale, which=2 )
- {
- nbins = nrow(A)
- nbins_low = floor(nbins/rescale)
- A_low = matrix( , nbins_low, nbins_low)
- keep_rows = nbins - nbins%%nbins_low
- A_truncated = A[ 1:keep_rows, 1:keep_rows ]
-
- if(which==1) A_low = matsplitter_sum(A_truncated, keep_rows/nbins_low, keep_rows/nbins_low)
- if(which==2) A_low = mat_split_sum(A_truncated, keep_rows/nbins_low, keep_rows/nbins_low)
- return( A_low )
- }
-
- ## https://stackoverflow.com/questions/24299171/function-to-split-a-matrix-into-sub-matrices-in-r
- ## Function to split a matrix into sub-matrices in R
- matsplitter_sum <- function(M, r, c)
- {
- rg <- (row(M)-1)%/%r + 1
- cg <- (col(M)-1)%/%c + 1
- rci <- (rg-1)*max(cg) + cg
- N <- prod(dim(M))/r/c
- cv <- unlist(lapply(1:N, function(x) M[rci==x]))
- dim(cv)<-c(r, c, N)
- res = matrix(apply(cv, 3, sum), nrow(M)/r, byrow=TRUE)
- return(res)
- }
- mat_split_sum <- function(M, r, c)
- {
- nr <- ceiling(nrow(M)/r)
- nc <- ceiling(ncol(M)/c)
- newM <- matrix(NA, nr*r, nc*c)
- newM[1:nrow(M), 1:ncol(M)] <- M
- div_k <- kronecker(matrix(seq_len(nr*nc), nr, byrow = TRUE), matrix(1, r, c))
- matlist <- split(newM, div_k)
- res = matrix(sapply(matlist, sum), nrow(M)/r, byrow=TRUE)
- return(res)
- }
-
- ## this is much faster
- HighResolution2Low_k <- function(mat, k)
- {
- chunk2 <- function(x, n) split(x, cut(seq_along(x), n, labels = FALSE))
- n = nrow(mat)
- A_low = matrix( , k, k )
- indices = chunk2( 1:n, k )
- for(row_index in 1:k)
- {
- A_sub = mat[indices[[row_index]], ]
- for(col_index in 1:k) A_low[row_index, col_index] = sum( A_sub[ , indices[[col_index]]] )
- }
- return( A_low )
- }
-
- HighResolution2Low_k_rectangle <- function(mat, row_split, col_split, sum_or_mean=c('sum', 'mean', 'median', 'p_above_one'))
- {
- # if(remove_zero==TRUE) mat[mat==0] = NA
- k_row = length(row_split)
- k_col = length(col_split)
-
- A_low = matrix( , k_row, k_col )
-
- if(sum_or_mean=='sum')
- {
- for(row_index in 1:k_row)
- {
- A_sub = mat[row_split[[row_index]], , drop=FALSE ]
- for(col_index in 1:k_col) A_low[row_index, col_index] = sum( A_sub[ , col_split[[col_index]]] )
- }
- }
- if(sum_or_mean=='mean')
- {
- for(row_index in 1:k_row)
- {
- A_sub = mat[row_split[[row_index]], , drop=FALSE ]
- for(col_index in 1:k_col) A_low[row_index, col_index] = mean( A_sub[ , col_split[[col_index]]], na.rm=TRUE )
- }
- }
- if(sum_or_mean=='median')
- {
- for(row_index in 1:k_row)
- {
- A_sub = mat[row_split[[row_index]], , drop=FALSE ]
- for(col_index in 1:k_col) A_low[row_index, col_index] = median( A_sub[ , col_split[[col_index]]], na.rm=TRUE )
- }
- }
- if(sum_or_mean=='p_above_one')
- {
- for(row_index in 1:k_row)
- {
- A_sub = mat[row_split[[row_index]], , drop=FALSE ]
- for(col_index in 1:k_col) A_low[row_index, col_index] = mean( A_sub[ , col_split[[col_index]]] > 1 )
- }
- }
- # if(remove_zero==TRUE) A_low[is.na(A_low)] = 0
- return( A_low )
- }
-
- ## for a n x n matrix, this function generate a nxm matrix, with m*compress_size = n
- ## this function is used for generating the nxm matrix, where the i,jth value is the contact value
- ## 08-08-2018
- compress_mat_fast = function(input_mat, compress_size)
- {
- mat_block <- function(n, r) suppressWarnings( matrix(c(rep(1, r), rep(0, n)), n, n/r) )
- n = ncol(input_mat)
- mat2prod = mat_block(n, compress_size)
- # return(t(input_mat%*%mat2prod / compress_size))
- return(t(matrix_multiplication_cpp(input_mat, mat2prod)) / compress_size)
- }
-
-
|