123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171 |
-
-
- build_comp_table_opt = function(save_dir, chrs, bin_sizes, with_ref)
- {
- `%dopar%` <- foreach::`%dopar%`
- `%do%` <- foreach::`%do%`
- get_opt_index = function(consist_tab)
- {
- if(ncol(consist_tab)==2)
- {
- opt_index = rep(1, nrow(consist_tab))
- return(opt_index)
- }
- mins = apply(consist_tab[,-1], 1, max) - 0.05
- opt_index = apply(1*((consist_tab[,-1] >= mins)==1), 1, function(v) min(which(v==1)))
- return(opt_index)
- }
-
-
- identities = 'bulk'
- {
- consist_tab_li = foreach::foreach(identity=identities) %do%
- {
-
-
-
-
-
-
-
-
-
-
-
- consist_tab = foreach::foreach(bin_size=bin_sizes, .combine=merge) %do%
- {
- bin_size_kb = sprintf("%skb", bin_size/1E3)
- save_dir_binsize = file.path(save_dir, 'intermediate_data/sub_compartments', bin_size_kb)
- consist_tab_tmp = data.table::data.table(chr=paste0('chr', chrs), val=0)
- consist_tab_tmp$val = foreach::foreach(chr=chrs, .combine=c) %do%
- {
- log_file = paste0(save_dir_binsize, '/chr', chr, '_log.txt')
-
- cor_val = as.numeric(strsplit(readLines(log_file)[5], 'this chr is:')[[1]][2])
-
- cor_val
- }
- colnames(consist_tab_tmp)[2] = bin_size_kb
- consist_tab_tmp
- }
- s = gsub('chr', '', consist_tab[['chr']])
- x <- suppressWarnings(as.numeric(s))
- consist_tab = consist_tab[order(x, 'is.na<-'(s, !is.na(x))), ]
-
- }
- names(consist_tab_li) = identities
- min_consist = (sapply(consist_tab_li, function(v) min(v[,2])))
-
-
- {
- save_dir_opt = file.path(save_dir, "sub_compartments")
- dir.create(save_dir_opt, recursive=TRUE, showWarnings=FALSE)
-
- consist_tab = consist_tab_li[[identity]]
- opt_index = get_opt_index(consist_tab)
- names(opt_index) = gsub(":", "", consist_tab$chr)
- opt_bin_tab = data.table::data.table(chr=names(opt_index), opt_binsize=(colnames(consist_tab)[-1])[opt_index])
- consist_tab_tmp = cbind( consist_tab, opt_bin_tab[, 'opt_binsize'])
- colnames(consist_tab_tmp)[ncol(consist_tab_tmp)] = 'opt_binsize'
-
-
-
- cor_all_file = file.path(save_dir_opt, "cor_with_ref.ALL.txt")
- data.table::fwrite(consist_tab_tmp, file=cor_all_file, col.names=TRUE, sep="\t")
- cor_with_ref = foreach::foreach(j=1:nrow(opt_bin_tab), .combine=c) %do%
- {
- chr_query = opt_bin_tab$chr[j]
- opt_binsize = opt_bin_tab$opt_binsize[j]
- row_index = which(consist_tab$chr == chr_query)
- col_index = which(colnames(consist_tab)==opt_binsize)
- as.data.frame(consist_tab)[row_index, col_index]
- }
-
- cor_with_ref_tab = data.table::data.table(chr=consist_tab$chr, cor=cor_with_ref, chr_num=gsub("chr", "", opt_bin_tab$chr))
- s = cor_with_ref_tab$chr_num
- x <- suppressWarnings(as.numeric(s))
- cor_with_ref_tab = cor_with_ref_tab[order(x, 'is.na<-'(s, !is.na(x))), ]
- cor_opt_file = file.path(save_dir_opt, "cor_with_ref.txt")
- data.table::fwrite(cor_with_ref_tab[, 1:2], file=cor_opt_file, col.names=TRUE, sep="\t")
-
- cor_with_ref_tab$index = 1:nrow(cor_with_ref_tab)
- cor_opt_plot_file = file.path(save_dir_opt, "cor_with_ref.pdf")
- pdf(cor_opt_plot_file, width=8, height=6)
- p = ggplot2::ggplot(data=cor_with_ref_tab, ggplot2::aes(x=index, y=cor, label = format(round(cor_with_ref_tab$cor, 2), nsmall = 2))) + ggplot2::geom_line(color="red") + ggplot2::geom_point()
- p = p + ggplot2::geom_label() + ggplot2::scale_x_continuous(labels=cor_with_ref_tab$chr_num, breaks=1:nrow(cor_with_ref_tab))
- p = p + ggplot2::geom_hline(yintercept=ifelse(with_ref, 0.4, 0.15), linetype=2, color='darkblue')
- if(with_ref) p = p + ggplot2::xlab('chr') + ggplot2::ylab('Rho') + ggplot2::ggtitle('Correlation of compartment rank with reference\nRho < 0.4 indicates imprecise compartment calling')
- if(!with_ref) p = p + ggplot2::xlab('chr') + ggplot2::ylab('Rho') + ggplot2::ggtitle('Correlation of compartment rank with reference\nRho < 0.15 indicates imprecise compartment calling')
- print(p)
- dev.off()
-
- opt_sub_comp_beds = foreach::foreach(i=1:nrow(opt_bin_tab)) %do%
- {
- opt_bin_size_kb = opt_bin_tab[[2]][i]
- chr_name = opt_bin_tab[[1]][i]
- opt_sub_comp_bed_chr = sprintf("%s/%s_sub_compartments.bed", file.path(save_dir, 'intermediate_data/sub_compartments', opt_bin_size_kb), chr_name)
- opt_sub_comp_bed_chr
- }
-
- save_opt_file = file.path(save_dir_opt, sprintf("all_sub_compartments.bed"))
- cmd_opt = sprintf("cat %s > %s", paste0(opt_sub_comp_beds, collapse=" "), save_opt_file)
- system(cmd_opt)
-
-
- comp_raw = data.table::fread(save_opt_file)
- comp_tab = make_comp_tab(comp_raw, bin_size=10E3)
- cols2keep = c('chr', 'pos_start', 'pos_end', 'comp_name', 'comp_rank', 'continous_rank')
- comp_tab = comp_tab[, c('chr', 'pos_start', 'pos_end', 'comp_name', 'comp_rank', 'continous_rank')]
-
- save_opt_tab_file = file.path(save_dir_opt, sprintf("all_sub_compartments.tsv"))
- data.table::fwrite(comp_tab, file=save_opt_tab_file, sep='\t')
- }
- }
- }
-
-
|