|
@@ -121,9 +121,6 @@
|
|
|
|
|
|
CALDER_CD_hierarchy_v2 = function(contact_tab_dump=NULL, contact_file_dump=NULL, contact_file_hic=NULL, chr, bin_size_input, bin_size2look, save_dir, save_intermediate_data=FALSE, swap_AB, ref_compartment_file, black_list_bins=NULL, feature_track=NULL)
|
|
CALDER_CD_hierarchy_v2 = function(contact_tab_dump=NULL, contact_file_dump=NULL, contact_file_hic=NULL, chr, bin_size_input, bin_size2look, save_dir, save_intermediate_data=FALSE, swap_AB, ref_compartment_file, black_list_bins=NULL, feature_track=NULL)
|
|
{
|
|
{
|
|
- chr_num = gsub('chr', '', chr, ignore.case=TRUE)
|
|
|
|
- chr_name = paste0('chr', chr_num)
|
|
|
|
-
|
|
|
|
get_cor_with_ref = function(chr_bin_pc, bin_size, ref_compartment_file=NULL, feature_track=NULL) ## correlation of PC1 with comp. domain rank of genome
|
|
get_cor_with_ref = function(chr_bin_pc, bin_size, ref_compartment_file=NULL, feature_track=NULL) ## correlation of PC1 with comp. domain rank of genome
|
|
{
|
|
{
|
|
ext_chr_bin_pc = function(chr_bin_pc) ## spand chr_bin_pc using 5kb bin
|
|
ext_chr_bin_pc = function(chr_bin_pc) ## spand chr_bin_pc using 5kb bin
|
|
@@ -195,14 +192,12 @@
|
|
{
|
|
{
|
|
colnames(feature_track) = c('chr', 'start', 'end', 'score')
|
|
colnames(feature_track) = c('chr', 'start', 'end', 'score')
|
|
feature_track$chr = gsub('chr', '', feature_track$chr)
|
|
feature_track$chr = gsub('chr', '', feature_track$chr)
|
|
- feature_track_chr = feature_track[feature_track$chr==chr_num, ]
|
|
|
|
|
|
+ feature_track_chr = feature_track[feature_track$chr==chr, ]
|
|
ref_tab = get_binned_vals(feature_track_chr)
|
|
ref_tab = get_binned_vals(feature_track_chr)
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
- chr_name = gsub("chrchr", "chr", paste0("chr", ref_tab$chr))
|
|
|
|
- # chr_name = ifelse(chr_name=="chr23", "chrX", chr_name)
|
|
|
|
- ref_tab$chr = chr_name
|
|
|
|
|
|
+ ref_tab$chr = chr
|
|
colnames(ref_tab)[2] = "bin_index"
|
|
colnames(ref_tab)[2] = "bin_index"
|
|
|
|
|
|
################################# convert chr_bin_pc into 5kb
|
|
################################# convert chr_bin_pc into 5kb
|
|
@@ -218,14 +213,14 @@
|
|
|
|
|
|
|
|
|
|
time0 = Sys.time()
|
|
time0 = Sys.time()
|
|
- log_file = paste0(save_dir, '/chr', chr_num, '_log.txt')
|
|
|
|
- warning_file = paste0(save_dir, '/WARNING_chr', chr_num, '.txt')
|
|
|
|
|
|
+ log_file = paste0(save_dir, '/', chr, '_log.txt')
|
|
|
|
+ warning_file = paste0(save_dir, '/WARNING', chr, '.txt')
|
|
cor_log_file = paste0(save_dir, '/cor_with_ref.txt')
|
|
cor_log_file = paste0(save_dir, '/cor_with_ref.txt')
|
|
|
|
|
|
cat('>>>> Begin process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n', file=log_file, append=FALSE)
|
|
cat('>>>> Begin process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n', file=log_file, append=FALSE)
|
|
|
|
|
|
cat('[', as.character(chr),'] Begin process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n')
|
|
cat('[', as.character(chr),'] Begin process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n')
|
|
- processed_data = contact_mat_processing_v2(contact_tab_dump, contact_file_dump=contact_file_dump, contact_file_hic=contact_file_hic, chr=chr_num, bin_size_input=bin_size_input, bin_size2look=bin_size2look, black_list_bins=black_list_bins)
|
|
|
|
|
|
+ processed_data = contact_mat_processing_v2(contact_tab_dump, contact_file_dump=contact_file_dump, contact_file_hic=contact_file_hic, chr=chr, bin_size_input=bin_size_input, bin_size2look=bin_size2look, black_list_bins=black_list_bins)
|
|
|
|
|
|
|
|
|
|
mat_dense = processed_data$mat_dense
|
|
mat_dense = processed_data$mat_dense
|
|
@@ -279,7 +274,7 @@
|
|
## switch PC direction based on correlation with "ground_truth"
|
|
## switch PC direction based on correlation with "ground_truth"
|
|
{
|
|
{
|
|
initial_clusters_ori_bins = lapply(initial_clusters, function(v) as.numeric(bin_names[v]))
|
|
initial_clusters_ori_bins = lapply(initial_clusters, function(v) as.numeric(bin_names[v]))
|
|
- chr_bin_pc = data.table::data.table(chr=chr_name, bin=unlist(initial_clusters_ori_bins), PC1_val=rep(PC_12_atanh[,1], sapply(initial_clusters_ori_bins, length)))
|
|
|
|
|
|
+ chr_bin_pc = data.table::data.table(chr=chr, bin=unlist(initial_clusters_ori_bins), PC1_val=rep(PC_12_atanh[,1], sapply(initial_clusters_ori_bins, length)))
|
|
chr_bin_pc$start = (chr_bin_pc$bin - 1)*bin_size2look + 1
|
|
chr_bin_pc$start = (chr_bin_pc$bin - 1)*bin_size2look + 1
|
|
chr_bin_pc$end = chr_bin_pc$bin*bin_size2look
|
|
chr_bin_pc$end = chr_bin_pc$bin*bin_size2look
|
|
|
|
|
|
@@ -410,7 +405,7 @@
|
|
|
|
|
|
generate_hierachy_bed = function(chr, res, save_dir, bin_size)
|
|
generate_hierachy_bed = function(chr, res, save_dir, bin_size)
|
|
{
|
|
{
|
|
- chr_name = paste0('chr', chr)
|
|
|
|
|
|
+ chr_name = chr
|
|
# res = reses[[chr_name]][[CELL_LINE]]
|
|
# res = reses[[chr_name]][[CELL_LINE]]
|
|
hc = res$CALDER_hc
|
|
hc = res$CALDER_hc
|
|
|
|
|