Browse Source

Improved logging

lucananni93 2 years ago
parent
commit
78727a4b5d
3 changed files with 16 additions and 14 deletions
  1. 10 10
      R/CALDER_hierarchy_v2.R
  2. 3 0
      R/generate_compartments_bed_fun.R
  3. 3 4
      R/zigzag_nested_domain_v2.R

+ 10 - 10
R/CALDER_hierarchy_v2.R

@@ -221,17 +221,17 @@
 	        log_file = paste0(save_dir, '/chr', chr_num, '_log.txt')
 	        log_file = paste0(save_dir, '/chr', chr_num, '_log.txt')
 	        warning_file = paste0(save_dir, '/WARNING_chr', chr_num, '.txt')
 	        warning_file = paste0(save_dir, '/WARNING_chr', chr_num, '.txt')
 	        cor_log_file = paste0(save_dir, '/cor_with_ref.txt')
 	        cor_log_file = paste0(save_dir, '/cor_with_ref.txt')
-	        cat('\n')
 
 
 	        cat('>>>> Begin process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n', file=log_file, append=FALSE)
 	        cat('>>>> Begin process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n', file=log_file, append=FALSE)
-	        cat('>>>> Begin process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n')
+	        
+	        cat('[', as.character(chr),'] Begin process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n')
 	        processed_data = contact_mat_processing_v2(contact_tab_dump, contact_file_dump=contact_file_dump, contact_file_hic=contact_file_hic, chr=chr_num, bin_size_input=bin_size_input, bin_size2look=bin_size2look, black_list_bins=black_list_bins)
 	        processed_data = contact_mat_processing_v2(contact_tab_dump, contact_file_dump=contact_file_dump, contact_file_hic=contact_file_hic, chr=chr_num, bin_size_input=bin_size_input, bin_size2look=bin_size2look, black_list_bins=black_list_bins)
 	     
 	     
 
 
 	        mat_dense = processed_data$mat_dense
 	        mat_dense = processed_data$mat_dense
 	        ccmat_dense_compressed_log_atanh = processed_data$atanh_score
 	        ccmat_dense_compressed_log_atanh = processed_data$atanh_score
 
 
-	        cat('\r', '>>>> Finish process contact matrix and compute correlation score at:', as.character(Sys.time()))
+	        cat('[', as.character(chr),'] Finish process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n')
 	        cat('>>>> Finish process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n', file=log_file, append=TRUE)
 	        cat('>>>> Finish process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n', file=log_file, append=TRUE)
 
 
 	        p_thresh = ifelse(bin_size2look < 40000, 0.05, 1)
 	        p_thresh = ifelse(bin_size2look < 40000, 0.05, 1)
@@ -239,7 +239,7 @@
 	        compartments = vector("list", 2)
 	        compartments = vector("list", 2)
 
 
 	        cat('>>>> Begin compute compartment domains and their hierachy at:', as.character(Sys.time()), '\n', file=log_file, append=TRUE)
 	        cat('>>>> Begin compute compartment domains and their hierachy at:', as.character(Sys.time()), '\n', file=log_file, append=TRUE)
-	        cat('\r', '>>>> Begin compute compartment domains and their hierachy at:', as.character(Sys.time()))
+	        cat('[', as.character(chr),'] Begin compute compartment domains and their hierachy at:', as.character(Sys.time()), '\n')
 
 
 	        compartments[[2]] = generate_compartments_bed(chr = chr, bin_size = bin_size2look, window.sizes = window.sizes, ccmat_dense_compressed_log_atanh, p_thresh, out_file_name = NULL, stat_window_size = NULL)
 	        compartments[[2]] = generate_compartments_bed(chr = chr, bin_size = bin_size2look, window.sizes = window.sizes, ccmat_dense_compressed_log_atanh, p_thresh, out_file_name = NULL, stat_window_size = NULL)
 	        topDom_output = compartments[[2]]
 	        topDom_output = compartments[[2]]
@@ -304,8 +304,8 @@
 
 
 
 
 				if(class(cor_with_ref)=='try-error') cor_with_ref = 1 ## psudo cor
 				if(class(cor_with_ref)=='try-error') cor_with_ref = 1 ## psudo cor
-	        	if(!is.null(ref_compartment_file)) cat('\r', ">>>> Correlation between PC1 and reference compartment is :", format(abs(cor_with_ref), digits=5), '\n')
-	        	if(is.null(ref_compartment_file)) cat('\r', ">>>> Correlation between PC1 and feature_track is :", format(abs(cor_with_ref), digits=5), '\n')
+	        	if(!is.null(ref_compartment_file)) cat('[', as.character(chr),'] Correlation between PC1 and reference compartment is :', format(abs(cor_with_ref), digits=5), '\n')
+	        	if(is.null(ref_compartment_file)) cat('[', as.character(chr),'] Correlation between PC1 and feature_track is :', format(abs(cor_with_ref), digits=5), '\n')
 
 
 				PC_direction = PC_direction*sign(cor_with_ref)
 				PC_direction = PC_direction*sign(cor_with_ref)
 			    if(swap_AB==1) PC_direction = -PC_direction ## force swap PC direction if in some case the A/B direction is reverted
 			    if(swap_AB==1) PC_direction = -PC_direction ## force swap PC direction if in some case the A/B direction is reverted
@@ -348,7 +348,7 @@
 
 
 
 
 	        cat('>>>> Finish compute compartment domains and their hierachy at: ', as.character(Sys.time()), '\n', file=log_file, append=TRUE)
 	        cat('>>>> Finish compute compartment domains and their hierachy at: ', as.character(Sys.time()), '\n', file=log_file, append=TRUE)
-	        cat('\r', '>>>> Finish compute compartment domains and their hierachy at: ', as.character(Sys.time()))
+	        cat('[', as.character(chr),'] Finish compute compartment domains and their hierachy at: ', as.character(Sys.time()), '\n')
 
 
 	       	if(!is.null(ref_compartment_file)) cat('Correlation between PC1 and reference compartment domain rank on this chr is: ', format(abs(cor_with_ref), 1, 5), '\n', file=log_file, append=TRUE)
 	       	if(!is.null(ref_compartment_file)) cat('Correlation between PC1 and reference compartment domain rank on this chr is: ', format(abs(cor_with_ref), 1, 5), '\n', file=log_file, append=TRUE)
 	       	if(is.null(ref_compartment_file)) cat('Correlation between PC1 and feature_track on this chr is: ', format(abs(cor_with_ref), 1, 5), '\n', file=log_file, append=TRUE)	       	
 	       	if(is.null(ref_compartment_file)) cat('Correlation between PC1 and feature_track on this chr is: ', format(abs(cor_with_ref), 1, 5), '\n', file=log_file, append=TRUE)	       	
@@ -487,7 +487,7 @@
 		    time0 = Sys.time()
 		    time0 = Sys.time()
 		    log_file = paste0(save_dir, '/chr', chr, '_sub_domains_log.txt')
 		    log_file = paste0(save_dir, '/chr', chr, '_sub_domains_log.txt')
 
 
-		   	cat('\r', '>>>> Begin compute sub-domains at:', as.character(Sys.time()))
+		   	cat('[', as.character(chr),']Begin compute sub-domains at:', as.character(Sys.time()))
 		   	cat('>>>> Begin compute sub-domains at:', as.character(Sys.time()), '\n', file=log_file, append=FALSE)
 		   	cat('>>>> Begin compute sub-domains at:', as.character(Sys.time()), '\n', file=log_file, append=FALSE)
 
 
 			if(is.null(intermediate_data)) intermediate_data = readRDS(intermediate_data_file)
 			if(is.null(intermediate_data)) intermediate_data = readRDS(intermediate_data_file)
@@ -497,7 +497,7 @@
 			    if( !setequal(rownames(intermediate_data$mat), intermediate_data$bin_names) ) stop('!setequal(rownames(intermediate_data$mat), intermediate_data$bin_names) \n')     
 			    if( !setequal(rownames(intermediate_data$mat), intermediate_data$bin_names) ) stop('!setequal(rownames(intermediate_data$mat), intermediate_data$bin_names) \n')     
 			    compartment_segs = generate_compartment_segs( intermediate_data$initial_clusters )
 			    compartment_segs = generate_compartment_segs( intermediate_data$initial_clusters )
 
 
-				cat('\r', '>>>> Begin compute sub-domains within each compartment domain at:', as.character(Sys.time()))   			
+				cat('[', as.character(chr),'] Begin compute sub-domains within each compartment domain at:', as.character(Sys.time()), '\n')   			
 				cat('>>>> Begin compute sub-domains within each compartment domain at:', as.character(Sys.time()), '\n', file=log_file, append=TRUE)
 				cat('>>>> Begin compute sub-domains within each compartment domain at:', as.character(Sys.time()), '\n', file=log_file, append=TRUE)
 
 
 				sub_domains_raw = HRG_zigzag_compartment_domain_main_fun(intermediate_data$mat, './', compartment_segs, min_n_bins=2)   
 				sub_domains_raw = HRG_zigzag_compartment_domain_main_fun(intermediate_data$mat, './', compartment_segs, min_n_bins=2)   
@@ -505,7 +505,7 @@
 			    no_output = post_process_sub_domains(chr, sub_domains_raw, ncores=1, out_dir=save_dir, bin_size=bin_size)
 			    no_output = post_process_sub_domains(chr, sub_domains_raw, ncores=1, out_dir=save_dir, bin_size=bin_size)
 
 
 			    cat('>>>> Finish compute sub-domains within each compartment domain at:', as.character(Sys.time()), '\n', file=log_file, append=TRUE)
 			    cat('>>>> Finish compute sub-domains within each compartment domain at:', as.character(Sys.time()), '\n', file=log_file, append=TRUE)
-			    cat('\r', '>>>> Finish compute sub-domains within each compartment domain at:', as.character(Sys.time()), '\n')
+			    cat('[', as.character(chr),'] Finish compute sub-domains within each compartment domain at:', as.character(Sys.time()), '\n')
 
 
 			   	time1 = Sys.time()
 			   	time1 = Sys.time()
 		        # delta_time  = gsub('Time difference of', 'Total time used for computing compartment domains and their hierachy:', print(time1 - time0))
 		        # delta_time  = gsub('Time difference of', 'Total time used for computing compartment domains and their hierachy:', print(time1 - time0))

+ 3 - 0
R/generate_compartments_bed_fun.R

@@ -2,8 +2,11 @@
 
 
 	generate_compartments_bed <- function(input_mat, p_thresh, out_file_name, chr, window.sizes=3, stat_window_size=NULL, bin_size)
 	generate_compartments_bed <- function(input_mat, p_thresh, out_file_name, chr, window.sizes=3, stat_window_size=NULL, bin_size)
 	{	
 	{	
+
 		input_mat_extended = data.frame(chr=paste0('chr', chr), pos_start=0:(nrow(input_mat)-1), pos_end=1:nrow(input_mat), mat=input_mat)
 		input_mat_extended = data.frame(chr=paste0('chr', chr), pos_start=0:(nrow(input_mat)-1), pos_end=1:nrow(input_mat), mat=input_mat)
+
 		res_input_mat = TopDom_v2(input_mat_extended, window.size=NULL, NULL, T, p_thresh=p_thresh, window.sizes=window.sizes, stat_window_size=stat_window_size, domain_size_min=NULL)
 		res_input_mat = TopDom_v2(input_mat_extended, window.size=NULL, NULL, T, p_thresh=p_thresh, window.sizes=window.sizes, stat_window_size=stat_window_size, domain_size_min=NULL)
+		cat('[', as.character(chr),'] Computing compartments\n')
 		# return(res_input_mat)
 		# return(res_input_mat)
 		to_id = as.numeric(rownames(input_mat)[res_input_mat$domain$to.id])
 		to_id = as.numeric(rownames(input_mat)[res_input_mat$domain$to.id])
 		from_id = as.numeric(rownames(input_mat)[res_input_mat$domain$from.id])
 		from_id = as.numeric(rownames(input_mat)[res_input_mat$domain$from.id])

+ 3 - 4
R/zigzag_nested_domain_v2.R

@@ -61,12 +61,11 @@
 		# }
 		# }
 
 
 		## changed to parallel, 2018-11-11
 		## changed to parallel, 2018-11-11
-		cat('\n')
 
 
 		res_inner = foreach::foreach(i=1:nrow(compartment_segs)) %do%
 		res_inner = foreach::foreach(i=1:nrow(compartment_segs)) %do%
 		{
 		{
 			seg = compartment_segs[i,1]:compartment_segs[i,2]
 			seg = compartment_segs[i,1]:compartment_segs[i,2]
-			cat('\r', sprintf('Find sub-domains in %d of %d CDs | length of current CD: %d bins', i, nrow(compartment_segs), length(seg)))
+			cat('\r', sprintf('Find sub-domains in %d of %d CDs | length of current CD: %d bins\n', i, nrow(compartment_segs), length(seg)))
 
 
 			A_seg = pA_sym[seg, seg]
 			A_seg = pA_sym[seg, seg]
 			res_zigzag = zigzag_loglik_ancestors_v4_5(A_seg, nrow(A_seg), min_n_bins=min_n_bins)
 			res_zigzag = zigzag_loglik_ancestors_v4_5(A_seg, nrow(A_seg), min_n_bins=min_n_bins)
@@ -84,8 +83,8 @@
 		# save(res_info, file=file.path(res_folder_final, 'res_info.Rdata'))
 		# save(res_info, file=file.path(res_folder_final, 'res_info.Rdata'))
 	
 	
 		time_finish = Sys.time()
 		time_finish = Sys.time()
-		cat('Execution finishes:', as.character(time_finish), '\n\n', file=total_execution_time_file, append=TRUE)
-		cat('Total execution time:', capture.output( time_finish - time_begin ), '\n\n', file=total_execution_time_file, append=TRUE)
+		cat('Execution finishes:', as.character(time_finish), '\n', file=total_execution_time_file, append=TRUE)
+		cat('Total execution time:', capture.output( time_finish - time_begin ), '\n', file=total_execution_time_file, append=TRUE)
 	
 	
 		return( res_info )
 		return( res_info )
 	}
 	}