Ver código fonte

Updated functions

Yuanlong LIU 2 anos atrás
pai
commit
38196af9d5
5 arquivos alterados com 19 adições e 16 exclusões
  1. BIN
      .DS_Store
  2. 2 2
      R/CALDER_hierarchy_v2.R
  3. 10 7
      R/CALDER_main.R
  4. 6 6
      R/compartment_data_generation_fun.R
  5. 1 1
      README.md

BIN
.DS_Store


+ 2 - 2
R/CALDER_hierarchy_v2.R

@@ -119,7 +119,7 @@
 		# }
 
 
-		CALDER_CD_hierarchy_v2 = function(contact_tab_straw=NULL, contact_file_straw=NULL, contact_file_hic=NULL, chr, bin_size_input, bin_size2look, save_dir, save_intermediate_data=FALSE, swap_AB, ref_compartment_file, black_list_bins=NULL, annotation_track=NULL)
+		CALDER_CD_hierarchy_v2 = function(contact_tab_dump=NULL, contact_file_dump=NULL, contact_file_hic=NULL, chr, bin_size_input, bin_size2look, save_dir, save_intermediate_data=FALSE, swap_AB, ref_compartment_file, black_list_bins=NULL, annotation_track=NULL)
 		{
 			chr_num = gsub('chr', '', chr, ignore.case=TRUE)
 			chr_name = paste0('chr', chr_num)
@@ -225,7 +225,7 @@
 
 	        cat('>>>> Begin process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n', file=log_file, append=FALSE)
 	        cat('>>>> Begin process contact matrix and compute correlation score at:', as.character(Sys.time()), '\n')
-	        processed_data = contact_mat_processing_v2(contact_tab_straw, contact_file_straw=contact_file_straw, contact_file_hic=contact_file_hic, chr=chr_num, bin_size_input=bin_size_input, bin_size2look=bin_size2look, black_list_bins=black_list_bins)
+	        processed_data = contact_mat_processing_v2(contact_tab_dump, contact_file_dump=contact_file_dump, contact_file_hic=contact_file_hic, chr=chr_num, bin_size_input=bin_size_input, bin_size2look=bin_size2look, black_list_bins=black_list_bins)
 	     
 
 	        mat_dense = processed_data$mat_dense

+ 10 - 7
R/CALDER_main.R

@@ -1,5 +1,5 @@
 
-    CALDER_v2 = function(contact_tab_straw=NULL, contact_file_straw=NULL, contact_file_hic=NULL, chrs, bin_size, save_dir, save_intermediate_data=FALSE, swap_AB=0, ref_genome=c("hg19", 'hg38', "mm10", 'dm6')[1], annotation_track=NULL, black_list_bins=NULL, n_cores=1, sub_domains=FALSE, single_binsize_only=FALSE)
+    CALDER = function(contact_tab_dump=NULL, contact_file_dump=NULL, contact_file_hic=NULL, chrs, bin_size, save_dir, save_intermediate_data=FALSE, swap_AB=0, ref_genome=c("hg19", 'hg38', "mm10", 'dm6')[1], annotation_track=NULL, black_list_bins=NULL, n_cores=1, sub_domains=FALSE, single_binsize_only=FALSE)
     {
     		########################### https://stackoverflow.com/questions/30216613/how-to-use-dopar-when-only-import-foreach-in-description-of-a-package
 
@@ -8,18 +8,21 @@
 
     		###########################
 
+            chrs = as.character(chrs)
             n_chrs = length(chrs)
 
-            if(n_chrs > 1) ## when computing for multiple chrs, contact_tab_straw or contact_file_straw should be a list with elements matching to that of chrs
+            if(n_chrs > 1) ## when computing for multiple chrs, contact_tab_dump or contact_file_dump should be a list with elements matching to that of chrs
             {
-            	if(!is.null(contact_tab_straw))
+            	if(!is.null(contact_tab_dump))
             	{
-            		if(class(contact_tab_straw)!='list' | length(contact_tab_straw)!=n_chrs) stop('contact_tab_straw should be a list of contact table with its length equal to the length of chrs you specified\n')
+            		if(class(contact_tab_dump)!='list' | length(contact_tab_dump)!=n_chrs) stop('contact_tab_dump should be a list of contact table with its length equal to the length of chrs you specified\n')
+                    names(contact_tab_dump) = chrs
             	}
 
-            	if(!is.null(contact_file_straw))
+            	if(!is.null(contact_file_dump))
             	{
-            		if(class(contact_file_straw)!='list' | length(contact_file_straw)!=n_chrs) stop('contact_file_straw should be a list of contact table with its length equal to the length of chrs you specified\n')
+            		if(class(contact_file_dump)!='list' | length(contact_file_dump)!=n_chrs) stop('contact_file_dump should be a list of contact table with its length equal to the length of chrs you specified\n')
+                    names(contact_file_dump) = chrs
             	}
             }
 
@@ -66,7 +69,7 @@
 
             	save_intermediate_data_tmp = save_intermediate_data*(bin_size2look==bin_size)
 
-	            CALDER_CD_hierarchy_v2(contact_tab_straw=contact_tab_straw, contact_file_straw=contact_file_straw, contact_file_hic=contact_file_hic, chr=chr, bin_size_input=bin_size, bin_size2look=bin_size2look, save_dir=save_dir_binsize, save_intermediate_data=save_intermediate_data_tmp, swap_AB=swap_AB, ref_compartment_file=ref_compartment_file, annotation_track=annotation_track, black_list_bins=black_list_bins)
+	            CALDER_CD_hierarchy_v2(contact_tab_dump=contact_tab_dump[[chr]], contact_file_dump=contact_file_dump[[chr]], contact_file_hic=contact_file_hic, chr=chr, bin_size_input=bin_size, bin_size2look=bin_size2look, save_dir=save_dir_binsize, save_intermediate_data=save_intermediate_data_tmp, swap_AB=swap_AB, ref_compartment_file=ref_compartment_file, annotation_track=annotation_track, black_list_bins=black_list_bins)
 	        }
 
             ########################### build optimal compartment from multiple resolutions

+ 6 - 6
R/compartment_data_generation_fun.R

@@ -19,14 +19,14 @@
 		return(mat_compressed)
 	}
 
-	contact_mat_processing_v2 = function(contact_tab_straw=NULL, contact_file_straw=NULL, contact_file_hic=NULL, chr_num, bin_size_input, bin_size2look, black_list_bins=NULL)
+	contact_mat_processing_v2 = function(contact_tab_dump=NULL, contact_file_dump=NULL, contact_file_hic=NULL, chr_num, bin_size_input, bin_size2look, black_list_bins=NULL)
 	{	
 		compress_size = ifelse(bin_size2look < 40E3, 1, 1)
 		zero_ratio = 0.01
 
 
-		if(!is.null(contact_tab_straw)) contact_mat_raw = contact_tab_straw ## if contact matrix in data.frame or data.table of strawr format is provided
-		if(!is.null(contact_file_straw)) contact_mat_raw = data.table::fread(contact_file_straw) ## if contact matrix in strawr format is provided
+		if(!is.null(contact_tab_dump)) contact_mat_raw = contact_tab_dump ## if contact matrix in data.frame or data.table of strawr format is provided
+		if(!is.null(contact_file_dump)) contact_mat_raw = data.table::fread(contact_file_dump) ## if contact matrix in strawr format is provided
 
 		if(!is.null(contact_file_hic)) ## if contact matrix in hic format is provided
 		{
@@ -39,9 +39,9 @@
 
 			## try different normalization to get available dataset
 
-			contact_mat_raw = try(strawr::straw("KR", contact_file_hic, as.character(chr2query), as.character(chr2query), "BP", bin_size_input))
-			if(class(contact_mat_raw)=='try-error' | (class(contact_mat_raw)!='try-error' & nrow(na.omit(contact_mat_raw)) < 100)) contact_mat_raw = try(strawr::straw("VC_SQRT", contact_file_hic, as.character(chr2query), as.character(chr2query), "BP", bin_size_input))
-			if(class(contact_mat_raw)=='try-error' | (class(contact_mat_raw)!='try-error' & nrow(na.omit(contact_mat_raw)) < 100)) contact_mat_raw = try(strawr::straw("VC", contact_file_hic, as.character(chr2query), as.character(chr2query), "BP", bin_size_input))
+			contact_mat_raw = try(strawr::dump("KR", contact_file_hic, as.character(chr2query), as.character(chr2query), "BP", bin_size_input))
+			if(class(contact_mat_raw)=='try-error' | (class(contact_mat_raw)!='try-error' & nrow(na.omit(contact_mat_raw)) < 100)) contact_mat_raw = try(strawr::dump("VC_SQRT", contact_file_hic, as.character(chr2query), as.character(chr2query), "BP", bin_size_input))
+			if(class(contact_mat_raw)=='try-error' | (class(contact_mat_raw)!='try-error' & nrow(na.omit(contact_mat_raw)) < 100)) contact_mat_raw = try(strawr::dump("VC", contact_file_hic, as.character(chr2query), as.character(chr2query), "BP", bin_size_input))
 			if(class(contact_mat_raw)=='try-error' | (class(contact_mat_raw)!='try-error' & nrow(na.omit(contact_mat_raw)) < 100)) stop(sprintf('Your provided hic file does not contain information given the bin_size=%s and any of the normalization method KR/VC/VC_SQRT', bin_size_input))	
 			contact_mat_raw = data.table::as.data.table(contact_mat_raw)
 		}

+ 1 - 1
README.md

@@ -193,4 +193,4 @@ If you use CALDER in your work, please cite: https://www.nature.com/articles/s41
 
 * Author: Yuanlong LIU
 * Affiliation: Computational Systems Oncology group, Department of Computational Biology, University of Lausanne, Switzerland
-* Email: yliueagle@googlemail.com
+* Email: yliueagle@googlemail.com