| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172 | #!/usr/bin/env RscriptsuppressPackageStartupMessages(library(optparse))suppressPackageStartupMessages(library(CALDER))AVAILABLE_REFERENCE_TRACKS_GENOMES <- c("hg19", "hg38", "mm9", "mm10")INPUT_TYPES <- c("hic", "cool")CHROMS_TO_REMOVE <- c("ALL", "M", "chrM", "MT", "chrMT", "Y", "chrY")parse_arguments <- function(){	# Creating the argument parsing options	option_list = list(	  make_option(c("-i", "--input"), action="store", default=NA, type='character',	              help="Input Hi-C contacts"),	  make_option(c("-t", "--type"), action="store", default='hic', type='character',	              help="The type of input: hic or cool [default %default]"),	  make_option(c("-b", "--bin_size"), action="store", default=50000, type='integer',	              help="Bin size to use for the analysis [default %default]"),	  make_option(c("-g", "--genome"), action="store", default="hg19", type='character',	              help="Genome assembly to use [default %default]"),	  make_option(c("-f", "--feature_track"), action="store", default=NA, type='character',	              help="Genomic feature track to be used to determine A/B compartment direction	              when genome == 'others'. The track should presumably have higher values	              in A than in B compartmnets. [default %default]"),	  make_option(c("-c", "--chromosomes"), action='store', default='all', type='character',	  			  help="Chromosomes to analyze, separated by comma. [default %default]"),	  make_option(c("-p", "--nproc"), action="store", default=1, type='integer',	              help="Number of cores to use [default %default]"),	  make_option(c("-o", "--outpath"), action="store", default=NA, type='character',	              help="Path to the output folder"),	  make_option(c("-k", "--keep_intermediate"), action="store_true", default=FALSE, type='logical',	              help="Keep intermediate data after done [default %default]")	)	parser <- OptionParser(usage = "%prog [options]", option_list=option_list)	opt <- parse_args(parser)	# Checking if input path exists	if(is.na(opt$input)){		print_help(parser)		stop(paste0("Input path (", opt$input,") does not exist"))	}	# Checking if output path is provided	if(is.na(opt$outpath)){		stop("Output path was not provided")	}	# Check that the input type is one of the possible ones	if(!(opt$type %in% INPUT_TYPES)){		stop(paste0("Input type ", opt$input_type, " not available"))	}	# Check if the provided genome is in the list of available reference genomes 	# or if a feature track is provided	if((!(opt$genome %in% AVAILABLE_REFERENCE_TRACKS_GENOMES)) || (file.exists(opt$feature_track))){		# in this case, we just assign it the name 'others'		opt$genome = "others"	}	writeLines(c(		"*******************************",		"*            CALDER           *",		"*******************************",		paste0("[Parameters] Input: ", opt$input),		paste0("[Parameters] Input type: ", opt$type),		paste0("[Parameters] Bin size: ", opt$bin_size),		paste0("[Parameters] Genome: ", opt$genome),		paste0("[Parameters] Chromosomes: ", opt$chromosomes),		paste0("[Parameters] N. cores: ", opt$nproc),		paste0("[Parameters] Output: ", opt$outpath)	))	return(opt)}sanitize_chroms <- function(chroms){	res <- lapply(chroms, function(x){		if(startsWith(x, "chr")){			return(substring(x, 4))		} else{			return(x)		}	})	return(res)}handle_input_hic <- function(opt){	suppressPackageStartupMessages(library(strawr))	chromsizes <- readHicChroms(opt$input)	chroms <- chromsizes[!(toupper(chromsizes$name) %in% toupper(CHROMS_TO_REMOVE)), "name"]	CALDER(contact_file_hic = opt$input,		   chrs = chroms,		   bin_size = opt$bin_size,		   genome = opt$genome,		   save_dir=opt$outpath,		   save_intermediate_data=TRUE,		   single_binsize_only=TRUE,		   n_cores = opt$nproc,		   sub_domains=TRUE)}handle_input_cool <- function(opt){	intermediate_data_dir = file.path(opt$outpath, "intermediate_data")	dir.create(intermediate_data_dir, recursive=TRUE, showWarnings=FALSE)	system(paste0("cooler dump --table chroms --out ", 				  file.path(intermediate_data_dir, "chroms.txt"), 				  " --header ", 				  opt$input))	chroms <- read.table(file.path(intermediate_data_dir, "chroms.txt"), sep="\t", header=TRUE)	if(opt$chromosomes == "all"){		chroms <- chroms[!( toupper(chroms$name) %in% toupper(CHROMS_TO_REMOVE) ), "name"]	}	else{		chrom_list <- strsplit(opt$chromosomes, ",")[[1]]		chroms <- chroms[chroms$name %in% chrom_list, "name"]	}		dump_paths <- list()	for(chrom in chroms){		cat(paste0("[Pre-processing] Dumping ", chrom, "\n"))		chrom_dump_path <- file.path(intermediate_data_dir, paste0(chrom, "_dump.txt"))		dump_paths <- c(dump_paths, chrom_dump_path)		if(! file.exists(chrom_dump_path)){			system(paste0("cooler dump --table pixels --range ", 						  chrom, 						  " --join --balanced ",						  opt$input,						  " | cut -f2,5,8 | awk '{if ($3) print;}' > ",						  chrom_dump_path))		}	}	chroms <- sanitize_chroms(chroms)	names(dump_paths) <- chroms	CALDER(contact_file_dump=dump_paths, 		   chrs=chroms, 		   bin_size=opt$bin_size,		   genome=opt$genome,		   save_dir=opt$outpath,,		   single_binsize_only=TRUE,		   save_intermediate_data=TRUE,		   n_cores=opt$nproc,		   sub_domains=TRUE)}opt <- parse_arguments()if(opt$type == "hic"){	handle_input_hic(opt)} else if(opt$type == "cool"){	handle_input_cool(opt)} else {	stop("Unknown input type")}# Cleaning the outputintermediate_data_dir = file.path(opt$outpath, "intermediate_data")if(dir.exists(intermediate_data_dir) && (!opt$keep_intermediate)){	cat('[Post-processing] Removing intermediate data\n')	unlink(intermediate_data_dir, recursive=TRUE)}exec_time_file = "./total_execution.time"if(file.exists(exec_time_file)){	cat("[Post-processing] Removing total_execution.time\n")	file.remove(exec_time_file)}
 |