lucananni93 2 years ago
parent
commit
f0f6dceaa7

+ 3 - 0
.gitignore

@@ -1,6 +1,9 @@
 ### CUSTOM ###
 total_execution.time
 
+### TESTING ###
+_snaps/
+
 ### R ###
 
 # History files

+ 3 - 0
DESCRIPTION

@@ -36,3 +36,6 @@ Imports:
 	rhdf5 (>= 2.28.0),
 	ggplot2 (>= 3.3.5)
 RoxygenNote: 7.1.2
+Suggests: 
+    testthat (>= 3.0.0)
+Config/testthat/edition: 3

+ 10 - 10
R/RcppExports.R

@@ -2,42 +2,42 @@
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
 matrix_multiplication_cpp <- function(A, B) {
-    .Call('_CALDER_matrix_multiplication_cpp', PACKAGE = 'CALDER', A, B)
+    .Call(`_CALDER_matrix_multiplication_cpp`, A, B)
 }
 
 matrix_multiplication_sym_cpp <- function(A) {
-    .Call('_CALDER_matrix_multiplication_sym_cpp', PACKAGE = 'CALDER', A)
+    .Call(`_CALDER_matrix_multiplication_sym_cpp`, A)
 }
 
 loglik_lnorm_cpp <- function(sum_ln1, sum_ln2, p, q) {
-    .Call('_CALDER_loglik_lnorm_cpp', PACKAGE = 'CALDER', sum_ln1, sum_ln2, p, q)
+    .Call(`_CALDER_loglik_lnorm_cpp`, sum_ln1, sum_ln2, p, q)
 }
 
 loglik_lnorm_cpp_vec <- function(vec_values) {
-    .Call('_CALDER_loglik_lnorm_cpp_vec', PACKAGE = 'CALDER', vec_values)
+    .Call(`_CALDER_loglik_lnorm_cpp_vec`, vec_values)
 }
 
 get_A_len <- function(A) {
-    .Call('_CALDER_get_A_len', PACKAGE = 'CALDER', A)
+    .Call(`_CALDER_get_A_len`, A)
 }
 
 get_A_ln1 <- function(A) {
-    .Call('_CALDER_get_A_ln1', PACKAGE = 'CALDER', A)
+    .Call(`_CALDER_get_A_ln1`, A)
 }
 
 get_A_ln2 <- function(A) {
-    .Call('_CALDER_get_A_ln2', PACKAGE = 'CALDER', A)
+    .Call(`_CALDER_get_A_ln2`, A)
 }
 
 loglik_lnorm_cpp_mat <- function(sum_ln1, sum_ln2, ps, qs) {
-    .Call('_CALDER_loglik_lnorm_cpp_mat', PACKAGE = 'CALDER', sum_ln1, sum_ln2, ps, qs)
+    .Call(`_CALDER_loglik_lnorm_cpp_mat`, sum_ln1, sum_ln2, ps, qs)
 }
 
 zigzag_loglik_ancestors_v4_5 <- function(A, k, min_n_bins = 2L) {
-    .Call('_CALDER_zigzag_loglik_ancestors_v4_5', PACKAGE = 'CALDER', A, k, min_n_bins)
+    .Call(`_CALDER_zigzag_loglik_ancestors_v4_5`, A, k, min_n_bins)
 }
 
 compute_L <- function(A, L, k) {
-    .Call('_CALDER_compute_L', PACKAGE = 'CALDER', A, L, k)
+    .Call(`_CALDER_compute_L`, A, L, k)
 }
 

+ 36 - 0
tests/create_test_data.sh

@@ -0,0 +1,36 @@
+#!/bin/bash
+
+
+create_test_cool(){
+	url=$1
+	outpath=$2
+	binsize=$3
+
+	if [[ -f ${outpath}/test.cool ]]; then return; fi
+	if [[ ! -f ${outpath}/source.pairs.gz ]] 
+	then
+		echo "[Error] Please manually download the file ${url} to ${outpath}"
+		echo "        and give it source.pairs.gz as name"
+		exit -1
+	fi
+
+	zcat ${outpath}/source.pairs.gz | head -26 | tail -2 | cut -d' ' --output-delimiter=$'\t' -f 2,3 > ${outpath}/test.chrom.sizes
+	cooler cload pairs --chrom1 2 --pos1 3 --chrom2 4 --pos2 5 \
+						${outpath}/test.chrom.sizes:${binsize} \
+						${outpath}/source.pairs.gz \
+						${outpath}/test.cool
+	rm ${outpath}/source.pairs.gz
+
+	cooler balance --force --max-iters 1000 ${outpath}/test.cool
+}
+
+
+test_data_path="tests/testthat/data"
+
+mkdir -p ${test_data_path}
+
+# Test cool file
+source_cool_file="https://data.4dnucleome.org/files-processed/4DNFI2EK1IOQ/@@download/4DNFI2EK1IOQ.pairs.gz"
+test_cool_binsize=50000
+
+create_test_cool ${source_cool_file} ${test_data_path} ${test_cool_binsize}

+ 10 - 0
tests/test_cmd_cool.sh

@@ -0,0 +1,10 @@
+#!/bin/bash
+
+mkdir -p "tests/output"
+
+scripts/calder --input tests/data/test.cool \
+			   --type cool \
+			   --bin_size 50000 \
+			   --genome hg38 \
+			   --nproc 10 \
+			   --outpath "tests/output/test_cmd_cool_out"

+ 4 - 0
tests/testthat.R

@@ -0,0 +1,4 @@
+library(testthat)
+library(CALDER)
+
+test_check("CALDER")

+ 2 - 0
tests/testthat/data/test.chrom.sizes

@@ -0,0 +1,2 @@
+chr21	46709983
+chr22	50818468

BIN
tests/testthat/data/test.cool


+ 69 - 0
tests/testthat/test-main.R

@@ -0,0 +1,69 @@
+# Testing using Calder with a .cool file as input
+sanitize_chroms <- function(chroms){
+    res <- lapply(chroms, function(x){
+        if(startsWith(x, "chr")){
+            return(substring(x, 4))
+        } else{
+            return(x)
+        }
+    })
+    return(res)
+}
+
+
+handle_input_cool <- function(input, 
+                              outpath, 
+                              bin_size=50000, 
+                              genome="hg38",  
+                              nproc=10,
+                              chroms_to_remove = c("MT", "M", 'chrMT', 'chrM', 'Y', 'chrY')){
+
+    intermediate_data_dir = file.path(outpath, "intermediate_data")
+    dir.create(intermediate_data_dir, recursive=TRUE, showWarnings=FALSE)
+
+    system(paste0("cooler dump --table chroms --out ", 
+                  file.path(intermediate_data_dir, "chroms.txt"), 
+                  " --header ", 
+                  input))
+    chroms <- read.table(file.path(intermediate_data_dir, "chroms.txt"), sep="\t", header=TRUE)
+    chroms <- chroms[!(chroms$name %in% chroms_to_remove), "name"]
+
+    dump_paths <- list()
+    for(chrom in chroms){
+        cat(paste0("[Pre-processing] Dumping ", chrom, "\n"))
+        chrom_dump_path <- file.path(intermediate_data_dir, paste0(chrom, "_dump.txt"))
+        dump_paths <- c(dump_paths, chrom_dump_path)
+        if(! file.exists(chrom_dump_path)){
+            system(paste0("cooler dump --table pixels --range ", 
+                          chrom, 
+                          " --join --balanced ",
+                          input,
+                          " | cut -f2,5,8 | awk '{if ($3) print;}' > ",
+                          chrom_dump_path))
+        }
+    }
+
+    chroms <- sanitize_chroms(chroms)
+    names(dump_paths) <- chroms
+
+    CALDER(contact_file_dump=dump_paths, 
+           chrs=chroms, 
+           bin_size=bin_size,
+           genome=genome,
+           save_dir=outpath,,
+           single_binsize_only=TRUE,
+           save_intermediate_data=TRUE,
+           n_cores=nproc,
+           sub_domains=TRUE)
+    file.remove(file.path(testthat::test_path(), "total_execution.time"))
+}
+
+test_that("CALDER works with cool files", {
+    input_cool_path <- file.path(testthat::test_path("data"), "test.cool")
+    output_path <- testthat::test_path("test-main-cool-out")
+    handle_input_cool(input_cool_path, output_path)
+    expect_snapshot_file(file.path(output_path, "sub_compartments", "all_sub_compartments.bed"))
+    expect_snapshot_file(file.path(output_path, "sub_compartments", "all_sub_compartments.tsv"))
+    expect_snapshot_file(file.path(output_path, "sub_domains", "all_nested_boundaries.bed"))
+    unlink(output_path, recursive=TRUE)
+})