test-main.R 3.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. # Testing using Calder with a .cool file as input
  2. sanitize_chroms <- function(chroms){
  3. res <- lapply(chroms, function(x){
  4. if(startsWith(x, "chr")){
  5. return(substring(x, 4))
  6. } else{
  7. return(x)
  8. }
  9. })
  10. return(res)
  11. }
  12. handle_input_cool <- function(input,
  13. outpath,
  14. bin_size=50000,
  15. genome="hg38",
  16. nproc=10,
  17. chroms_to_remove = c("MT", "M", 'chrMT', 'chrM', 'Y', 'chrY')){
  18. intermediate_data_dir = file.path(outpath, "intermediate_data")
  19. dir.create(intermediate_data_dir, recursive=TRUE, showWarnings=FALSE)
  20. system(paste0("cooler dump --table chroms --out ",
  21. file.path(intermediate_data_dir, "chroms.txt"),
  22. " --header ",
  23. input))
  24. chroms <- read.table(file.path(intermediate_data_dir, "chroms.txt"), sep="\t", header=TRUE)
  25. chroms <- chroms[!(chroms$name %in% chroms_to_remove), "name"]
  26. dump_paths <- list()
  27. for(chrom in chroms){
  28. cat(paste0("[Pre-processing] Dumping ", chrom, "\n"))
  29. chrom_dump_path <- file.path(intermediate_data_dir, paste0(chrom, "_dump.txt"))
  30. dump_paths <- c(dump_paths, chrom_dump_path)
  31. if(! file.exists(chrom_dump_path)){
  32. system(paste0("cooler dump --table pixels --range ",
  33. chrom,
  34. " --join --balanced ",
  35. input,
  36. " | cut -f2,5,8 | awk '{if ($3) print;}' > ",
  37. chrom_dump_path))
  38. }
  39. }
  40. chroms <- sanitize_chroms(chroms)
  41. names(dump_paths) <- chroms
  42. CALDER(contact_file_dump=dump_paths,
  43. chrs=chroms,
  44. bin_size=bin_size,
  45. genome=genome,
  46. save_dir=outpath,,
  47. single_binsize_only=TRUE,
  48. save_intermediate_data=TRUE,
  49. n_cores=nproc,
  50. sub_domains=TRUE)
  51. file.remove(file.path(testthat::test_path(), "total_execution.time"))
  52. }
  53. test_that("CALDER works with cool files", {
  54. input_cool_path <- file.path(testthat::test_path("data"), "test.cool")
  55. output_path <- testthat::test_path("test-main-cool-out")
  56. handle_input_cool(input_cool_path, output_path)
  57. expect_snapshot_file(file.path(output_path, "sub_compartments", "all_sub_compartments.bed"), name = "TestCool_all_sub_compartments.bed")
  58. expect_snapshot_file(file.path(output_path, "sub_compartments", "all_sub_compartments.tsv"), name = "TestCool_all_sub_compartments.tsv")
  59. expect_snapshot_file(file.path(output_path, "sub_domains", "all_nested_boundaries.bed"), name = "TestCool_all_nested_boundaries.bed")
  60. unlink(output_path, recursive=TRUE)
  61. })
  62. test_that("CALDER works with dumps", {
  63. chrs = c(21:22)
  64. ## demo contact matrices in dump format
  65. contact_file_dump = as.list(system.file("extdata", sprintf("mat_chr%s_10kb_ob.txt.gz", chrs),
  66. package='CALDER'))
  67. names(contact_file_dump) = chrs
  68. output_path <- testthat::test_path("test-main-dump-out")
  69. ## Run CALDER to compute compartments but not nested sub-domains
  70. CALDER(contact_file_dump=contact_file_dump,
  71. chrs=chrs,
  72. bin_size=10E3,
  73. genome='hg19',
  74. save_dir=output_path,
  75. save_intermediate_data=FALSE,
  76. n_cores=2,
  77. sub_domains=FALSE)
  78. expect_snapshot_file(file.path(output_path, "sub_compartments", "all_sub_compartments.bed"), name = "TestDump_all_sub_compartments.bed")
  79. expect_snapshot_file(file.path(output_path, "sub_compartments", "all_sub_compartments.tsv"), name = "TestDump_all_sub_compartments.tsv")
  80. unlink(output_path, recursive=TRUE)
  81. })