compartment_PCA.R 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. get_PCs = function( mat, which ) ## fast way to compute PCs
  2. {
  3. PC_mat = crossprod(mat)
  4. res_eigs_sym = rARPACK::eigs_sym( PC_mat, k=max(which), which = "LM" )
  5. if(any(res_eigs_sym$values <0)) stop('Non-positive eigenvalues for A^2')
  6. PCs = mat%*%(res_eigs_sym$vectors)
  7. return( PCs[, which] )
  8. }
  9. PC_compartment <- function(A_oe) ## compute PC and define A/B compartment. Not used currently
  10. {
  11. cA_oe = fast_cor(A_oe)
  12. PC_mat = crossprod(cA_oe)
  13. res_eigs_sym = rARPACK::eigs_sym( PC_mat, k=2, which = "LM" )
  14. PC1 = cA_oe%*%res_eigs_sym$vectors[,1]
  15. PC2 = cA_oe%*%res_eigs_sym$vectors[,2]
  16. borders = which(diff(1*(PC1 > 0))!=0)
  17. to_id = as.numeric(rownames(A_oe)[borders])
  18. from_id = as.numeric(rownames(A_oe)[c(1, head(borders, length(borders)-1)+1)])
  19. start_poses = (from_id-1)*bin_size + 1
  20. end_poses = to_id*bin_size
  21. compartment_AB = data.frame(chr=paste0('chr', chr), start_poses=start_poses, end_poses=end_poses, A_or_B=NA, zero=0, dot='.', start_poses_2=start_poses, end_poses_2=end_poses, col=NA)
  22. compartment_AB[which((1:nrow(compartment_AB))%%2==0), 'A_or_B'] = 'A'
  23. compartment_AB[which((1:nrow(compartment_AB))%%2==1), 'A_or_B'] = 'B'
  24. compartment_AB[which((1:nrow(compartment_AB))%%2==0), 'col'] = '112,128,144'
  25. compartment_AB[which((1:nrow(compartment_AB))%%2==1), 'col'] = '255,255,0'
  26. compartments_bed_files = paste0(sub_folder, '/chr', chr, '_compartments_PCA_AB_2', '.bed')
  27. write.table( compartment_AB, file=compartments_bed_files, quote=FALSE, row.names=FALSE, col.names=FALSE, sep=' ' )
  28. }
  29. PC_compartment_slim <- function(A_oe, downsratio=NULL) ## compute and save the PC values only. Not used currently
  30. {
  31. cA_oe = fast_cor(A_oe)
  32. PC_mat = crossprod(cA_oe)
  33. res_eigs_sym = rARPACK::eigs_sym( PC_mat, k=2, which = "LM" )
  34. PC1 = cA_oe%*%res_eigs_sym$vectors[,1]
  35. PC2 = cA_oe%*%res_eigs_sym$vectors[,2]
  36. compartment_AB = data.frame(PC1=PC1, bin_names=rownames(A_oe))
  37. if( is.null(downsratio) ) compartments_AB_file = paste0(sub_folder, '/chr', chr, '_compartments_PCA_AB.Rdata')
  38. if( !is.null(downsratio) ) compartments_AB_file = paste0(sub_folder, '/chr', chr, '_compartments_PCA_AB_downsratio_', downsratio, '.Rdata')
  39. save( compartment_AB, file=compartments_AB_file )
  40. }