bam2pseudo.sh 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. #!/bin/bash
  2. # Check for proper number of command line args.
  3. if [ $# -ne 3 ]; then
  4. echo "Usage: $0 <input.bam> <out_prefix> <out_dir>"
  5. exit 1
  6. fi
  7. # Assign command line arguments to variables
  8. input_bam=$1
  9. out_prefix=$2
  10. out_dir=$3
  11. # Create output directory if it doesn't exist
  12. mkdir -p "${out_dir}"
  13. # Extract header and exclude @PG lines
  14. samtools view -H "${input_bam}" | grep -v "^@PG" > "${out_dir}/${out_prefix}.header"
  15. # Shuffle the reads in the BAM file to randomize
  16. samtools view "${input_bam}" | shuf - > "${out_dir}/${out_prefix}.shuf.sam"
  17. # Split the randomized SAM into two parts
  18. split -n l/2 -d --additional-suffix=.sam "${out_dir}/${out_prefix}.shuf.sam" "${out_dir}/${out_prefix}_pseudo_rep"
  19. # Add the header back to the split SAM files and convert back to BAM
  20. for i in 00 01; do
  21. cat "${out_dir}/${out_prefix}.header" "${out_dir}/${out_prefix}_pseudo_rep${i}.sam" | \
  22. samtools view -bS - > "${out_dir}/${out_prefix}_pseudo_rep${i}.bam"
  23. done
  24. # Clean up intermediate files
  25. mv ${out_dir}/${out_prefix}_pseudo_rep00.bam ${out_dir}/${out_prefix}_pseudo_rep1.bam
  26. mv ${out_dir}/${out_prefix}_pseudo_rep01.bam ${out_dir}/${out_prefix}_pseudo_rep2.bam
  27. rm "${out_dir}/${out_prefix}.header" "${out_dir}/${out_prefix}.shuf.sam"
  28. rm ${out_dir}/${out_prefix}_pseudo_rep00.sam ${out_dir}/${out_prefix}_pseudo_rep01.sam
  29. echo "Pseudo-replicate BAM files have been created: ${out_dir}/${out_prefix}_pseudo_rep1.bam and ${out_dir}/${out_prefix}_pseudo_rep2.bam"