#!/bin/bash
#
# Split a BAM file into two pseudo-replicate BAM files.
#
# Usage: <script> <input.bam> <out_prefix> <out_dir>
#
# The alignment records of <input.bam> are shuffled line by line, divided
# into two parts, and each part is written together with the original header
# (minus its @PG lines) to:
#   <out_dir>/<out_prefix>_pseudo_rep1.bam
#   <out_dir>/<out_prefix>_pseudo_rep2.bam
#
# Requires samtools, shuf and split on PATH.
#
# NOTE(review): records are shuffled individually, so the two mates of a
# paired-end fragment may end up in different replicates -- confirm this is
# acceptable for the intended input.

# Abort on any failed command, unset variable, or failed pipeline stage so a
# samtools/shuf/split error can never be followed by the success message.
set -euo pipefail

# Scratch directory holding all intermediate files; set in main, removed by
# the EXIT trap. Kept global so the trap can see it on every exit path.
tmp_dir=""

# Print a message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the scratch directory (if one was created).
cleanup() {
  if [[ -n "${tmp_dir}" ]]; then
    rm -rf -- "${tmp_dir}"
  fi
}

#######################################
# Build the two pseudo-replicate BAM files.
# Globals:   tmp_dir (written)
# Arguments: $1 - input BAM file
#            $2 - prefix for the output file names
#            $3 - output directory (created if missing)
# Outputs:   writes the two BAM files and a confirmation line on stdout
# Returns:   exits non-zero on bad usage or if any step fails
#######################################
main() {
  if [[ $# -ne 3 ]]; then
    die "Usage: $0 <input.bam> <out_prefix> <out_dir>"
  fi

  local input_bam=$1
  local out_prefix=$2
  local out_dir=$3

  # Fail early with a clear message instead of midway through the pipeline.
  local tool
  for tool in samtools shuf split; do
    command -v "${tool}" > /dev/null \
      || die "Error: required command not found: ${tool}"
  done

  mkdir -p -- "${out_dir}"

  # Intermediates live in a private directory inside out_dir: unique names,
  # cleaned up on any exit, and on the same filesystem as the final outputs
  # so the closing mv is a plain rename.
  tmp_dir=$(mktemp -d "${out_dir}/pseudo_rep.tmp.XXXXXX")
  trap cleanup EXIT

  local header="${tmp_dir}/header.sam"
  local shuffled="${tmp_dir}/shuffled.sam"
  local part_stem="${tmp_dir}/part"

  # Extract the header without @PG lines. sed is used rather than grep -v
  # because grep exits 1 when it selects no lines, which would abort the
  # script under pipefail.
  samtools view -H "${input_bam}" | LC_ALL=C sed '/^@PG/d' > "${header}"

  # Randomize the order of the alignment records.
  samtools view "${input_bam}" | shuf > "${shuffled}"

  # Split into two chunks without breaking lines; -d yields suffixes 00, 01.
  split -n l/2 -d --additional-suffix=.sam "${shuffled}" "${part_stem}"

  # Re-attach the header to each half and convert to BAM. Each BAM is built
  # in the scratch directory and only moved into place once complete, so a
  # failure never leaves a truncated file under the final name.
  local -a suffixes=(00 01)
  local i rep_bam
  for i in 0 1; do
    rep_bam="${out_dir}/${out_prefix}_pseudo_rep$((i + 1)).bam"
    cat "${header}" "${part_stem}${suffixes[i]}.sam" \
      | samtools view -bS - > "${tmp_dir}/rep.bam"
    mv -- "${tmp_dir}/rep.bam" "${rep_bam}"
  done

  echo "Pseudo-replicate BAM files have been created: ${out_dir}/${out_prefix}_pseudo_rep1.bam and ${out_dir}/${out_prefix}_pseudo_rep2.bam"
}

main "$@"