# sample_from_bam.sh (1.6 KB)
  1. #!/bin/bash
  2. # 检查输入参数数量
  3. if [ "$#" -ne 5 ]; then
  4. echo "Usage: $0 sample.bam chr1 chrM sample_name outputdir"
  5. exit 1
  6. fi
  7. # 读取输入参数
  8. BAMFILE=$1
  9. CHR1=$2
  10. CHRM=$3
  11. SAMPLENAME=$4
  12. OUTPUTDIR=$5
  13. # 创建输出目录
  14. mkdir -p $OUTPUTDIR
  15. # 生成临时文件的唯一标识符
  16. TMPID=$(date +%s%N) # 使用当前时间的纳秒作为唯一ID
  17. # 提取 chr1 的 reads
  18. samtools view -b $BAMFILE $CHR1 > $OUTPUTDIR/${SAMPLENAME}_${CHR1}_${TMPID}.bam
  19. # 提取 chrM 的前 100 条 reads
  20. samtools view -h $BAMFILE $CHRM | head -n 400 | samtools view -Sb - > $OUTPUTDIR/${SAMPLENAME}_${CHRM}_${TMPID}.bam
  21. # 合并 chr1 和 chrM 的 reads
  22. samtools merge -f $OUTPUTDIR/${SAMPLENAME}_merged_${TMPID}.bam $OUTPUTDIR/${SAMPLENAME}_${CHR1}_${TMPID}.bam $OUTPUTDIR/${SAMPLENAME}_${CHRM}_${TMPID}.bam
  23. # 检查是单端还是双端测序
  24. READFLAG=$(samtools view -f 1 -c $OUTPUTDIR/${SAMPLENAME}_merged_${TMPID}.bam)
  25. # 根据测序类型生成对应的 FASTQ 文件
  26. if [ $READFLAG -eq 0 ]; then
  27. # 单端
  28. samtools fastq $OUTPUTDIR/${SAMPLENAME}_merged_${TMPID}.bam > $OUTPUTDIR/${SAMPLENAME}_rep1_R1.fastq
  29. gzip $OUTPUTDIR/${SAMPLENAME}_rep1_R1.fastq
  30. else
  31. # 双端
  32. samtools fastq -1 $OUTPUTDIR/${SAMPLENAME}_rep1_R1.fastq -2 $OUTPUTDIR/${SAMPLENAME}_rep1_R2.fastq $OUTPUTDIR/${SAMPLENAME}_merged_${TMPID}.bam
  33. gzip $OUTPUTDIR/${SAMPLENAME}_rep1_R1.fastq
  34. gzip $OUTPUTDIR/${SAMPLENAME}_rep1_R2.fastq
  35. fi
  36. # 清理临时文件
  37. rm $OUTPUTDIR/${SAMPLENAME}_${CHR1}_${TMPID}.bam
  38. rm $OUTPUTDIR/${SAMPLENAME}_${CHRM}_${TMPID}.bam
  39. rm $OUTPUTDIR/${SAMPLENAME}_merged_${TMPID}.bam
  40. echo "Completed."