12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152 |
#!/bin/bash
#
# Build a small gzipped FASTQ data set from a BAM file: every read aligned to
# one contig plus the first few reads aligned to a second contig (typically the
# mitochondrial one).
#
# Usage: <script> sample.bam chr1 chrM sample_name outputdir
#   sample.bam   input BAM; samtools region queries need it to be indexed
#   chr1         contig whose reads are all kept
#   chrM         contig from which only the first CHRM_READ_LIMIT reads are kept
#   sample_name  prefix used for the output file names
#   outputdir    directory that receives the output (created if missing)
#
# Outputs, written to outputdir (existing files are overwritten):
#   <sample_name>_rep1_R1.fastq.gz   always
#   <sample_name>_rep1_R2.fastq.gz   only when the selection contains paired reads
#
# Exit status: 0 on success, non-zero on a usage error or if any step fails.

set -euo pipefail

# Validate the number of arguments.
if [ "$#" -ne 5 ]; then
  echo "Usage: $0 sample.bam chr1 chrM sample_name outputdir" >&2
  exit 1
fi

# Positional arguments.
BAMFILE=$1
CHR1=$2
CHRM=$3
SAMPLENAME=$4
OUTPUTDIR=$5

# Number of alignment records kept from the second contig.
readonly CHRM_READ_LIMIT=100

if ! command -v samtools >/dev/null; then
  echo "samtools not found in PATH" >&2
  exit 1
fi

# Create the output directory.
mkdir -p -- "$OUTPUTDIR"

# Keep intermediates in a private, unpredictable directory and remove it on
# every exit path, so a failed run leaves no partial BAM files behind.
WORKDIR=$(mktemp -d "$OUTPUTDIR/${SAMPLENAME}_tmp.XXXXXX")
cleanup() { rm -rf -- "$WORKDIR"; }
trap cleanup EXIT

chr1_bam="$WORKDIR/chr1.bam"
chrm_bam="$WORKDIR/chrM.bam"
merged_bam="$WORKDIR/merged.bam"
fastq_r1="$OUTPUTDIR/${SAMPLENAME}_rep1_R1.fastq"
fastq_r2="$OUTPUTDIR/${SAMPLENAME}_rep1_R2.fastq"

# Extract all reads of the first contig.
samtools view -b -o "$chr1_bam" "$BAMFILE" "$CHR1"

# Extract the first CHRM_READ_LIMIT reads of the second contig.
# Header lines (they start with '@', which the SAM spec forbids as the first
# character of a read name) are always passed through, so the number of reads
# kept does not depend on how long the header is. awk deliberately reads the
# stream to the end instead of exiting early: closing the pipe would make the
# upstream samtools fail and, with pipefail, abort the script.
samtools view -h "$BAMFILE" "$CHRM" \
  | awk -v limit="$CHRM_READ_LIMIT" '/^@/ || kept++ < limit' \
  | samtools view -b -o "$chrm_bam" -

# Merge both selections into one BAM.
samtools merge -f "$merged_bam" "$chr1_bam" "$chrm_bam"

# Count records flagged as paired (FLAG 0x1) to tell single-end from paired-end.
paired_count=$(samtools view -c -f 1 "$merged_bam")

# Write FASTQ according to the library type.
if ((paired_count == 0)); then
  # Single-end: all reads go to R1.
  samtools fastq "$merged_bam" >"$fastq_r1"
  gzip -f -- "$fastq_r1"
else
  # Paired-end: samtools fastq expects name-collated input, otherwise R1 and R2
  # are written in different orders. Singletons (mate outside the selection)
  # and reads flagged neither/both READ1/READ2 are discarded so both files
  # contain the same reads in the same order.
  samtools collate -u -O "$merged_bam" "$WORKDIR/collate" \
    | samtools fastq -1 "$fastq_r1" -2 "$fastq_r2" -0 /dev/null -s /dev/null -
  gzip -f -- "$fastq_r1" "$fastq_r2"
fi

echo "Completed."
|