пре 2 година · cf10f909cb
--- a/Snakefile
+++ b/Snakefile
@@ -59,6 +59,16 @@ with open("sample_sheet.csv", 'w') as f:
 
				 ##########################################################
			
 
				 # rule all: 最终想要生成的文件 
			
 
				 ##########################################################
			
 
				+def get_motifs_from_meme(meme_file):
			
 
				+    motifs = []
			
 
				+    with open(meme_file, 'r') as file:
			
 
				+        for line in file:
			
 
				+            if line.startswith("MOTIF"):
			
 
				+                parts = line.split()
			
 
				+                motif_name = parts[2] + "_" + parts[1]  # 第二列和第三列的组合
			
 
				+                motifs.append(motif_name)
			
 
				+    return motifs
			
 
				+
			
 
				 rule all:
			
 
				     input:
			
 
				 	#####################################
			
@@ -105,6 +115,11 @@ rule all:
 
				         "quality_control/estimate_read_filtering.tsv",
			
 
				         # cross correlation 质控表格
			
 
				         "quality_control/cross_correlation_summary.tsv" if config["cross_correlation"]["do"] else [],
			
 
				+        #####################################
			
 
				+        # 足迹分析
			
 
				+        #####################################
			
 
				+        "footprinting/bindetect_results.txt",
			
 
				+        expand("footprinting/{motif}/{motif}_Plot{plot}.pdf", motif=get_motifs_from_meme("motif_databases/JASPAR/JASPAR2022_CORE_plants_non-redundant_v2.meme"), plot=["Aggregate", "Heatmap"])
			
 
				 
			
 
				 ##########################################################
			
 
				 # 使用fastp进行原始数据质控和过滤，自动判断 SE？PE？
			
@@ -366,7 +381,7 @@ rule callpeak_with_macs3:
 
				     threads: 1
			
 
				     params:
			
 
				         control=lambda wildcards: f"-c clean_bams/{config['samples'][wildcards.replicate]}_final.bam" if config['samples'][wildcards.replicate] else "",
			
 
				-        format="BAMPE" if SEQ_TYPE == 'PE' else "BAM",
			
 
				+        format="BAMPE" if SEQ_TYPE == 'PE' and '--nomodel' not in config["macs3"]["extra"] else "BAM",
			
 
				         gsize=config["gsize"],
			
 
				         PEAK_TYPE="--broad" if PEAK_TYPE == "broad" else "",
			
 
				         extra = config["macs3"]["extra"] if config["macs3"]["extra"] else ""
			
@@ -672,6 +687,89 @@ rule combine_fastp_reports:
 
				         Rscript {params.peaksnake_home}/scripts/combine_fastp_report.R --input {input} --output quality_control/ 1>{log} 2>&1
			
 
				         """
			
 
				 
			
 
				+##########################################################
			
 
				+# 使用 TOBIAS 进行足迹分析
			
 
				+##########################################################
			
 
				+rule merge_replicate_bams:
			
 
				+    input:
			
 
				+        bams=lambda wildcards: expand("clean_bams/{sample}_{rep}_final.bam", sample = wildcards.sample, rep=SAMPLE_TO_REPLICATE[wildcards.sample])
			
 
				+    output:
			
 
				+        bam="clean_bams/{sample}_merged.bam"
			
 
				+    shell:
			
 
				+        """
			
 
				+        samtools merge -o {output.bam} {input.bams}
			
 
				+        samtools index {output.bam}
			
 
				+        """
			
 
				+
			
 
				+rule ATACorrect:
			
 
				+    input:
			
 
				+        bam="clean_bams/{sample}_merged.bam",
			
 
				+        genome=config["genome"],
			
 
				+        merged_peaks_bed="clean_peaks/merge/merged_peaks.bed"
			
 
				+    output:
			
 
				+        bw="footprinting/{sample}_corrected.bw"
			
 
				+    params: 
			
 
				+        prefix="{sample}"
			
 
				+    threads: 8
			
 
				+    log: "logs/{sample}_ATACorrect.log"
			
 
				+    singularity:
			
 
				+        PEAKSNAKE_HOME + "/sifs/tobias_20231231.sif"
			
 
				+    shell:
			
 
				+        """
			
 
				+        TOBIAS ATACorrect --window 10 --bam {input.bam} --genome {input.genome} --peaks {input.merged_peaks_bed} --outdir footprinting --prefix {params.prefix} --cores {threads} 1>{log} 2>&1
			
 
				+        """
			
 
				+
			
 
				+rule FootprintScores:
			
 
				+    input:
			
 
				+        bw="footprinting/{sample}_corrected.bw",
			
 
				+        merged_peaks_bed="clean_peaks/merge/merged_peaks.bed"
			
 
				+    output:
			
 
				+        bw="footprinting/{sample}_footprints.bw"
			
 
				+    params:
			
 
				+        prefix={sample}
			
 
				+    threads: 8
			
 
				+    log: "logs/{sample}_FootprintScores.log"
			
 
				+    singularity:
			
 
				+        PEAKSNAKE_HOME + "/sifs/tobias_20231231.sif"
			
 
				+    shell:
			
 
				+        """
			
 
				+        TOBIAS FootprintScores --window 10 --signal {input.bw} --regions {input.merged_peaks_bed} --output {output.bw} --cores {threads} 1>{log} 2>&1
			
 
				+        """
			
 
				+
			
 
				+rule BINDetect:
			
 
				+    input:
			
 
				+        bws=expand("footprinting/{sample}_footprints.bw", sample=SAMPLE_TO_REPLICATE.keys()),
			
 
				+        merged_peaks_bed="clean_peaks/merge/merged_peaks.bed",
			
 
				+        genome=config["genome"]
			
 
				+    output:
			
 
				+        "footprinting/bindetect_results.txt"
			
 
				+    params:
			
 
				+        samples=" ".join(SAMPLE_TO_REPLICATE.keys())
			
 
				+    threads: 8
			
 
				+    log: "logs/BINDetect.log"
			
 
				+    singularity:
			
 
				+        PEAKSNAKE_HOME + "/sifs/tobias_20231231.sif"
			
 
				+    shell:
			
 
				+        """
			
 
				+        TOBIAS BINDetect --motifs motif_databases/JASPAR/JASPAR2022_CORE_plants_non-redundant_v2.meme --signals {input.bws} --genome {input.genome} --peaks {input.merged_peaks_bed} --outdir footprinting --cond_names {params.samples} --cores {threads} 
			
 
				+        """
			
 
				+
			
 
				+rule plot_per_motif:
			
 
				+    input:
			
 
				+        motif_bed="footprinting/{motif}/beds/{motif}_all.bed",
			
 
				+        bws=expand("footprinting/{sample}_corrected.bw", sample=SAMPLE_TO_REPLICATE.keys())
			
 
				+    output:
			
 
				+        aggregate_pdf="footprinting/{motif}/{motif}_PlotAggregate.pdf",
			
 
				+        heatmap_pdf="footprinting/{motif}/{motif}_PlotHeatmap.pdf"
			
 
				+    threads: 1
			
 
				+    singularity:
			
 
				+        PEAKSNAKE_HOME + "/sifs/tobias_20231231.sif"
			
 
				+    shell:
			
 
				+        """
			
 
				+        TOBIAS PlotAggregate --TFBS {input.motif_bed} --signals {input.bws} --output {output.aggregate_pdf} --share_y both --plot_boundaries --signal-on-x
			
 
				+        TOBIAS PlotHeatmap --TFBS {input.motif_bed} --signals {input.bws} --output {output.heatmap_pdf} --sort_by -2
			
 
				+        """
			
 
				+
			
 
				 ##########################################################
			
 
				 # cross correlation
			
 
				 ##########################################################
			
--- a/config.yaml
+++ b/config.yaml
@@ -6,24 +6,27 @@ genome: ref/genome.fa
 
				 gtf: ref/genes_longest.gtf 
			
 
				 tss_tes_bed: ref/genes_longest.bed6  
			
 
				 tss_tes_shuffle_bed: ref/genes_shuffle.bed6
			
 
				-seq_type: SE  # 或者 "PE"
			
 
				-peak_type: narrow # narrow | broad
			
 
				+seq_type: PE  # 或者 "PE"
			
 
				+peak_type: broad # narrow | broad
			
 
				 # 有效基因组大小, 参考 https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html#effective-genome-size
			
 
				 gsize: 119481543 
			
 
				 # idr: 只支持 2 重复,适合 narrowPeak) ; intersect (2 + 重复 ) ; norep: 没有重复 
			
 
				-peak_selection: idr
			
 
				+peak_selection: intersect
			
 
				 
			
 
				 #######################
			
 
				 # samples  
			
 
				 #######################
			
 
				 samples:
			
 
				-  ZAT6_ABA_rep1: Col0_mock_rep1
			
 
				-  ZAT6_ABA_rep2: Col0_mock_rep2
			
 
				-  H3K4me3_WT_rep1: Input_WT_rep1
			
 
				-  H3K4me3_WT_rep2: Input_WT_rep1
			
 
				+  bZIP11_CHXDEX_rep1: 
			
 
				+  bZIP11_CHXDEX_rep2:
			
 
				+  bZIP11_CHXMOCK_rep1:
			
 
				+  bZIP11_CHXMOCK_rep2:
			
 
				+  WT_CHXDEX_rep1:
			
 
				+  WT_CHXDEX_rep2:
			
 
				 
			
 
				 contrasts:
			
 
				-  - ZAT6_ABA_vs_H3K4me3_WT
			
 
				+  - bZIP11_CHXDEX_vs_bZIP11_CHXMOCK
			
 
				+  - bZIP11_CHXDEX_vs_WT_CHXDEX
			
 
				 
			
 
				 #######################
			
 
				 # fastp  
			
@@ -36,7 +39,7 @@ fastp: "--adapter_sequence AGATCGGAAGAGCACACGTCTGAACTCCAGTCA --adapter_sequence_
 
				 # ATAC-seq 必须设置 --maxins 1000 ~ 2000
			
 
				 #######################
			
 
				 bowtie2_index: ref/genome  # 不设置则从genome.fa 构建
			
 
				-bowtie2:
			
 
				+bowtie2: --no-mixed --no-discordant --maxins 1000
			
 
				 
			
 
				 #######################
			
 
				 # alignmentSieve 
			
@@ -62,7 +65,7 @@ bamCoverage: "--binSize 50 --normalizeUsing RPKM"
 
				 #######################
			
 
				 macs3: 
			
 
				   qscore: 3
			
 
				-  extra: 
			
 
				+  extra: --nomodel --shift -100 --extsize 200 
			
 
				 
			
 
				 #######################
			
 
				 # replicate intersect 
			
@@ -91,9 +94,9 @@ motif:
 
				 uropa:
			
 
				   do: yes
			
 
				   feature: ['gene']
			
 
				-  feature_anchor: ['start'] # ['start', 'center', 'end']
			
 
				-  relative_location: ['Upstream', 'OverlapStart'] # ['Upstream', 'Downstream', 'OverlapStart', 'OverlapEnd','PeakInsideFeature', 'FeatureInsidePeak']
			
 
				-  distance: [5000, 5000]
			
 
				+  feature_anchor: ['start', 'center', 'end']
			
 
				+  relative_location:  ['Upstream', 'Downstream', 'OverlapStart', 'OverlapEnd','PeakInsideFeature', 'FeatureInsidePeak']
			
 
				+  distance: [100000, 100000]
			
 
				 
			
 
				 #######################
			
 
				 # diff analysis
			
@@ -108,5 +111,5 @@ diff_peaks:
 
				 # ChIP-seq 需要进行, ATAC-seq 不需要 
			
 
				 #######################
			
 
				 cross_correlation:
			
 
				-  do: yes
			
 
				+  do: no
			
 
				   spp: "-s=0:5:500"