|
@@ -59,6 +59,16 @@ with open("sample_sheet.csv", 'w') as f:
|
|
|
##########################################################
|
|
|
# rule all: 最终想要生成的文件
|
|
|
##########################################################
|
|
|
def get_motifs_from_meme(meme_file):
    """Return one output prefix per motif declared in a MEME motif file.

    Every line that starts with ``MOTIF`` is split on whitespace. The second
    field is the motif identifier and the optional third field is its
    alternate name. The prefix is ``<alternate name>_<identifier>``, which is
    the form the footprinting targets in ``rule all`` are built from.

    Args:
        meme_file: Path to a MEME-format motif database.

    Returns:
        A list of prefixes in file order.
    """
    motifs = []
    with open(meme_file, 'r') as file:
        for line in file:
            if not line.startswith("MOTIF"):
                continue
            parts = line.split()
            if len(parts) < 2:
                # A bare "MOTIF" line carries no identifier; nothing to name.
                continue
            motif_id = parts[1]
            if len(parts) >= 3:
                # Alternate name first, then the identifier.
                motifs.append(parts[2] + "_" + motif_id)
            else:
                # The alternate name is optional in MEME files; indexing
                # parts[2] here would raise IndexError.
                # NOTE(review): confirm the identifier alone matches the
                # directory name TOBIAS creates for unnamed motifs.
                motifs.append(motif_id)
    return motifs
|
|
|
+
|
|
|
rule all:
|
|
|
input:
|
|
|
#####################################
|
|
@@ -105,6 +115,11 @@ rule all:
|
|
|
"quality_control/estimate_read_filtering.tsv",
|
|
|
# cross correlation 质控表格
|
|
|
"quality_control/cross_correlation_summary.tsv" if config["cross_correlation"]["do"] else [],
|
|
|
+ #####################################
|
|
|
+ # 足迹分析
|
|
|
+ #####################################
|
|
|
+ "footprinting/bindetect_results.txt",
|
|
|
+ expand("footprinting/{motif}/{motif}_Plot{plot}.pdf", motif=get_motifs_from_meme("motif_databases/JASPAR/JASPAR2022_CORE_plants_non-redundant_v2.meme"), plot=["Aggregate", "Heatmap"])
|
|
|
|
|
|
##########################################################
|
|
|
# 使用fastp进行原始数据质控和过滤,自动判断 SE?PE?
|
|
@@ -366,7 +381,7 @@ rule callpeak_with_macs3:
|
|
|
threads: 1
|
|
|
params:
|
|
|
control=lambda wildcards: f"-c clean_bams/{config['samples'][wildcards.replicate]}_final.bam" if config['samples'][wildcards.replicate] else "",
|
|
|
- format="BAMPE" if SEQ_TYPE == 'PE' else "BAM",
|
|
|
+ format="BAMPE" if SEQ_TYPE == 'PE' and '--nomodel' not in config["macs3"]["extra"] else "BAM",
|
|
|
gsize=config["gsize"],
|
|
|
PEAK_TYPE="--broad" if PEAK_TYPE == "broad" else "",
|
|
|
extra = config["macs3"]["extra"] if config["macs3"]["extra"] else ""
|
|
@@ -672,6 +687,89 @@ rule combine_fastp_reports:
|
|
|
Rscript {params.peaksnake_home}/scripts/combine_fastp_report.R --input {input} --output quality_control/ 1>{log} 2>&1
|
|
|
"""
|
|
|
|
|
|
+##########################################################
|
|
|
+# 使用 TOBIAS 进行足迹分析
|
|
|
+##########################################################
|
|
|
# Pool the filtered BAMs of all replicates of a sample into one BAM and
# index it. The index is written next to the BAM by `samtools index` but is
# not declared as an output of this rule.
rule merge_replicate_bams:
    input:
        # One "clean_bams/<sample>_<rep>_final.bam" per replicate that
        # SAMPLE_TO_REPLICATE lists for the requested sample.
        bams=lambda wildcards: expand(
            "clean_bams/{sample}_{rep}_final.bam",
            sample=wildcards.sample,
            rep=SAMPLE_TO_REPLICATE[wildcards.sample],
        )
    output:
        bam="clean_bams/{sample}_merged.bam"
    shell:
        """
        samtools merge -o {output.bam} {input.bams}
        samtools index {output.bam}
        """
|
|
|
+
|
|
|
# Run TOBIAS ATACorrect on the merged BAM of a sample, restricted to the
# merged peak set. Results are written into footprinting/ under the sample
# name; the rule tracks the "<sample>_corrected.bw" track.
rule ATACorrect:
    input:
        bam="clean_bams/{sample}_merged.bam",
        genome=config["genome"],
        merged_peaks_bed="clean_peaks/merge/merged_peaks.bed"
    output:
        bw="footprinting/{sample}_corrected.bw"
    log:
        "logs/{sample}_ATACorrect.log"
    threads: 8
    params:
        # Passed as --prefix so the output file names start with the sample
        # name, which is what the declared output expects.
        prefix="{sample}"
    singularity:
        f"{PEAKSNAKE_HOME}/sifs/tobias_20231231.sif"
    shell:
        """
        TOBIAS ATACorrect --window 10 --bam {input.bam} --genome {input.genome} --peaks {input.merged_peaks_bed} --outdir footprinting --prefix {params.prefix} --cores {threads} 1>{log} 2>&1
        """
|
|
|
+
|
|
|
# Run TOBIAS FootprintScores on the corrected signal of a sample within the
# merged peak set, producing "footprinting/<sample>_footprints.bw".
rule FootprintScores:
    input:
        bw="footprinting/{sample}_corrected.bw",
        merged_peaks_bed="clean_peaks/merge/merged_peaks.bed"
    output:
        bw="footprinting/{sample}_footprints.bw"
    params:
        # Must be a quoted pattern so Snakemake fills it from the wildcard.
        # Unquoted, {sample} is a Python set literal over a global name.
        # Not referenced by the shell command below; kept for parity with
        # ATACorrect.
        prefix="{sample}"
    threads: 8
    log: "logs/{sample}_FootprintScores.log"
    singularity:
        PEAKSNAKE_HOME + "/sifs/tobias_20231231.sif"
    shell:
        """
        TOBIAS FootprintScores --window 10 --signal {input.bw} --regions {input.merged_peaks_bed} --output {output.bw} --cores {threads} 1>{log} 2>&1
        """
|
|
|
+
|
|
|
# Run TOBIAS BINDetect once over the footprint tracks of all samples, using
# the JASPAR plant motif collection and the merged peak set. Everything is
# written below footprinting/.
rule BINDetect:
    input:
        bws=expand("footprinting/{sample}_footprints.bw", sample=SAMPLE_TO_REPLICATE.keys()),
        merged_peaks_bed="clean_peaks/merge/merged_peaks.bed",
        genome=config["genome"]
    output:
        "footprinting/bindetect_results.txt",
        # plot_per_motif consumes these per-motif BED files. Declaring them
        # here lets Snakemake know which rule produces them; otherwise the
        # plotting targets cannot be resolved.
        # NOTE(review): assumes TOBIAS names each motif directory
        # "<name>_<id>" exactly as get_motifs_from_meme builds it - confirm
        # for motif names containing special characters.
        motif_beds=expand(
            "footprinting/{motif}/beds/{motif}_all.bed",
            motif=get_motifs_from_meme("motif_databases/JASPAR/JASPAR2022_CORE_plants_non-redundant_v2.meme"),
        )
    params:
        # Condition names, in the same order as the signal tracks above.
        samples=" ".join(SAMPLE_TO_REPLICATE.keys())
    threads: 8
    log: "logs/BINDetect.log"
    singularity:
        PEAKSNAKE_HOME + "/sifs/tobias_20231231.sif"
    shell:
        # Redirect tool output into the declared log, as the other TOBIAS
        # rules do.
        """
        TOBIAS BINDetect --motifs motif_databases/JASPAR/JASPAR2022_CORE_plants_non-redundant_v2.meme --signals {input.bws} --genome {input.genome} --peaks {input.merged_peaks_bed} --outdir footprinting --cond_names {params.samples} --cores {threads} 1>{log} 2>&1
        """
|
|
|
+
|
|
|
# For a single motif, draw the aggregate footprint plot and the heatmap
# across the corrected signal tracks of all samples.
rule plot_per_motif:
    input:
        # All sites of the motif, read from the motif's beds/ directory.
        motif_bed="footprinting/{motif}/beds/{motif}_all.bed",
        # One corrected track per sample key in SAMPLE_TO_REPLICATE.
        bws=expand(
            "footprinting/{sample}_corrected.bw",
            sample=list(SAMPLE_TO_REPLICATE),
        )
    output:
        aggregate_pdf="footprinting/{motif}/{motif}_PlotAggregate.pdf",
        heatmap_pdf="footprinting/{motif}/{motif}_PlotHeatmap.pdf"
    threads: 1
    singularity:
        f"{PEAKSNAKE_HOME}/sifs/tobias_20231231.sif"
    shell:
        """
        TOBIAS PlotAggregate --TFBS {input.motif_bed} --signals {input.bws} --output {output.aggregate_pdf} --share_y both --plot_boundaries --signal-on-x
        TOBIAS PlotHeatmap --TFBS {input.motif_bed} --signals {input.bws} --output {output.heatmap_pdf} --sort_by -2
        """
|
|
|
+
|
|
|
##########################################################
|
|
|
# cross correlation
|
|
|
##########################################################
|