|
@@ -79,14 +79,11 @@ rule all:
|
|
expand("clean_peaks/cutoff/{replicate}_peaks.{PEAK_TYPE}Peak", replicate = REPLICATES, PEAK_TYPE = PEAK_TYPE),
|
|
expand("clean_peaks/cutoff/{replicate}_peaks.{PEAK_TYPE}Peak", replicate = REPLICATES, PEAK_TYPE = PEAK_TYPE),
|
|
# 通过 intersect or idr 进行 peak 筛选
|
|
# 通过 intersect or idr 进行 peak 筛选
|
|
expand("clean_peaks/{m}/{sample}_peaks.{PEAK_TYPE}Peak", sample=SAMPLE_TO_REPLICATE.keys(), m = PEAK_SELECTION, PEAK_TYPE = PEAK_TYPE),
|
|
expand("clean_peaks/{m}/{sample}_peaks.{PEAK_TYPE}Peak", sample=SAMPLE_TO_REPLICATE.keys(), m = PEAK_SELECTION, PEAK_TYPE = PEAK_TYPE),
|
|
|
|
+ # 提取序列
|
|
|
|
+ expand("clean_peaks/{m}/{sample}_peaks.fa", sample=SAMPLE_TO_REPLICATE.keys(), m = PEAK_SELECTION),
|
|
# 所有样本共识peaks
|
|
# 所有样本共识peaks
|
|
"clean_peaks/merge/merged_peaks.bed" if len(SAMPLE_TO_REPLICATE) > 2 else [],
|
|
"clean_peaks/merge/merged_peaks.bed" if len(SAMPLE_TO_REPLICATE) > 2 else [],
|
|
#####################################
|
|
#####################################
|
|
- # Motif 分析
|
|
|
|
- #####################################
|
|
|
|
- expand("motif_analysis/{sample}_summit.fa", sample = SAMPLE_TO_REPLICATE.keys()),
|
|
|
|
- expand("motif_analysis/{sample}/combined.meme", sample = SAMPLE_TO_REPLICATE.keys()) if config["motif"]["do"] else [],
|
|
|
|
- #####################################
|
|
|
|
# Peak 注释
|
|
# Peak 注释
|
|
#####################################
|
|
#####################################
|
|
expand("peak_annotation/{sample}_peaks_allhits.txt", sample = SAMPLE_TO_REPLICATE.keys()) if config["uropa"]["do"] else [],
|
|
expand("peak_annotation/{sample}_peaks_allhits.txt", sample = SAMPLE_TO_REPLICATE.keys()) if config["uropa"]["do"] else [],
|
|
@@ -469,39 +466,29 @@ rule select_peaks_by_idr:
|
|
"""
|
|
"""
|
|
|
|
|
|
##########################################################
|
|
##########################################################
|
|
-# Motif 分析
|
|
|
|
|
|
+# 提取序列
|
|
##########################################################
|
|
##########################################################
|
|
-rule extract_peak_summit_fasta:
|
|
|
|
|
|
+rule extract_peak_sequence:
|
|
input:
|
|
input:
|
|
Peak="clean_peaks/" + PEAK_SELECTION + "/{sample}_peaks." + PEAK_TYPE + "Peak"
|
|
Peak="clean_peaks/" + PEAK_SELECTION + "/{sample}_peaks." + PEAK_TYPE + "Peak"
|
|
output:
|
|
output:
|
|
- summit_fa="motif_analysis/{sample}_summit.fa"
|
|
|
|
|
|
+ peak_fa="clean_peaks/" + PEAK_SELECTION + "/{sample}_peaks.fa"
|
|
params:
|
|
params:
|
|
top_n=config["motif"]["top_n"],
|
|
top_n=config["motif"]["top_n"],
|
|
summit_flank=config["motif"]["summit_flank"],
|
|
summit_flank=config["motif"]["summit_flank"],
|
|
- genome=config["genome"]
|
|
|
|
|
|
+ genome=config["genome"],
|
|
|
|
+ summit_fa="clean_peaks/" + PEAK_SELECTION + "/{sample}_summit.fa"
|
|
shell:
|
|
shell:
|
|
"""
|
|
"""
|
|
set +o pipefail;
|
|
set +o pipefail;
|
|
- sort -k8,8nr {input.Peak} | head -n {params.top_n} | awk -v OFS='\\t' '{{print $1,$2+$10-{params.summit_flank},$2+$10+{params.summit_flank} + 1}}' | awk '$2>=0' | bedtools getfasta -fi {params.genome} -bed - > {output.summit_fa}
|
|
|
|
- """
|
|
|
|
|
|
|
|
-rule motif_discovery:
|
|
|
|
- input:
|
|
|
|
- summit_fa="motif_analysis/{sample}_summit.fa"
|
|
|
|
- output:
|
|
|
|
- "motif_analysis/{sample}/combined.meme"
|
|
|
|
- log: "logs/{sample}_motif_discovery.log"
|
|
|
|
- params:
|
|
|
|
- motif_databases=config['motif']['motif_databases'],
|
|
|
|
- outdir="motif_analysis/{sample}",
|
|
|
|
- extra=config['motif']['extra'] if config['motif']['extra'] else ""
|
|
|
|
- singularity:
|
|
|
|
- PEAKSNAKE_HOME + "/sifs/memesuite_latest.sif"
|
|
|
|
- threads: 4
|
|
|
|
- shell:
|
|
|
|
- """
|
|
|
|
- meme-chip -meme-p {threads} -db {params.motif_databases} -oc {params.outdir} {params.extra} {input.summit_fa} 1>{log} 2>&1
|
|
|
|
|
|
+ # Sorting and extracting fasta for peak
|
|
|
|
+ sort -k8,8nr {input.Peak} | head -n {params.top_n} | bedtools getfasta -fi {params.genome} -bed - > {output.peak_fa}
|
|
|
|
+
|
|
|
|
+ # Handling narrow peaks
|
|
|
|
+ if [[ "{PEAK_TYPE}" == "narrow" ]]; then
|
|
|
|
+ sort -k8,8nr {input.Peak} | head -n {params.top_n} | awk -v OFS='\\t' -v flank={params.summit_flank} '{{print $1, $2+$10-flank, $2+$10+flank+1}}' | awk '$2>=0' | bedtools getfasta -fi {params.genome} -bed - > {params.summit_fa}
|
|
|
|
+ fi
|
|
"""
|
|
"""
|
|
|
|
|
|
##########################################################
|
|
##########################################################
|
|
@@ -516,6 +503,7 @@ rule peak_annotation_with_uropa:
|
|
finalhits="peak_annotation/{sample}_peaks_finalhits.txt"
|
|
finalhits="peak_annotation/{sample}_peaks_finalhits.txt"
|
|
log: "logs/{sample}_peak_annotation_with_uropa.log"
|
|
log: "logs/{sample}_peak_annotation_with_uropa.log"
|
|
params:
|
|
params:
|
|
|
|
+ feature=config["uropa"]["feature"],
|
|
feature_anchor=config["uropa"]["feature_anchor"],
|
|
feature_anchor=config["uropa"]["feature_anchor"],
|
|
distance=config["uropa"]["distance"],
|
|
distance=config["uropa"]["distance"],
|
|
relative_location=config["uropa"]["relative_location"]
|
|
relative_location=config["uropa"]["relative_location"]
|
|
@@ -523,7 +511,7 @@ rule peak_annotation_with_uropa:
|
|
PEAKSNAKE_HOME + "/sifs/uropa_4.0.3--pyhdfd78af_0.sif"
|
|
PEAKSNAKE_HOME + "/sifs/uropa_4.0.3--pyhdfd78af_0.sif"
|
|
shell:
|
|
shell:
|
|
"""
|
|
"""
|
|
- uropa --bed {input.Peak} --gtf {input.gtf} --feature gene --feature-anchor {params.feature_anchor} --distance {params.distance} --relative-location {params.relative_location} --outdir peak_annotation 1>{log} 2>&1
|
|
|
|
|
|
+ uropa --bed {input.Peak} --gtf {input.gtf} --feature {params.feature} --feature-anchor {params.feature_anchor} --distance {params.distance} --relative-location {params.relative_location} --outdir peak_annotation 1>{log} 2>&1
|
|
"""
|
|
"""
|
|
|
|
|
|
##########################################################
|
|
##########################################################
|