| 
					
				 | 
			
			
				@@ -79,14 +79,11 @@ rule all: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         expand("clean_peaks/cutoff/{replicate}_peaks.{PEAK_TYPE}Peak", replicate = REPLICATES, PEAK_TYPE = PEAK_TYPE), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 	# 通过 intersect or idr 进行 peak 筛选 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         expand("clean_peaks/{m}/{sample}_peaks.{PEAK_TYPE}Peak", sample=SAMPLE_TO_REPLICATE.keys(), m = PEAK_SELECTION, PEAK_TYPE = PEAK_TYPE), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 提取序列 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        expand("clean_peaks/{m}/{sample}_peaks.fa", sample=SAMPLE_TO_REPLICATE.keys(), m = PEAK_SELECTION), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 所有样本共识peaks 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         "clean_peaks/merge/merged_peaks.bed" if len(SAMPLE_TO_REPLICATE) > 2 else [], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         ##################################### 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # Motif 分析 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        ##################################### 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        expand("motif_analysis/{sample}_summit.fa", sample = SAMPLE_TO_REPLICATE.keys()), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        expand("motif_analysis/{sample}/combined.meme", sample = SAMPLE_TO_REPLICATE.keys()) if config["motif"]["do"] else [], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        ##################################### 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # Peak 注释 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         ##################################### 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         expand("peak_annotation/{sample}_peaks_allhits.txt", sample = SAMPLE_TO_REPLICATE.keys()) if config["uropa"]["do"] else [], 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -469,39 +466,29 @@ rule select_peaks_by_idr: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 ########################################################## 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-# Motif 分析 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+# 提取序列 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 ########################################################## 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-rule extract_peak_summit_fasta: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+rule extract_peak_sequence: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     input: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         Peak="clean_peaks/" + PEAK_SELECTION + "/{sample}_peaks." + PEAK_TYPE + "Peak" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     output: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        summit_fa="motif_analysis/{sample}_summit.fa" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        peak_fa="clean_peaks/" + PEAK_SELECTION + "/{sample}_peaks.fa" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     params: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         top_n=config["motif"]["top_n"], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         summit_flank=config["motif"]["summit_flank"], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        genome=config["genome"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        genome=config["genome"], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        summit_fa="clean_peaks/" + PEAK_SELECTION + "/{sample}_summit.fa" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     shell: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         set +o pipefail; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        sort -k8,8nr {input.Peak} | head -n {params.top_n} | awk -v OFS='\\t' '{{print $1,$2+$10-{params.summit_flank},$2+$10+{params.summit_flank} + 1}}' | awk '$2>=0' | bedtools getfasta -fi {params.genome} -bed - > {output.summit_fa} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-rule motif_discovery: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    input: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        summit_fa="motif_analysis/{sample}_summit.fa" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    output: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        "motif_analysis/{sample}/combined.meme" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    log: "logs/{sample}_motif_discovery.log" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    params: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        motif_databases=config['motif']['motif_databases'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        outdir="motif_analysis/{sample}", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        extra=config['motif']['extra'] if config['motif']['extra'] else "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    singularity: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        PEAKSNAKE_HOME + "/sifs/memesuite_latest.sif" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    threads: 4 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    shell: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        meme-chip -meme-p {threads} -db {params.motif_databases} -oc {params.outdir} {params.extra} {input.summit_fa} 1>{log} 2>&1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # Sorting and extracting fasta for peak 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        sort -k8,8nr {input.Peak} | head -n {params.top_n} | bedtools getfasta -fi {params.genome} -bed - > {output.peak_fa} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # Handling narrow peaks 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if [[ "{PEAK_TYPE}" == "narrow" ]]; then 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            sort -k8,8nr {input.Peak} | head -n {params.top_n} | awk -v OFS='\\t' -v flank={params.summit_flank} '{{print $1, $2+$10-flank, $2+$10+flank+1}}' | awk '$2>=0' | bedtools getfasta -fi {params.genome} -bed - > {params.summit_fa} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        fi 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 ########################################################## 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -516,6 +503,7 @@ rule peak_annotation_with_uropa: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         finalhits="peak_annotation/{sample}_peaks_finalhits.txt" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     log: "logs/{sample}_peak_annotation_with_uropa.log" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     params: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        feature=config["uropa"]["feature"], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         feature_anchor=config["uropa"]["feature_anchor"], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         distance=config["uropa"]["distance"], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         relative_location=config["uropa"]["relative_location"] 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -523,7 +511,7 @@ rule peak_annotation_with_uropa: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         PEAKSNAKE_HOME + "/sifs/uropa_4.0.3--pyhdfd78af_0.sif" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     shell: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        uropa --bed {input.Peak} --gtf {input.gtf} --feature gene --feature-anchor {params.feature_anchor} --distance {params.distance} --relative-location {params.relative_location} --outdir peak_annotation 1>{log} 2>&1  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        uropa --bed {input.Peak} --gtf {input.gtf} --feature {params.feature} --feature-anchor {params.feature_anchor} --distance {params.distance} --relative-location {params.relative_location} --outdir peak_annotation 1>{log} 2>&1  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 ########################################################## 
			 |