Kaynağa Gözat

remove motif analysis

zhangxudong 11 ay önce
ebeveyn
işleme
f2de233778
3 değiştirilmiş dosya ile 21 ekleme ve 37 silme
  1. 16 28
      Snakefile
  2. 4 9
      config.yaml
  3. 1 0
      run.sh

+ 16 - 28
Snakefile

@@ -79,14 +79,11 @@ rule all:
         expand("clean_peaks/cutoff/{replicate}_peaks.{PEAK_TYPE}Peak", replicate = REPLICATES, PEAK_TYPE = PEAK_TYPE),
 	# 通过 intersect or idr 进行 peak 筛选
         expand("clean_peaks/{m}/{sample}_peaks.{PEAK_TYPE}Peak", sample=SAMPLE_TO_REPLICATE.keys(), m = PEAK_SELECTION, PEAK_TYPE = PEAK_TYPE),
+        # 提取序列
+        expand("clean_peaks/{m}/{sample}_peaks.fa", sample=SAMPLE_TO_REPLICATE.keys(), m = PEAK_SELECTION),
         # 所有样本共识peaks
         "clean_peaks/merge/merged_peaks.bed" if len(SAMPLE_TO_REPLICATE) > 2 else [],
         #####################################
-        # Motif 分析
-        #####################################
-        expand("motif_analysis/{sample}_summit.fa", sample = SAMPLE_TO_REPLICATE.keys()),
-        expand("motif_analysis/{sample}/combined.meme", sample = SAMPLE_TO_REPLICATE.keys()) if config["motif"]["do"] else [],
-        #####################################
         # Peak 注释
         #####################################
         expand("peak_annotation/{sample}_peaks_allhits.txt", sample = SAMPLE_TO_REPLICATE.keys()) if config["uropa"]["do"] else [],
@@ -469,39 +466,29 @@ rule select_peaks_by_idr:
         """
 
 ##########################################################
-# Motif 分析
+# 提取序列
 ##########################################################
-rule extract_peak_summit_fasta:
+rule extract_peak_sequence:
     input:
         Peak="clean_peaks/" + PEAK_SELECTION + "/{sample}_peaks." + PEAK_TYPE + "Peak"
     output:
-        summit_fa="motif_analysis/{sample}_summit.fa"
+        peak_fa="clean_peaks/" + PEAK_SELECTION + "/{sample}_peaks.fa"
     params:
         top_n=config["motif"]["top_n"],
         summit_flank=config["motif"]["summit_flank"],
-        genome=config["genome"]
+        genome=config["genome"],
+        summit_fa="clean_peaks/" + PEAK_SELECTION + "/{sample}_summit.fa"
     shell:
         """
         set +o pipefail;
-        sort -k8,8nr {input.Peak} | head -n {params.top_n} | awk -v OFS='\\t' '{{print $1,$2+$10-{params.summit_flank},$2+$10+{params.summit_flank} + 1}}' | awk '$2>=0' | bedtools getfasta -fi {params.genome} -bed - > {output.summit_fa}
-        """
 
-rule motif_discovery:
-    input:
-        summit_fa="motif_analysis/{sample}_summit.fa"
-    output:
-        "motif_analysis/{sample}/combined.meme"
-    log: "logs/{sample}_motif_discovery.log"
-    params:
-        motif_databases=config['motif']['motif_databases'],
-        outdir="motif_analysis/{sample}",
-        extra=config['motif']['extra'] if config['motif']['extra'] else ""
-    singularity:
-        PEAKSNAKE_HOME + "/sifs/memesuite_latest.sif"
-    threads: 4
-    shell:
-        """
-        meme-chip -meme-p {threads} -db {params.motif_databases} -oc {params.outdir} {params.extra} {input.summit_fa} 1>{log} 2>&1
+        # Sorting and extracting fasta for peak
+        sort -k8,8nr {input.Peak} | head -n {params.top_n} | bedtools getfasta -fi {params.genome} -bed - > {output.peak_fa}
+
+        # Handling narrow peaks
+        if [[ "{PEAK_TYPE}" == "narrow" ]]; then
+            sort -k8,8nr {input.Peak} | head -n {params.top_n} | awk -v OFS='\\t' -v flank={params.summit_flank} '{{print $1, $2+$10-flank, $2+$10+flank+1}}' | awk '$2>=0' | bedtools getfasta -fi {params.genome} -bed - > {params.summit_fa}
+        fi
         """
 
 ##########################################################
@@ -516,6 +503,7 @@ rule peak_annotation_with_uropa:
         finalhits="peak_annotation/{sample}_peaks_finalhits.txt"
     log: "logs/{sample}_peak_annotation_with_uropa.log"
     params:
+        feature=config["uropa"]["feature"],
         feature_anchor=config["uropa"]["feature_anchor"],
         distance=config["uropa"]["distance"],
         relative_location=config["uropa"]["relative_location"]
@@ -523,7 +511,7 @@ rule peak_annotation_with_uropa:
         PEAKSNAKE_HOME + "/sifs/uropa_4.0.3--pyhdfd78af_0.sif"
     shell:
         """
-        uropa --bed {input.Peak} --gtf {input.gtf} --feature gene --feature-anchor {params.feature_anchor} --distance {params.distance} --relative-location {params.relative_location} --outdir peak_annotation 1>{log} 2>&1 
+        uropa --bed {input.Peak} --gtf {input.gtf} --feature {params.feature} --feature-anchor {params.feature_anchor} --distance {params.distance} --relative-location {params.relative_location} --outdir peak_annotation 1>{log} 2>&1 
         """
 
 ##########################################################

+ 4 - 9
config.yaml

@@ -77,17 +77,11 @@ idr:
   idr_threshold: 0.05
 
 #######################
-# Motif
-# ChIP-seq 一般需要对 Peak summit 进行motif 分析
-# ATAC-seq 一般不需要
-# motif_databases 仅支持相对路径
+# 提取序列用于 motif 分析
 #######################
 motif:
-  do: yes
   top_n: 1000
-  summit_flank: 200
-  motif_databases: JASPAR2022_CORE_plants_non-redundant_v2.meme
-  extra:  
+  summit_flank: 50
 
 #######################
 # Peank 注释
@@ -96,9 +90,10 @@ motif:
 #######################
 uropa:
   do: yes
+  feature: ['gene']
   feature_anchor: ['start'] # ['start', 'center', 'end']
   relative_location: ['Upstream', 'OverlapStart'] # ['Upstream', 'Downstream', 'OverlapStart', 'OverlapEnd','PeakInsideFeature', 'FeatureInsidePeak']
-  distance: [100000, 100000]
+  distance: [5000, 5000]
 
 #######################
 # diff analysis

+ 1 - 0
run.sh

@@ -0,0 +1 @@
+nohup snakemake --cores 46 --use-singularity --keep-going &