|
@@ -0,0 +1,487 @@
|
|
|
+import os
|
|
|
+import yaml
|
|
|
+import pandas as pd
|
|
|
+
|
|
|
+configfile: "config.yaml"
|
|
|
+##########################################################
|
|
|
+# 全局变量和样本信息
|
|
|
+##########################################################
|
|
|
+HICSNAKE_HOME = config["HICSNAKE_HOME"] if config["HICSNAKE_HOME"] else os.getcwd()
|
|
|
+GENOME = config["genome"]
|
|
|
+RESOLUTIONS = config["resolutions"]
|
|
|
+BASE_RESOLUTION = min(RESOLUTIONS)
|
|
|
+
|
|
|
+with open('chromosomes.txt', 'r') as file:
|
|
|
+ CHROMOSOMES = [line.strip() for line in file]
|
|
|
+
|
|
|
+
|
|
|
+meta_data = pd.read_csv('samples.txt', sep='\t')
|
|
|
+REPLICATES = sorted(meta_data['replicate'].tolist())
|
|
|
+SAMPLES2REPLICATES = meta_data.groupby('sample')['replicate'].apply(list).to_dict()
|
|
|
+SAMPLES = SAMPLES2REPLICATES.keys()
|
|
|
+
|
|
|
+contrasts_data = pd.read_csv('contrasts.txt', sep='\t', header=None)
|
|
|
+CONTRASTS = [f"{row[0]}_vs_{row[1]}" for row in contrasts_data.itertuples(index=False)]
|
|
|
+
|
|
|
+##########################################################
|
|
|
+# rule all: 最终想要生成的文件
|
|
|
+##########################################################
|
|
|
+rule all:
|
|
|
+ input:
|
|
|
+ #第一步: 测序数据的过滤和质控
|
|
|
+ #"clean_data/fastp_summary.tsv",
|
|
|
+ #第二步:比对到参考基因组
|
|
|
+ #expand("aligned/{replicate}_R{i}.bam", replicate=REPLICATES, i=[1,2]),
|
|
|
+ #第三步: 构建接触矩阵
|
|
|
+ #expand("Matrix/Raw/{replicate}.cool", replicate=REPLICATES),
|
|
|
+ #expand("Matrix/Merged/{sample}.cool", sample=SAMPLES),
|
|
|
+ #expand("Matrix/Normalized/{sample}.cool", sample=SAMPLES),
|
|
|
+ #第四步: 格式转换
|
|
|
+ expand("Matrix/Final/{sample}.{format}", sample=SAMPLES, format=["mcool"]),
|
|
|
+ #第五步: 接触矩阵质控
|
|
|
+ "Matrix/Raw/hicQC.html",
|
|
|
+ "Matrix/Raw/SCC.csv",
|
|
|
+ expand("Matrix/Final/{sample}_res.csv", sample=SAMPLES),
|
|
|
+ #第六步: AB compartment, TADs, Loops
|
|
|
+ expand("Matrix/Diff/{resolution}/{contrast}.cool", resolution=config["TAD_resolution"], contrast=CONTRASTS),
|
|
|
+ expand("Compartments/CALDER2/{resolution}/{sample}/sub_compartments/all_sub_compartments.bed", resolution=config["Compartment_resolution"], sample=SAMPLES),
|
|
|
+ expand("TADs/HiCExplorer/{resolution}/{sample}_domains.bed", resolution=config["TAD_resolution"], sample=SAMPLES),
|
|
|
+ expand("TADs/HiCExplorer/{resolution}/{contrast}_accepted.diff_tad", resolution=config["TAD_resolution"],contrast=CONTRASTS),
|
|
|
+ expand("Loops/Mustache/{resolution}/{sample}.tsv", resolution=config["Loop_resolution"], sample=SAMPLES),
|
|
|
+ expand("Loops/Mustache/{resolution}/{contrast}.diffloop1", resolution=config["Loop_resolution"], contrast=CONTRASTS),
|
|
|
+ # 可视化
|
|
|
+ expand("plots/{resolution}/{sample}_plotmatrix_allchr.png", resolution=config["Plot_matrix_resolution"], sample=SAMPLES),
|
|
|
+ expand("plots/{resolution}/{sample}_plotmatrix_perchr.png", resolution=config["Plot_matrix_resolution"], sample=SAMPLES)
|
|
|
+
|
|
|
+
|
|
|
+#********************************************************#
|
|
|
+# 第一步: 测序数据的过滤和质控 #
|
|
|
+#********************************************************#
|
|
|
+
|
|
|
+# 使用fastp进行原始数据质控和过滤,自动判断 SE?PE?
|
|
|
+rule fastp_quality_control:
|
|
|
+ input:
|
|
|
+ read1="raw_data/{replicate}_R1.fastq.gz",
|
|
|
+ read2="raw_data/{replicate}_R2.fastq.gz"
|
|
|
+ output:
|
|
|
+ read1="clean_data/{replicate}_R1.fastq.gz",
|
|
|
+ read2="clean_data/{replicate}_R2.fastq.gz",
|
|
|
+ html_report="clean_data/{replicate}_fastp.html",
|
|
|
+ json_report="clean_data/{replicate}_fastp.json"
|
|
|
+ log: "logs/{replicate}_fastp_quality_control.log"
|
|
|
+ threads: 3
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/commonTools_20240327.sif"
|
|
|
+ params:
|
|
|
+ fastp=config["fastp"]
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ fastp -i {input.read1} -o {output.read1} -I {input.read2} -O {output.read2} --html {output.html_report} --json {output.json_report} --thread {threads} {params.fastp} 1>{log} 2>&1
|
|
|
+ """
|
|
|
+
|
|
|
+# 合并fastp质控结果生成表格
|
|
|
+rule combine_fastp_reports:
|
|
|
+ input:
|
|
|
+ expand("clean_data/{replicate}_fastp.json", replicate=REPLICATES)
|
|
|
+ output:
|
|
|
+ "clean_data/fastp_summary.tsv"
|
|
|
+ log: "logs/combine_fastp_reports.log"
|
|
|
+ params:
|
|
|
+ hicsnake_home=HICSNAKE_HOME
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ Rscript {params.hicsnake_home}/scripts/combine_fastp_report.R --input {input} --output clean_data/ 1>{log} 2>&1
|
|
|
+ """
|
|
|
+
|
|
|
+#********************************************************#
|
|
|
+# 第二步:比对到参考基因组 #
|
|
|
+#********************************************************#
|
|
|
+# 根据基因组构建 bwa index
|
|
|
+rule bwa_index:
|
|
|
+ input:
|
|
|
+ genome = "ref/" + GENOME + ".fa"
|
|
|
+ output:
|
|
|
+ genome_index = "ref/genome.bwt"
|
|
|
+ log: "logs/bwa_index.log"
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/commonTools_20240327.sif"
|
|
|
+ threads: 6
|
|
|
+ shell:
|
|
|
+ "bwa index -p ref/genome {input.genome} 1>{log} 2>&1"
|
|
|
+
|
|
|
+# 使用 bwa 比对
|
|
|
+rule bwa:
|
|
|
+ input:
|
|
|
+ genome_index="ref/genome.bwt",
|
|
|
+ read="clean_data/{replicate}_R{i}.fastq.gz",
|
|
|
+ output:
|
|
|
+ sam="aligned/{replicate}_R{i}.sam"
|
|
|
+ threads: 20
|
|
|
+ log: "logs/{replicate}_R{i}_bwa.log"
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/commonTools_20240327.sif"
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ bwa mem -A1 -B4 -E50 -L0 -t {threads} -o {output.sam} ref/genome {input.read} 2>{log}
|
|
|
+ """
|
|
|
+
|
|
|
+rule sam2bam:
|
|
|
+ input:
|
|
|
+ sam="aligned/{replicate}_R{i}.sam"
|
|
|
+ output:
|
|
|
+ bam="aligned/{replicate}_R{i}.bam"
|
|
|
+ threads: 6
|
|
|
+ log: "logs/{replicate}_R{i}_sam2bam.log"
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/commonTools_20240327.sif"
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ samtools view -hb --threads {threads} -o {output.bam} {input.sam}
|
|
|
+ """
|
|
|
+
|
|
|
+#********************************************************#
|
|
|
+# 第三步: 构建接触矩阵 #
|
|
|
+#********************************************************#
|
|
|
+
|
|
|
+# 生成酶切位点
|
|
|
+rule hicFindRestSite:
|
|
|
+ input:
|
|
|
+ genome="ref/" + GENOME + ".fa"
|
|
|
+ output:
|
|
|
+ rest_site="ref/rest_site.bed"
|
|
|
+ log: "logs/hicFindRestSite.log"
|
|
|
+ params:
|
|
|
+ restriction_sequence=config["restriction_sequence"]
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ hicFindRestSite --fasta {input.genome} --searchPattern {params.restriction_sequence} -o {output.rest_site} 1>{log} 2>&1
|
|
|
+ """
|
|
|
+
|
|
|
+# 生成 cool 格式矩阵 hicBuildMatrix
|
|
|
+rule hicBuildMatrix:
|
|
|
+ input:
|
|
|
+ read1_bam="aligned/{replicate}_R1.bam",
|
|
|
+ read2_bam="aligned/{replicate}_R2.bam",
|
|
|
+ rest_site="ref/rest_site.bed"
|
|
|
+ output:
|
|
|
+ bam="Matrix/Raw/{replicate}.bam",
|
|
|
+ cool="Matrix/Raw/{replicate}.cool",
|
|
|
+ log="Matrix/Raw/{replicate}/QC.log"
|
|
|
+ log: "logs/{replicate}_hicBuildMatrix.log"
|
|
|
+ params:
|
|
|
+ genome="ref/" + GENOME + ".fa",
|
|
|
+ base_resolution=BASE_RESOLUTION,
|
|
|
+ hicBuildMatrix=config["hicBuildMatrix"]
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
|
|
|
+ threads: 6
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ hicBuildMatrix --samFiles {input.read1_bam} {input.read2_bam} \
|
|
|
+ --genomeAssembly {params.genome} \
|
|
|
+ --outBam {output.bam} \
|
|
|
+ --outFileName {output.cool} \
|
|
|
+ --QCfolder Matrix/Raw/{wildcards.replicate} \
|
|
|
+ --restrictionCutFile {input.rest_site} \
|
|
|
+ --binSize {params.base_resolution} \
|
|
|
+ --threads {threads} {params.hicBuildMatrix} 1>{log} 2>&1
|
|
|
+ """
|
|
|
+
|
|
|
+# 合并生物学重复的矩阵
|
|
|
+rule hicSumMatrices:
|
|
|
+ input:
|
|
|
+ cools=lambda wildcards: expand("Matrix/Raw/{replicate}.cool", replicate=SAMPLES2REPLICATES[wildcards.sample])
|
|
|
+ output:
|
|
|
+ cool="Matrix/Merged/{sample}.cool"
|
|
|
+ log: "logs/{sample}_hicSumMatrices.log"
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
|
|
|
+ threads: 3
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ if [ $(echo {input.cools} | wc -w) -gt 1 ]; then
|
|
|
+ hicSumMatrices --matrices {input.cools} --outFileName {output.cool} 1>{log} 2>&1
|
|
|
+ else
|
|
|
+ cp {input.cools[0]} {output.cool} 1>{log} 2>&1
|
|
|
+ fi
|
|
|
+ """
|
|
|
+
|
|
|
+# 归一化接触矩阵
|
|
|
+rule hicNormalize:
|
|
|
+ input:
|
|
|
+ cools=expand("Matrix/Merged/{sample}.cool", sample=SAMPLES, allow_missing=True)
|
|
|
+ output:
|
|
|
+ cools=expand("Matrix/Normalized/{sample}.cool", sample=SAMPLES, allow_missing=True)
|
|
|
+ params:
|
|
|
+ hicNormalize=config["hicNormalize"]
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
|
|
|
+ threads: 1
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ if [ $(echo {input.cools} | wc -w) -gt 1 ]; then
|
|
|
+ hicNormalize --matrices {input.cools} --outFileName {output.cools} {params.hicNormalize}
|
|
|
+ else
|
|
|
+ cp {input.cools[0]} {output.cools[0]}
|
|
|
+ fi
|
|
|
+ """
|
|
|
+
|
|
|
+#********************************************************#
|
|
|
+# 第四步: 格式转换 #
|
|
|
+#********************************************************#
|
|
|
+
|
|
|
+# 转换为 mcool 格式
|
|
|
+rule cooler_zoomify:
|
|
|
+ input:
|
|
|
+ cool="Matrix/Normalized/{sample}.cool"
|
|
|
+ output:
|
|
|
+ mcool="Matrix/Final/{sample}.mcool"
|
|
|
+ log: "logs/{sample}_cooler_zoomify.log"
|
|
|
+ params:
|
|
|
+ resolutions=",".join(map(str, RESOLUTIONS))
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/cooler_20240402.sif"
|
|
|
+ threads: 4
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ cooler zoomify {input.cool} -n {threads} --resolutions {params.resolutions} -o Matrix/Final/{wildcards.sample}_nobalanced.mcool
|
|
|
+ cooler zoomify {input.cool} -n {threads} --balance --resolutions {params.resolutions} -o {output.mcool}
|
|
|
+ """
|
|
|
+
|
|
|
+# 转换为 ginteractions 格式
|
|
|
+rule convert_to_ginteractions:
|
|
|
+ input:
|
|
|
+ cool="Matrix/Normalized/{sample}.cool"
|
|
|
+ output:
|
|
|
+ ginteractions="Matrix/Final/{sample}.ginteractions.tsv"
|
|
|
+ log: "logs/{sample}_hicConvertFormat_ginteractions.log"
|
|
|
+ params:
|
|
|
+ resolutions=RESOLUTIONS,
|
|
|
+ prefix="Matrix/Final/{sample}.ginteractions"
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
|
|
|
+ threads: 1
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ hicConvertFormat --matrices {input.cool} --outFileName {params.prefix} --inputFormat cool --outputFormat ginteractions 1>{log} 2>&1
|
|
|
+ """
|
|
|
+
|
|
|
+# 转换为 hic 格式
|
|
|
+rule convert_to_hic:
|
|
|
+ input:
|
|
|
+ ginteractions="Matrix/Final/{sample}.ginteractions.tsv",
|
|
|
+ chrom_sizes = "ref/" + GENOME + ".chrom.sizes"
|
|
|
+ output:
|
|
|
+ hic="Matrix/Final/{sample}.hic"
|
|
|
+ log: "logs/{sample}_convert_to_hic.log"
|
|
|
+ params:
|
|
|
+ resolutions=",".join(str(r) for r in RESOLUTIONS)
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/juicer_20240401"
|
|
|
+ threads: 1
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ awk -F "\\t" '{{print 0, $1, $2, 0, 0, $4, $5, 1, $7}}' {input.ginteractions} > Matrix/Final/{wildcards.sample}.short
|
|
|
+ sort -k2,2d -k6,6d Matrix/Final/{wildcards.sample}.short > Matrix/Final/{wildcards.sample}.short.sorted
|
|
|
+ java -Xms6g -Xmx40g -jar /opt/juicer_tools.jar pre -r {params.resolutions} Matrix/Final/{wildcards.sample}.short.sorted {output.hic} {input.chrom_sizes}
|
|
|
+ """
|
|
|
+
|
|
|
+#********************************************************#
|
|
|
+# 第五步: 接触矩阵质控 #
|
|
|
+#********************************************************#
|
|
|
+
|
|
|
+# 使用 hicQC 进行REPLICATE 层次质控
|
|
|
+rule hicQC:
|
|
|
+ input:
|
|
|
+ logs=expand("Matrix/Raw/{replicate}/QC.log", replicate=REPLICATES)
|
|
|
+ output:
|
|
|
+ html="Matrix/Raw/hicQC.html"
|
|
|
+ params:
|
|
|
+ REPLICATES=REPLICATES
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
|
|
|
+ threads: 20
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ hicQC --logfiles {input.logs} --labels {params.REPLICATES} --outputFolder Matrix/Raw
|
|
|
+ """
|
|
|
+
|
|
|
+# 计算 SCC
|
|
|
+rule hicrep:
|
|
|
+ input:
|
|
|
+ cools=expand("Matrix/Raw/{replicate}.cool", replicate=REPLICATES)
|
|
|
+ output:
|
|
|
+ scc="Matrix/Raw/SCC.csv"
|
|
|
+ params:
|
|
|
+ replicates=REPLICATES,
|
|
|
+ hicrep=config["hicrep"]
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/hicrep_20240423.sif"
|
|
|
+ threads: 4
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ python scripts/scc.py --workers {threads} {params.hicrep} --output {output.scc} --sample_names {params.replicates} --files {input.cools}
|
|
|
+ """
|
|
|
+
|
|
|
+# 评估数据分辨率
|
|
|
+rule estimate_resolution:
|
|
|
+ input:
|
|
|
+ mcool="Matrix/Final/{sample}.mcool"
|
|
|
+ output:
|
|
|
+ res="Matrix/Final/{sample}_res.csv"
|
|
|
+ params:
|
|
|
+ hicres=config["hicres"]
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/cooler_20240402.sif"
|
|
|
+ threads: 4
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ python scripts/hicres.py {input.mcool} --threads {threads} --output {output.res} {params.hicres}
|
|
|
+ """
|
|
|
+
|
|
|
+# hicPlotDistVsCounts: 距离衰减曲线, 查看样本/重复差异
|
|
|
+rule hicPlotDistVsCounts:
|
|
|
+ input:
|
|
|
+ cool=expand("Matrix/Raw/{replicate}.cool", replicate=REPLICATES)
|
|
|
+ output:
|
|
|
+ pdf="Matrix/Raw/DistVsCounts.pdf"
|
|
|
+ params:
|
|
|
+ replicates=REPLICATES,
|
|
|
+ hicPlotDistVsCounts=config["hicPlotDistVsCounts"]
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
|
|
|
+ threads: 2
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ hicPlotDistVsCounts --matrices {input.cool} --labels {params.replicates} --plotFile {output.pdf} {params.hicPlotDistVsCounts}
|
|
|
+ """
|
|
|
+#********************************************************#
|
|
|
+# 第七步: 三维结构预测 #
|
|
|
+#********************************************************#
|
|
|
+rule calder2:
|
|
|
+ input:
|
|
|
+ mcool="Matrix/Final/{sample}.mcool",
|
|
|
+ output:
|
|
|
+ "Compartments/CALDER2/{resolution}/{sample}/sub_compartments/all_sub_compartments.bed"
|
|
|
+ params:
|
|
|
+ calder=config["calder"],
|
|
|
+ resolution="{resolution}"
|
|
|
+ threads: 4
|
|
|
+ log: "logs/{sample}_{resolution}_calder2.log"
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ Rscript scripts/calder -i {input.mcool} -t mcool -b {params.resolution} -p {threads} -o Compartments/CALDER2/{wildcards.resolution}/{wildcards.sample} {params.calder}
|
|
|
+ """
|
|
|
+
|
|
|
+# 使用 HiCExplorer 检测 TADs
|
|
|
+rule hicFindTADs:
|
|
|
+ input:
|
|
|
+ mcool="Matrix/Final/{sample}.mcool"
|
|
|
+ output:
|
|
|
+ boundaries="TADs/HiCExplorer/{resolution}/{sample}_boundaries.bed",
|
|
|
+ domains="TADs/HiCExplorer/{resolution}/{sample}_domains.bed",
|
|
|
+ score="TADs/HiCExplorer/{resolution}/{sample}_score.bedgraph"
|
|
|
+ params:
|
|
|
+ hicFindTADs=config["hicFindTADs"],
|
|
|
+ prefix="TADs/HiCExplorer/{resolution}/{sample}",
|
|
|
+ resolution="{resolution}"
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
|
|
|
+ threads: 4
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ hicFindTADs -p {threads} --matrix {input.mcool}::/resolutions/{params.resolution} --outPrefix {params.prefix} {params.hicFindTADs}
|
|
|
+ """
|
|
|
+
|
|
|
+# 使用 HiCExplorer 检测差异 TADs
|
|
|
+rule hicDifferentialTAD:
|
|
|
+ input:
|
|
|
+ treatment_mcool="Matrix/Final/{treatment}.mcool",
|
|
|
+ control_mcool="Matrix/Final/{control}.mcool",
|
|
|
+ treatment_tads="TADs/HiCExplorer/{resolution}/{treatment}_domains.bed"
|
|
|
+ output:
|
|
|
+ accepted_diff_tad="TADs/HiCExplorer/{resolution}/{treatment}_vs_{control}_accepted.diff_tad"
|
|
|
+ params:
|
|
|
+ hicDifferentialTAD=config["hicDifferentialTAD"],
|
|
|
+ prefix="TADs/HiCExplorer/{resolution}/{treatment}_vs_{control}",
|
|
|
+ resolution="{resolution}"
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
|
|
|
+ threads: 4
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ hicDifferentialTAD --targetMatrix {input.treatment_mcool}::/resolutions/{params.resolution} --controlMatrix {input.control_mcool}::/resolutions/{params.resolution} --tadDomains {input.treatment_tads} --outFileNamePrefix {params.prefix} {params.hicDifferentialTAD} --threads {threads}
|
|
|
+ """
|
|
|
+
|
|
|
+# 使用 mustache 检测 Loops
|
|
|
+rule mustache:
|
|
|
+ input:
|
|
|
+ mcool="Matrix/Final/{sample}.mcool"
|
|
|
+ output:
|
|
|
+ loops="Loops/Mustache/{resolution}/{sample}.tsv"
|
|
|
+ params:
|
|
|
+ mustache=config["mustache"],
|
|
|
+ resolution="{resolution}"
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/mustache_20240410.sif"
|
|
|
+ threads: 6
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ mustache --file {input.mcool} --resolution {params.resolution} --outfile {output.loops} --processes {threads} {params.mustache}
|
|
|
+ """
|
|
|
+
|
|
|
+# 使用 mustache 检测差异 Loops
|
|
|
+rule diff_mustache:
|
|
|
+ input:
|
|
|
+ treatment_mcool="Matrix/Final/{treatment}.mcool",
|
|
|
+ control_mcool="Matrix/Final/{control}.mcool",
|
|
|
+ output:
|
|
|
+ diffloop1="Loops/Mustache/{resolution}/{treatment}_vs_{control}.diffloop1",
|
|
|
+ diffloop2="Loops/Mustache/{resolution}/{treatment}_vs_{control}.diffloop2"
|
|
|
+ params:
|
|
|
+ diff_mustache=config["diff_mustache"],
|
|
|
+ prefix="Loops/Mustache/{resolution}/{treatment}_vs_{control}",
|
|
|
+ resolution="{resolution}"
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/mustache_20240410.sif"
|
|
|
+ threads: 6
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ python /opt/conda/lib/python3.12/site-packages/mustache/diff_mustache.py -f1 {input.treatment_mcool} -f2 {input.control_mcool} --resolution {params.resolution} --outfile {params.prefix} --processes {threads} {params.diff_mustache}
|
|
|
+ """
|
|
|
+
|
|
|
+#********************************************************#
|
|
|
+# 可视化 #
|
|
|
+#********************************************************#
|
|
|
+rule hicPlotMatrix:
|
|
|
+ input:
|
|
|
+ mcool="Matrix/Final/{sample}.mcool"
|
|
|
+ output:
|
|
|
+ allchr_png="plots/{resolution}/{sample}_plotmatrix_allchr.png",
|
|
|
+ perchr_png="plots/{resolution}/{sample}_plotmatrix_perchr.png"
|
|
|
+ params:
|
|
|
+ resolution="{resolution}"
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ hicPlotMatrix -m {input.mcool}::/resolutions/{params.resolution} -o {output.allchr_png} --log1p --colorMap YlOrRd --clearMaskedBins --log1p
|
|
|
+ hicPlotMatrix -m {input.mcool}::/resolutions/{params.resolution} -o {output.perchr_png} --log1p --colorMap YlOrRd --clearMaskedBins --perChromosome --log1p
|
|
|
+ """
|
|
|
+
|
|
|
+# 使用 hicCompareMatrices 比较矩阵
|
|
|
+rule hicCompareMatrices:
|
|
|
+ input:
|
|
|
+ treatment_mcool="Matrix/Final/{treatment}.mcool",
|
|
|
+ control_mcool="Matrix/Final/{control}.mcool",
|
|
|
+ output:
|
|
|
+ cool="Matrix/Diff/{resolution}/{treatment}_vs_{control}.cool"
|
|
|
+ params:
|
|
|
+ resolution="{resolution}"
|
|
|
+ singularity:
|
|
|
+ HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
|
|
|
+ threads: 6
|
|
|
+ shell:
|
|
|
+ """
|
|
|
+ hicCompareMatrices --matrices {input.treatment_mcool}::resolutions/{params.resolution} {input.control_mcool}::resolutions/{params.resolution} --operation log2ratio -o {output.cool}
|
|
|
+ """
|