|  | @@ -0,0 +1,487 @@
 | 
	
		
			
				|  |  | +import os
 | 
	
		
			
				|  |  | +import yaml
 | 
	
		
			
				|  |  | +import pandas as pd
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +configfile: "config.yaml"
 | 
	
		
			
				|  |  | +##########################################################
 | 
	
		
			
				|  |  | +# 全局变量和样本信息
 | 
	
		
			
				|  |  | +##########################################################
 | 
	
		
			
				|  |  | +HICSNAKE_HOME = config["HICSNAKE_HOME"] if config["HICSNAKE_HOME"] else os.getcwd()
 | 
	
		
			
				|  |  | +GENOME = config["genome"]
 | 
	
		
			
				|  |  | +RESOLUTIONS = config["resolutions"]
 | 
	
		
			
				|  |  | +BASE_RESOLUTION = min(RESOLUTIONS)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +with open('chromosomes.txt', 'r') as file:
 | 
	
		
			
				|  |  | +    CHROMOSOMES = [line.strip() for line in file]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +meta_data = pd.read_csv('samples.txt', sep='\t')
 | 
	
		
			
				|  |  | +REPLICATES = sorted(meta_data['replicate'].tolist())
 | 
	
		
			
				|  |  | +SAMPLES2REPLICATES = meta_data.groupby('sample')['replicate'].apply(list).to_dict()
 | 
	
		
			
				|  |  | +SAMPLES = SAMPLES2REPLICATES.keys()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +contrasts_data = pd.read_csv('contrasts.txt', sep='\t', header=None)
 | 
	
		
			
				|  |  | +CONTRASTS = [f"{row[0]}_vs_{row[1]}" for row in contrasts_data.itertuples(index=False)]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +##########################################################
 | 
	
		
			
				|  |  | +# rule all: 最终想要生成的文件
 | 
	
		
			
				|  |  | +##########################################################
 | 
	
		
			
				|  |  | +rule all:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        #第一步: 测序数据的过滤和质控
 | 
	
		
			
				|  |  | +        #"clean_data/fastp_summary.tsv",
 | 
	
		
			
				|  |  | +        #第二步:比对到参考基因组
 | 
	
		
			
				|  |  | +        #expand("aligned/{replicate}_R{i}.bam", replicate=REPLICATES, i=[1,2]),
 | 
	
		
			
				|  |  | +        #第三步: 构建接触矩阵
 | 
	
		
			
				|  |  | +        #expand("Matrix/Raw/{replicate}.cool", replicate=REPLICATES),
 | 
	
		
			
				|  |  | +        #expand("Matrix/Merged/{sample}.cool", sample=SAMPLES),
 | 
	
		
			
				|  |  | +        #expand("Matrix/Normalized/{sample}.cool", sample=SAMPLES),
 | 
	
		
			
				|  |  | +        #第四步: 格式转换
 | 
	
		
			
				|  |  | +        expand("Matrix/Final/{sample}.{format}", sample=SAMPLES, format=["mcool"]),
 | 
	
		
			
				|  |  | +        #第五步: 接触矩阵质控
 | 
	
		
			
				|  |  | +        "Matrix/Raw/hicQC.html",
 | 
	
		
			
				|  |  | +        "Matrix/Raw/SCC.csv",
 | 
	
		
			
				|  |  | +        expand("Matrix/Final/{sample}_res.csv", sample=SAMPLES),
 | 
	
		
			
				|  |  | +        #第六步: AB compartment, TADs, Loops
 | 
	
		
			
				|  |  | +        expand("Matrix/Diff/{resolution}/{contrast}.cool", resolution=config["TAD_resolution"], contrast=CONTRASTS),
 | 
	
		
			
				|  |  | +        expand("Compartments/CALDER2/{resolution}/{sample}/sub_compartments/all_sub_compartments.bed", resolution=config["Compartment_resolution"], sample=SAMPLES),
 | 
	
		
			
				|  |  | +        expand("TADs/HiCExplorer/{resolution}/{sample}_domains.bed", resolution=config["TAD_resolution"], sample=SAMPLES),
 | 
	
		
			
				|  |  | +        expand("TADs/HiCExplorer/{resolution}/{contrast}_accepted.diff_tad", resolution=config["TAD_resolution"],contrast=CONTRASTS),
 | 
	
		
			
				|  |  | +        expand("Loops/Mustache/{resolution}/{sample}.tsv", resolution=config["Loop_resolution"], sample=SAMPLES),
 | 
	
		
			
				|  |  | +        expand("Loops/Mustache/{resolution}/{contrast}.diffloop1", resolution=config["Loop_resolution"], contrast=CONTRASTS),
 | 
	
		
			
				|  |  | +        # 可视化
 | 
	
		
			
				|  |  | +        expand("plots/{resolution}/{sample}_plotmatrix_allchr.png", resolution=config["Plot_matrix_resolution"], sample=SAMPLES),
 | 
	
		
			
				|  |  | +        expand("plots/{resolution}/{sample}_plotmatrix_perchr.png", resolution=config["Plot_matrix_resolution"], sample=SAMPLES)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | + 
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +#            第一步: 测序数据的过滤和质控                #
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 使用fastp进行原始数据质控和过滤,自动判断 SE?PE?
 | 
	
		
			
				|  |  | +rule fastp_quality_control:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        read1="raw_data/{replicate}_R1.fastq.gz", 
 | 
	
		
			
				|  |  | +        read2="raw_data/{replicate}_R2.fastq.gz"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        read1="clean_data/{replicate}_R1.fastq.gz", 
 | 
	
		
			
				|  |  | +        read2="clean_data/{replicate}_R2.fastq.gz", 
 | 
	
		
			
				|  |  | +        html_report="clean_data/{replicate}_fastp.html",
 | 
	
		
			
				|  |  | +        json_report="clean_data/{replicate}_fastp.json"
 | 
	
		
			
				|  |  | +    log: "logs/{replicate}_fastp_quality_control.log"
 | 
	
		
			
				|  |  | +    threads: 3
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/commonTools_20240327.sif"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        fastp=config["fastp"]
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        fastp -i {input.read1} -o {output.read1} -I {input.read2} -O {output.read2} --html {output.html_report} --json {output.json_report} --thread {threads} {params.fastp} 1>{log} 2>&1
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 合并fastp质控结果生成表格
 | 
	
		
			
				|  |  | +rule combine_fastp_reports:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        expand("clean_data/{replicate}_fastp.json", replicate=REPLICATES)
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        "clean_data/fastp_summary.tsv"
 | 
	
		
			
				|  |  | +    log: "logs/combine_fastp_reports.log"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        hicsnake_home=HICSNAKE_HOME
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        Rscript {params.hicsnake_home}/scripts/combine_fastp_report.R --input {input} --output clean_data/ 1>{log} 2>&1
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +#             第二步:比对到参考基因组                    #
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +# 根据基因组构建 bwa index
 | 
	
		
			
				|  |  | +rule bwa_index:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        genome = "ref/" + GENOME + ".fa"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        genome_index = "ref/genome.bwt"
 | 
	
		
			
				|  |  | +    log: "logs/bwa_index.log"
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/commonTools_20240327.sif"
 | 
	
		
			
				|  |  | +    threads: 6
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        "bwa index -p ref/genome {input.genome} 1>{log} 2>&1"
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 使用 bwa 比对 
 | 
	
		
			
				|  |  | +rule bwa:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        genome_index="ref/genome.bwt",
 | 
	
		
			
				|  |  | +        read="clean_data/{replicate}_R{i}.fastq.gz", 
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        sam="aligned/{replicate}_R{i}.sam"
 | 
	
		
			
				|  |  | +    threads: 20
 | 
	
		
			
				|  |  | +    log: "logs/{replicate}_R{i}_bwa.log"
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/commonTools_20240327.sif"
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        bwa mem -A1 -B4  -E50 -L0 -t {threads} -o {output.sam} ref/genome {input.read} 2>{log}
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +rule sam2bam:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        sam="aligned/{replicate}_R{i}.sam"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        bam="aligned/{replicate}_R{i}.bam"
 | 
	
		
			
				|  |  | +    threads: 6
 | 
	
		
			
				|  |  | +    log: "logs/{replicate}_R{i}_sam2bam.log"
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/commonTools_20240327.sif"
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        samtools view -hb --threads {threads} -o {output.bam} {input.sam}
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +#             第三步: 构建接触矩阵                       #
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 生成酶切位点
 | 
	
		
			
				|  |  | +rule hicFindRestSite:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        genome="ref/" + GENOME + ".fa"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        rest_site="ref/rest_site.bed"
 | 
	
		
			
				|  |  | +    log: "logs/hicFindRestSite.log"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        restriction_sequence=config["restriction_sequence"]
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        hicFindRestSite --fasta {input.genome} --searchPattern {params.restriction_sequence} -o {output.rest_site} 1>{log} 2>&1
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 生成 cool 格式矩阵 hicBuildMatrix
 | 
	
		
			
				|  |  | +rule hicBuildMatrix:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        read1_bam="aligned/{replicate}_R1.bam",
 | 
	
		
			
				|  |  | +        read2_bam="aligned/{replicate}_R2.bam",
 | 
	
		
			
				|  |  | +        rest_site="ref/rest_site.bed"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        bam="Matrix/Raw/{replicate}.bam",
 | 
	
		
			
				|  |  | +        cool="Matrix/Raw/{replicate}.cool",
 | 
	
		
			
				|  |  | +        log="Matrix/Raw/{replicate}/QC.log"
 | 
	
		
			
				|  |  | +    log: "logs/{replicate}_hicBuildMatrix.log"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        genome="ref/" + GENOME + ".fa",
 | 
	
		
			
				|  |  | +        base_resolution=BASE_RESOLUTION,
 | 
	
		
			
				|  |  | +        hicBuildMatrix=config["hicBuildMatrix"]
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
 | 
	
		
			
				|  |  | +    threads: 6
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        hicBuildMatrix --samFiles {input.read1_bam} {input.read2_bam} \
 | 
	
		
			
				|  |  | +            --genomeAssembly {params.genome} \
 | 
	
		
			
				|  |  | +            --outBam {output.bam} \
 | 
	
		
			
				|  |  | +            --outFileName {output.cool} \
 | 
	
		
			
				|  |  | +            --QCfolder Matrix/Raw/{wildcards.replicate} \
 | 
	
		
			
				|  |  | +            --restrictionCutFile {input.rest_site} \
 | 
	
		
			
				|  |  | +            --binSize {params.base_resolution} \
 | 
	
		
			
				|  |  | +            --threads {threads} {params.hicBuildMatrix} 1>{log} 2>&1
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 合并生物学重复的矩阵
 | 
	
		
			
				|  |  | +rule hicSumMatrices:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        cools=lambda wildcards: expand("Matrix/Raw/{replicate}.cool", replicate=SAMPLES2REPLICATES[wildcards.sample])
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        cool="Matrix/Merged/{sample}.cool"
 | 
	
		
			
				|  |  | +    log: "logs/{sample}_hicSumMatrices.log"
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
 | 
	
		
			
				|  |  | +    threads: 3
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        if [ $(echo {input.cools} | wc -w) -gt 1 ]; then
 | 
	
		
			
				|  |  | +            hicSumMatrices --matrices {input.cools} --outFileName {output.cool} 1>{log} 2>&1
 | 
	
		
			
				|  |  | +        else
 | 
	
		
			
				|  |  | +            cp {input.cools[0]} {output.cool} 1>{log} 2>&1
 | 
	
		
			
				|  |  | +        fi
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 归一化接触矩阵
 | 
	
		
			
				|  |  | +rule hicNormalize:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        cools=expand("Matrix/Merged/{sample}.cool", sample=SAMPLES, allow_missing=True)
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        cools=expand("Matrix/Normalized/{sample}.cool", sample=SAMPLES, allow_missing=True)
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        hicNormalize=config["hicNormalize"]
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
 | 
	
		
			
				|  |  | +    threads: 1
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        if [ $(echo {input.cools} | wc -w) -gt 1 ]; then
 | 
	
		
			
				|  |  | +            hicNormalize --matrices {input.cools} --outFileName {output.cools} {params.hicNormalize}
 | 
	
		
			
				|  |  | +        else
 | 
	
		
			
				|  |  | +            cp {input.cools[0]} {output.cools[0]}
 | 
	
		
			
				|  |  | +        fi
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +#                 第四步: 格式转换                       #
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 转换为 mcool 格式
 | 
	
		
			
				|  |  | +rule cooler_zoomify:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        cool="Matrix/Normalized/{sample}.cool"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        mcool="Matrix/Final/{sample}.mcool"
 | 
	
		
			
				|  |  | +    log: "logs/{sample}_cooler_zoomify.log"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        resolutions=",".join(map(str, RESOLUTIONS))
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/cooler_20240402.sif"
 | 
	
		
			
				|  |  | +    threads: 4
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        cooler zoomify {input.cool} -n {threads} --resolutions {params.resolutions} -o Matrix/Final/{wildcards.sample}_nobalanced.mcool
 | 
	
		
			
				|  |  | +        cooler zoomify {input.cool} -n {threads} --balance --resolutions {params.resolutions} -o {output.mcool}
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 转换为 ginteractions 格式
 | 
	
		
			
				|  |  | +rule convert_to_ginteractions:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        cool="Matrix/Normalized/{sample}.cool"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        ginteractions="Matrix/Final/{sample}.ginteractions.tsv"
 | 
	
		
			
				|  |  | +    log: "logs/{sample}_hicConvertFormat_ginteractions.log"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        resolutions=RESOLUTIONS,
 | 
	
		
			
				|  |  | +        prefix="Matrix/Final/{sample}.ginteractions"
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
 | 
	
		
			
				|  |  | +    threads: 1
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        hicConvertFormat --matrices {input.cool} --outFileName {params.prefix} --inputFormat cool --outputFormat ginteractions 1>{log} 2>&1
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 转换为 hic 格式
 | 
	
		
			
				|  |  | +rule convert_to_hic:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        ginteractions="Matrix/Final/{sample}.ginteractions.tsv",
 | 
	
		
			
				|  |  | +        chrom_sizes = "ref/" + GENOME + ".chrom.sizes"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        hic="Matrix/Final/{sample}.hic"
 | 
	
		
			
				|  |  | +    log: "logs/{sample}_convert_to_hic.log"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        resolutions=",".join(str(r) for r in RESOLUTIONS)
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/juicer_20240401"
 | 
	
		
			
				|  |  | +    threads: 1
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        awk -F "\\t" '{{print 0, $1, $2, 0, 0, $4, $5, 1, $7}}' {input.ginteractions} > Matrix/Final/{wildcards.sample}.short
 | 
	
		
			
				|  |  | +        sort -k2,2d -k6,6d Matrix/Final/{wildcards.sample}.short > Matrix/Final/{wildcards.sample}.short.sorted
 | 
	
		
			
				|  |  | +        java -Xms6g -Xmx40g -jar /opt/juicer_tools.jar pre -r {params.resolutions} Matrix/Final/{wildcards.sample}.short.sorted {output.hic} {input.chrom_sizes}
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +#                    第五步: 接触矩阵质控                #
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 使用 hicQC 进行REPLICATE 层次质控
 | 
	
		
			
				|  |  | +rule hicQC:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        logs=expand("Matrix/Raw/{replicate}/QC.log", replicate=REPLICATES)
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        html="Matrix/Raw/hicQC.html"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        REPLICATES=REPLICATES
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
 | 
	
		
			
				|  |  | +    threads: 20
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        hicQC --logfiles {input.logs} --labels {params.REPLICATES} --outputFolder Matrix/Raw
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 计算 SCC
 | 
	
		
			
				|  |  | +rule hicrep:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        cools=expand("Matrix/Raw/{replicate}.cool", replicate=REPLICATES)
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        scc="Matrix/Raw/SCC.csv"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        replicates=REPLICATES,
 | 
	
		
			
				|  |  | +        hicrep=config["hicrep"]
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/hicrep_20240423.sif"
 | 
	
		
			
				|  |  | +    threads: 4
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        python scripts/scc.py --workers {threads} {params.hicrep} --output {output.scc} --sample_names {params.replicates} --files {input.cools}
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 评估数据分辨率
 | 
	
		
			
				|  |  | +rule estimate_resolution:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        mcool="Matrix/Final/{sample}.mcool"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        res="Matrix/Final/{sample}_res.csv"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        hicres=config["hicres"]
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/cooler_20240402.sif"
 | 
	
		
			
				|  |  | +    threads: 4
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        python scripts/hicres.py {input.mcool} --threads {threads} --output {output.res} {params.hicres}
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# hicPlotDistVsCounts: 距离衰减曲线, 查看样本/重复差异
 | 
	
		
			
				|  |  | +rule hicPlotDistVsCounts:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        cool=expand("Matrix/Raw/{replicate}.cool", replicate=REPLICATES)
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        pdf="Matrix/Raw/DistVsCounts.pdf"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        replicates=REPLICATES,
 | 
	
		
			
				|  |  | +        hicPlotDistVsCounts=config["hicPlotDistVsCounts"]
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
 | 
	
		
			
				|  |  | +    threads: 2
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        hicPlotDistVsCounts --matrices {input.cool} --labels {params.replicates} --plotFile {output.pdf} {params.hicPlotDistVsCounts}
 | 
	
		
			
				|  |  | +        """ 
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +#                第七步: 三维结构预测                    #
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +rule calder2:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        mcool="Matrix/Final/{sample}.mcool",
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        "Compartments/CALDER2/{resolution}/{sample}/sub_compartments/all_sub_compartments.bed"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        calder=config["calder"],
 | 
	
		
			
				|  |  | +        resolution="{resolution}"
 | 
	
		
			
				|  |  | +    threads: 4
 | 
	
		
			
				|  |  | +    log: "logs/{sample}_{resolution}_calder2.log"
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        Rscript scripts/calder -i {input.mcool} -t mcool -b {params.resolution} -p {threads} -o Compartments/CALDER2/{wildcards.resolution}/{wildcards.sample} {params.calder}
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 使用 HiCExplorer 检测 TADs
 | 
	
		
			
				|  |  | +rule hicFindTADs:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        mcool="Matrix/Final/{sample}.mcool"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        boundaries="TADs/HiCExplorer/{resolution}/{sample}_boundaries.bed",
 | 
	
		
			
				|  |  | +        domains="TADs/HiCExplorer/{resolution}/{sample}_domains.bed",
 | 
	
		
			
				|  |  | +        score="TADs/HiCExplorer/{resolution}/{sample}_score.bedgraph"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        hicFindTADs=config["hicFindTADs"],
 | 
	
		
			
				|  |  | +        prefix="TADs/HiCExplorer/{resolution}/{sample}",
 | 
	
		
			
				|  |  | +        resolution="{resolution}"
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
 | 
	
		
			
				|  |  | +    threads: 4
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        hicFindTADs -p {threads} --matrix {input.mcool}::/resolutions/{params.resolution} --outPrefix {params.prefix} {params.hicFindTADs}
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 使用 HiCExplorer 检测差异 TADs
 | 
	
		
			
				|  |  | +rule hicDifferentialTAD:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        treatment_mcool="Matrix/Final/{treatment}.mcool",
 | 
	
		
			
				|  |  | +        control_mcool="Matrix/Final/{control}.mcool",
 | 
	
		
			
				|  |  | +        treatment_tads="TADs/HiCExplorer/{resolution}/{treatment}_domains.bed"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        accepted_diff_tad="TADs/HiCExplorer/{resolution}/{treatment}_vs_{control}_accepted.diff_tad"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        hicDifferentialTAD=config["hicDifferentialTAD"],
 | 
	
		
			
				|  |  | +        prefix="TADs/HiCExplorer/{resolution}/{treatment}_vs_{control}",
 | 
	
		
			
				|  |  | +        resolution="{resolution}"
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
 | 
	
		
			
				|  |  | +    threads: 4
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        hicDifferentialTAD --targetMatrix {input.treatment_mcool}::/resolutions/{params.resolution} --controlMatrix {input.control_mcool}::/resolutions/{params.resolution} --tadDomains {input.treatment_tads} --outFileNamePrefix {params.prefix} {params.hicDifferentialTAD} --threads {threads}
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 使用 mustache 检测 Loops
 | 
	
		
			
				|  |  | +rule mustache:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        mcool="Matrix/Final/{sample}.mcool"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        loops="Loops/Mustache/{resolution}/{sample}.tsv"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        mustache=config["mustache"],
 | 
	
		
			
				|  |  | +        resolution="{resolution}"
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/mustache_20240410.sif"
 | 
	
		
			
				|  |  | +    threads: 6
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        mustache --file {input.mcool} --resolution {params.resolution} --outfile {output.loops} --processes {threads} {params.mustache}
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 使用 mustache 检测差异 Loops
 | 
	
		
			
				|  |  | +rule diff_mustache:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        treatment_mcool="Matrix/Final/{treatment}.mcool",
 | 
	
		
			
				|  |  | +        control_mcool="Matrix/Final/{control}.mcool",
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        diffloop1="Loops/Mustache/{resolution}/{treatment}_vs_{control}.diffloop1",
 | 
	
		
			
				|  |  | +        diffloop2="Loops/Mustache/{resolution}/{treatment}_vs_{control}.diffloop2"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        diff_mustache=config["diff_mustache"],
 | 
	
		
			
				|  |  | +        prefix="Loops/Mustache/{resolution}/{treatment}_vs_{control}",
 | 
	
		
			
				|  |  | +        resolution="{resolution}"
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/mustache_20240410.sif"
 | 
	
		
			
				|  |  | +    threads: 6
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        python /opt/conda/lib/python3.12/site-packages/mustache/diff_mustache.py -f1 {input.treatment_mcool} -f2 {input.control_mcool} --resolution {params.resolution} --outfile {params.prefix} --processes {threads} {params.diff_mustache}
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +#                       可视化                           #
 | 
	
		
			
				|  |  | +#********************************************************#
 | 
	
		
			
				|  |  | +rule hicPlotMatrix:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        mcool="Matrix/Final/{sample}.mcool"
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        allchr_png="plots/{resolution}/{sample}_plotmatrix_allchr.png",
 | 
	
		
			
				|  |  | +        perchr_png="plots/{resolution}/{sample}_plotmatrix_perchr.png"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        resolution="{resolution}"
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        hicPlotMatrix -m {input.mcool}::/resolutions/{params.resolution} -o {output.allchr_png} --log1p --colorMap YlOrRd --clearMaskedBins --log1p
 | 
	
		
			
				|  |  | +        hicPlotMatrix -m {input.mcool}::/resolutions/{params.resolution} -o {output.perchr_png} --log1p --colorMap YlOrRd --clearMaskedBins --perChromosome --log1p
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 使用 hicCompareMatrices 比较矩阵
 | 
	
		
			
				|  |  | +rule hicCompareMatrices:
 | 
	
		
			
				|  |  | +    input:
 | 
	
		
			
				|  |  | +        treatment_mcool="Matrix/Final/{treatment}.mcool",
 | 
	
		
			
				|  |  | +        control_mcool="Matrix/Final/{control}.mcool",
 | 
	
		
			
				|  |  | +    output:
 | 
	
		
			
				|  |  | +        cool="Matrix/Diff/{resolution}/{treatment}_vs_{control}.cool"
 | 
	
		
			
				|  |  | +    params:
 | 
	
		
			
				|  |  | +        resolution="{resolution}"
 | 
	
		
			
				|  |  | +    singularity:
 | 
	
		
			
				|  |  | +        HICSNAKE_HOME + "/sifs/hicexplorer_20240327.sif"
 | 
	
		
			
				|  |  | +    threads: 6
 | 
	
		
			
				|  |  | +    shell:
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        hicCompareMatrices --matrices {input.treatment_mcool}::resolutions/{params.resolution} {input.control_mcool}::resolutions/{params.resolution} --operation log2ratio -o {output.cool}
 | 
	
		
			
				|  |  | +        """
 |