import pandas as pd ###################################################### # read samples.txt to dict ###################################################### SAMPLES = pd.read_csv('samples.txt', header=None, sep="\t").set_index(1, drop=False)[0].to_dict() ###################################################### # rules ###################################################### rule all: input: "Quantification/my.gene.counts.matrix", # gene counts matrix "Quantification/my.gene.TMM.EXPR.matrix", # TMP and TMM normalized matrix rule hisat2_build: input: genome = "ref/genome.fasta" output: expand("ref/genome.fasta.{index}.ht2", index = range(1,9)) log: "Mapping/hisat2-build.log" shell: "hisat2-build {input.genome} {input.genome} 1>{log} 2>&1" rule hisat2: input: expand("ref/genome.fasta.{index}.ht2", index = range(1,9)), left = "Data/{sample}_1.fq.gz", right = "Data/{sample}_2.fq.gz" output: bam = "Mapping/{sample}.bam" threads: 6 log: "Mapping/{sample}.log" shell: "hisat2 --new-summary -p {threads} -x ref/genome.fasta" " -1 {input.left} -2 {input.right}" " 2>{log} | " " samtools view -Sb - | " " samtools sort -o {output.bam} -" rule featureCounts: input: bam = "Mapping/{sample}.bam", gtf = "ref/genome.gtf" output: count = "Quantification/{sample}.count" threads: 6 shell: "Rscript script/run-featurecounts.R -b {input.bam} -g {input.gtf} -o " + "Quantification/{wildcards.sample}" rule merge_featureCounts: input: counts = expand("Quantification/{sample}.count", sample = ['Col-0-0h-rep1', 'Col-0-0h-rep2', 'Col-0-24h-rep1', 'Col-0-24h-rep2']) output: gene_expr_matrix = "Quantification/my.gene.TMM.EXPR.matrix", gene_counts_matrix = "Quantification/my.gene.counts.matrix" shell: "perl script/abundance_estimates_to_matrix.pl --est_method featureCounts" " --out_prefix Quantification/my.gene" " {input.counts};"