|
@@ -1,62 +0,0 @@
|
|
|
-import pandas as pd
|
|
|
-
|
|
|
-######################################################
|
|
|
-# read samples.txt to dict
|
|
|
-######################################################
|
|
|
-SAMPLES = pd.read_csv('samples.txt', header=None, sep="\t").set_index(1, drop=False)[0].to_dict()
|
|
|
-
|
|
|
-
|
|
|
-######################################################
|
|
|
-# rules
|
|
|
-######################################################
|
|
|
-rule all:
|
|
|
- input:
|
|
|
- "Quantification/my.gene.counts.matrix", # gene counts matrix
|
|
|
- "Quantification/my.gene.TMM.EXPR.matrix", # TMP and TMM normalized matrix
|
|
|
-rule hisat2_build:
|
|
|
- input:
|
|
|
- genome = "ref/genome.fasta"
|
|
|
- output:
|
|
|
- expand("ref/genome.fasta.{index}.ht2", index = range(1,9))
|
|
|
- log:
|
|
|
- "Mapping/hisat2-build.log"
|
|
|
- shell:
|
|
|
- "hisat2-build {input.genome} {input.genome} 1>{log} 2>&1"
|
|
|
-
|
|
|
-rule hisat2:
|
|
|
- input:
|
|
|
- expand("ref/genome.fasta.{index}.ht2", index = range(1,9)),
|
|
|
- left = "Data/{sample}_1.fq.gz",
|
|
|
- right = "Data/{sample}_2.fq.gz"
|
|
|
- output:
|
|
|
- bam = "Mapping/{sample}.bam"
|
|
|
- threads: 6
|
|
|
- log: "Mapping/{sample}.log"
|
|
|
- shell:
|
|
|
- "hisat2 --new-summary -p {threads} -x ref/genome.fasta"
|
|
|
- " -1 {input.left} -2 {input.right}"
|
|
|
- " 2>{log} | "
|
|
|
- " samtools view -Sb - | "
|
|
|
- " samtools sort -o {output.bam} -"
|
|
|
-
|
|
|
-rule featureCounts:
|
|
|
- input:
|
|
|
- bam = "Mapping/{sample}.bam",
|
|
|
- gtf = "ref/genome.gtf"
|
|
|
- output:
|
|
|
- count = "Quantification/{sample}.count"
|
|
|
- threads: 6
|
|
|
- shell:
|
|
|
- "Rscript script/run-featurecounts.R -b {input.bam} -g {input.gtf} -o " +
|
|
|
- "Quantification/{wildcards.sample}"
|
|
|
-
|
|
|
-rule merge_featureCounts:
|
|
|
- input:
|
|
|
- counts = expand("Quantification/{sample}.count", sample = ['Col-0-0h-rep1', 'Col-0-0h-rep2', 'Col-0-24h-rep1', 'Col-0-24h-rep2'])
|
|
|
- output:
|
|
|
- gene_expr_matrix = "Quantification/my.gene.TMM.EXPR.matrix",
|
|
|
- gene_counts_matrix = "Quantification/my.gene.counts.matrix"
|
|
|
- shell:
|
|
|
- "perl script/abundance_estimates_to_matrix.pl --est_method featureCounts"
|
|
|
- " --out_prefix Quantification/my.gene"
|
|
|
- " {input.counts};"
|