import pandas as pd

######################################################
# read samples.txt to dict
######################################################
SAMPLES = pd.read_csv('samples.txt', header=None, sep="\t").set_index(1, drop=False)[0].to_dict()


######################################################
# rules
######################################################
rule all:
    input:
        "Quantification/my.gene.counts.matrix",             										# gene counts matrix
        "Quantification/my.gene.TMM.EXPR.matrix",           										# TMP and TMM normalized matrix
rule hisat2_build:
    input:
        genome = "ref/genome.fasta"
    output:
        expand("ref/genome.fasta.{index}.ht2", index = range(1,9))
    log:
        "Mapping/hisat2-build.log"
    shell:
        "hisat2-build {input.genome} {input.genome} 1>{log} 2>&1"

rule hisat2:
    input:
        expand("ref/genome.fasta.{index}.ht2", index = range(1,9)),
        left = "Data/{sample}_1.fq.gz",
        right = "Data/{sample}_2.fq.gz"
    output:
        bam = "Mapping/{sample}.bam"
    threads: 6
    log: "Mapping/{sample}.log"
    shell:
        "hisat2 --new-summary -p {threads} -x ref/genome.fasta" 
        " -1 {input.left} -2 {input.right}"
        " 2>{log} | "
        " samtools view -Sb - | "
        " samtools sort -o {output.bam} -" 

rule featureCounts:
    input:
        bam = "Mapping/{sample}.bam",
        gtf = "ref/genome.gtf"
    output:
        count = "Quantification/{sample}.count"
    threads: 6
    shell:
        "Rscript script/run-featurecounts.R -b {input.bam} -g {input.gtf} -o " +
        "Quantification/{wildcards.sample}"

rule merge_featureCounts:
    input:
        counts = expand("Quantification/{sample}.count", sample = ['Col-0-0h-rep1', 'Col-0-0h-rep2', 'Col-0-24h-rep1', 'Col-0-24h-rep2'])
    output:
        gene_expr_matrix = "Quantification/my.gene.TMM.EXPR.matrix",
        gene_counts_matrix = "Quantification/my.gene.counts.matrix"
    shell:
        "perl script/abundance_estimates_to_matrix.pl --est_method featureCounts"
        " --out_prefix Quantification/my.gene"
        " {input.counts};"