Snakefile.test 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import pandas as pd
  2. ######################################################
  3. # read samples.txt to dict
  4. ######################################################
  5. SAMPLES = pd.read_csv('samples.txt', header=None, sep="\t").set_index(1, drop=False)[0].to_dict()
  6. ######################################################
  7. # rules
  8. ######################################################
  9. rule all:
  10. input:
  11. "Quantification/my.gene.counts.matrix", # gene counts matrix
  12. "Quantification/my.gene.TMM.EXPR.matrix", # TMP and TMM normalized matrix
  13. rule hisat2_build:
  14. input:
  15. genome = "ref/genome.fasta"
  16. output:
  17. expand("ref/genome.fasta.{index}.ht2", index = range(1,9))
  18. log:
  19. "Mapping/hisat2-build.log"
  20. shell:
  21. "hisat2-build {input.genome} {input.genome} 1>{log} 2>&1"
  22. rule hisat2:
  23. input:
  24. expand("ref/genome.fasta.{index}.ht2", index = range(1,9)),
  25. left = "Data/{sample}_1.fq.gz",
  26. right = "Data/{sample}_2.fq.gz"
  27. output:
  28. bam = "Mapping/{sample}.bam"
  29. threads: 6
  30. log: "Mapping/{sample}.log"
  31. shell:
  32. "hisat2 --new-summary -p {threads} -x ref/genome.fasta"
  33. " -1 {input.left} -2 {input.right}"
  34. " 2>{log} | "
  35. " samtools view -Sb - | "
  36. " samtools sort -o {output.bam} -"
  37. rule featureCounts:
  38. input:
  39. bam = "Mapping/{sample}.bam",
  40. gtf = "ref/genome.gtf"
  41. output:
  42. count = "Quantification/{sample}.count"
  43. threads: 6
  44. shell:
  45. "Rscript script/run-featurecounts.R -b {input.bam} -g {input.gtf} -o " +
  46. "Quantification/{wildcards.sample}"
  47. rule merge_featureCounts:
  48. input:
  49. counts = expand("Quantification/{sample}.count", sample = ['Col-0-0h-rep1', 'Col-0-0h-rep2', 'Col-0-24h-rep1', 'Col-0-24h-rep2'])
  50. output:
  51. gene_expr_matrix = "Quantification/my.gene.TMM.EXPR.matrix",
  52. gene_counts_matrix = "Quantification/my.gene.counts.matrix"
  53. shell:
  54. "perl script/abundance_estimates_to_matrix.pl --est_method featureCounts"
  55. " --out_prefix Quantification/my.gene"
  56. " {input.counts};"