ValidateFastaTest.php 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. <?php
  2. namespace Tests;
  3. use StatonLab\TripalTestSuite\DBTransaction;
  4. use StatonLab\TripalTestSuite\TripalTestCase;
  5. class ValidateFastaTest extends TripalTestCase {
  6. // Uncomment to auto start and rollback db transactions per test method.
  7. // use DBTransaction;
  8. /**
  9. * Test validate_fasta_sequence() detects invalid sequence.
  10. *
  11. * @dataProvider provideInvalidFasta
  12. */
  13. public function testInvalidFasta($type, $sequence) {
  14. $this->assertFalse(validate_fasta_sequence($type, $sequence),
  15. "Failed to detect invalid FASTA sequence.");
  16. }
  17. /**
  18. * Test validate_fasta_sequence() works for valid sequence.
  19. *
  20. * @dataProvider provideValidFasta
  21. */
  22. public function testValidFasta($type, $sequence) {
  23. $this->assertTrue(validate_fasta_sequence($type, $sequence),
  24. "Unable to pass valid fasta sequence");
  25. }
  26. /**
  27. * Provide invalid fasta entries to test.
  28. */
  29. public function provideInvalidFasta() {
  30. return [
  31. ['nucleotide',
  32. '>good first record
  33. ATCGACTAGCTACGATCGACTAGCAGTCAGTACTGACGTACTACGATCGACTAGCATGCT
  34. GCATCGATCGATCGACTATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGACT
  35. > space before definition
  36. ATTTGGGTGGTTGGGGTTCCCCCACACCGTGGGGTTTGGCAAAACCCGTGGGGCCACACA
  37. CACACCCCTGGGGTGTGACCCGTAACGCGAATATGCGCGATATTACGGCGCGCGATATTA'
  38. ],
  39. ['nucleotide',
  40. '>Has incorrect characters
  41. ACGTGCATGCATCGACACTACGACTACGACTACTAGCTCGACTGATCGACATCGATCGAT
  42. ACGTCAGCTACGGCGCGGCATGATCGAZZZZZZZZZZZZZZZZZZTCGCAACCCCTTTCA'
  43. ],
  44. ['nucleotide',
  45. '>Has special characters
  46. AACGTACGATCGACTAGCTACGATCGATCGACTAGCTAGCATCGATCGATCGATCGATCG
  47. AATTGTCAACGT%%#^&^&%%$$#@@!#%^&*()@ACACGTAGCTAGGCCAACGTGCAAA'
  48. ],
  49. ['nucleotide',
  50. '>Has numbers
  51. ACGTACGATCGATCGACTAGCTAGCATCGATCGATCGATCAGCATCGATCGATCGACTCG
  52. ACGTACGATCGATCGA2425345345ACGATCGACTAGCTAGCATCGATCGATCAGTCAG'
  53. ],
  54. ['protein',
  55. '>good first record
  56. ACGTURYKMSWBDACGTURYKMSWBDHACGTURYKMSWBDHVNXACGTURYKMSWBDHVNXVNXHVNX
  57. ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXX
  58. > space before identifier
  59. ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXX
  60. ACGTURYKMSWBDHVNACGTACGTURYKMACGTURYKMSWBDHVNXSWBDHVNXURYKMSWBDHVNXX'
  61. ],
  62. ['protein',
  63. '>Has incorrect characters
  64. ACGTURYKMSWBDACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXHVNX
  65. ACGTURYKMSWBDHVACGTURYKMSWBZZZZZZZZZZZZZZZZZZDHVNACGTURYKMSWBDHVNXNZ'
  66. ],
  67. ['protein',
  68. '>Has special characters
  69. ACGTURYKMSACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXWBDHVNX
  70. ACGTURYKMSWBACGTURYKMSWB%#$%#$%#$^$%^%&^%&*&(*)()(!!#@#$#^DHVNXDHVNX'
  71. ],
  72. ['protein',
  73. '>Has numbers
  74. ACGTURYKMSWBDHVACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXNX
  75. ACGTURYKMSWACGTURYKMSWB13212324ACGTURYKMSWBDHVNX324443556DHVNXBDHVNX'
  76. ],
  77. ];
  78. }
  79. /**
  80. * Provide Valid FASTA entries to test.
  81. */
  82. public function provideValidFasta() {
  83. return [
  84. ['nucleotide',
  85. '>identifier description
  86. ACGACTGTACGCGAGCTACGTACGTAGCATGCATCGATCGATCGATCGATCGATCGATC
  87. AGCTAGTCAGCATCGATGCATGCATCGACGTAATCGAGCGTAGCGAGCTAGTCATACGT'
  88. ],
  89. ['nucleotide',
  90. '>identifier
  91. ACGTACGATCGATCGATCGATCGATCGATCGACATGCTACGATCGATCGATCGATCGG'
  92. ],
  93. ['nucleotide',
  94. '>mutlifasta description
  95. AGCTAGCATCGATCGATCAGCTAGCATCGATCGATCGACTAGCTAGCATCGATCGATC
  96. CGATCGATCAGCTAGCTACGATCGATCGATCGATCGACTAGCTACGATCGATCGATCG
  97. CGATCGACTAGCGTACGATCGATCGATCGATCGATCGATCGATCACGATCAGCTACGT
  98. >multifasta description
  99. GCTACGATCGATCAGCTATCGACTATCGACGTATCGATGGAGTCATGCAGTCATGCAG
  100. ACGCTACGATCAGCGTACGATCGATCGATCGATCGATCGATCGATCGATCGGTGCGTC'
  101. ],
  102. ['nucleotide',
  103. '>with spaces in sequence
  104. ACGATCGATCGATCGATAGCTACGATCGATCGATCGATCAGCTAGCTACGATCGATCA
  105. ACAGCATCAGCTAG ACGATCAGCTAGCTACGATCGATCG ACTGATCAGCATGCAT
  106. AACGTACGATCAGCTAGCATGCAT ATCGATCAGCTAGCTACGATCGATCGATCAG'
  107. ],
  108. ['nucleotide',
  109. '>masked sequence
  110. acgatcgatcgactagctacgatcgacatcgatcatgAACGTGTGGGGTGTGTGCAa
  111. cagctagctagcatcgatcgatcgatcagctagcatgctacgagtcagcatcgtgca'
  112. ],
  113. ['protein',
  114. '>identier description
  115. ACGTURYKMSWBDHACGTURYKACGTURYKMSWBACGTURYKNXMSWBDHVNXVNX
  116. YKMSWACGTURYKMSWBDHVNXBDHVNXACGTURYACGTURYKMSWBDSWBDHATK'
  117. ],
  118. ['protein',
  119. '>identifier
  120. ACGTURYKMSWBACGTURYKMSWBDHVNXDACGTURYKMSWBDHVNXHVACGTURY
  121. ACGTURYKMSWBDHVNACGTURYKMSWBDACGTURYKMSWBDHVNXHVACGTURXX'
  122. ],
  123. ['protein',
  124. '>multifasta description
  125. ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHACGTURYKMSWBDHVNXVNXXX
  126. ACGTURYKMSWBDACGTURYKMSWBACGTURYKMSWBDHACGTURYKMSWBDHVNXVNXDHVNXHVNX
  127. >multifasta description
  128. ACGTURYKMSWBACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXDHVNX
  129. ACGTURYKMSWBDHVNACGTURYKMSACGTURYKMSWBDACGTURYKMSWBDHVNXHVNXWBDHVNXX'
  130. ],
  131. ['protein',
  132. '>with spaces in sequence
  133. ACGTURYKMSWBDACGTURY KMSWBDHACGTURYKMSWBDH VACGTURYKMSW BDHVNXNXVNXHVNX
  134. ACACGTURYKMSWBDHVGTURY K MSWBDHVNX ACGTURYKMSWBDH VACGTURYKMSWBDHVNXNX'
  135. ],
  136. ['protein',
  137. '>masked sequence
  138. acgturykmswbdhvacgturykmswbdhvacgturykmswbdhvnacgturykmswbdhAXNvnxxnxnx
  139. acgturykmswbacgturykmswbdhvnxacgtuTTUrykmswbdhvnxdacgturykmswbdhvnxhvnx'
  140. ]
  141. ];
  142. }
  143. }