ValidateFastaTest.php 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. <?php
  2. namespace Tests;
  3. use StatonLab\TripalTestSuite\DBTransaction;
  4. use StatonLab\TripalTestSuite\TripalTestCase;
  /**
   * Exercises validate_fasta_sequence() with accepted and rejected FASTA input.
   *
   * validate_fasta_sequence() is defined outside this file. These tests only
   * pin its observable contract: called with a sequence type ('nucleotide' or
   * 'protein') and a FASTA string, it must return TRUE for every fixture from
   * provideValidFasta() and FALSE for every fixture from provideInvalidFasta().
   *
   * Fixtures are multi-line single-quoted strings. Their continuation lines are
   * deliberately flush with the left margin: any indentation placed inside the
   * quotes would become part of the FASTA text handed to the validator.
   */
  class ValidateFastaTest extends TripalTestCase {

    // Uncomment to auto start and rollback db transactions per test method.
    // use DBTransaction;

    /**
     * Test validate_fasta_sequence() detects invalid sequence.
     *
     * @dataProvider provideInvalidFasta
     *
     * @param string $type
     *   Sequence type passed through to the validator.
     * @param string $sequence
     *   FASTA text that the validator is expected to reject.
     */
    public function testInvalidFasta($type, $sequence) {
      $this->assertFalse(
        validate_fasta_sequence($type, $sequence),
        "Failed to detect invalid FASTA sequence."
      );
    }

    /**
     * Test validate_fasta_sequence() works for valid sequence.
     *
     * @dataProvider provideValidFasta
     *
     * @param string $type
     *   Sequence type passed through to the validator.
     * @param string $sequence
     *   FASTA text that the validator is expected to accept.
     */
    public function testValidFasta($type, $sequence) {
      $this->assertTrue(
        validate_fasta_sequence($type, $sequence),
        "Unable to pass valid fasta sequence"
      );
    }

    /**
     * Provide invalid fasta entries to test.
     *
     * Declared static because PHPUnit 10+ requires static data providers; older
     * PHPUnit releases accept static providers as well. Each data set is keyed
     * by a name taken from the fixture's own header line so that a failure
     * report identifies the offending fixture instead of a bare index.
     *
     * @return array
     *   Named data sets, each a [type, FASTA text] pair.
     */
    public static function provideInvalidFasta() {
      return [
        'nucleotide: space before definition' => [
          'nucleotide',
          '>good first record
ATCGACTAGCTACGATCGACTAGCAGTCAGTACTGACGTACTACGATCGACTAGCATGCT
GCATCGATCGATCGACTATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGACT
> space before definition
ATTTGGGTGGTTGGGGTTCCCCCACACCGTGGGGTTTGGCAAAACCCGTGGGGCCACACA
CACACCCCTGGGGTGTGACCCGTAACGCGAATATGCGCGATATTACGGCGCGCGATATTA',
        ],
        'nucleotide: has incorrect characters' => [
          'nucleotide',
          '>Has incorrect characters
ACGTGCATGCATCGACACTACGACTACGACTACTAGCTCGACTGATCGACATCGATCGAT
ACGTCAGCTACGGCGCGGCATGATCGAZZZZZZZZZZZZZZZZZZTCGCAACCCCTTTCA',
        ],
        'nucleotide: has special characters' => [
          'nucleotide',
          '>Has special characters
AACGTACGATCGACTAGCTACGATCGATCGACTAGCTAGCATCGATCGATCGATCGATCG
AATTGTCAACGT%%#^&^&%%$$#@@!#%^&*()@ACACGTAGCTAGGCCAACGTGCAAA',
        ],
        'nucleotide: has numbers' => [
          'nucleotide',
          '>Has numbers
ACGTACGATCGATCGACTAGCTAGCATCGATCGATCGATCAGCATCGATCGATCGACTCG
ACGTACGATCGATCGA2425345345ACGATCGACTAGCTAGCATCGATCGATCAGTCAG',
        ],
        'protein: space before identifier' => [
          'protein',
          '>good first record
ACGTURYKMSWBDACGTURYKMSWBDHACGTURYKMSWBDHVNXACGTURYKMSWBDHVNXVNXHVNX
ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXX
> space before identifier
ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXX
ACGTURYKMSWBDHVNACGTACGTURYKMACGTURYKMSWBDHVNXSWBDHVNXURYKMSWBDHVNXX',
        ],
        'protein: has incorrect characters' => [
          'protein',
          '>Has incorrect characters
ACGTURYKMSWBDACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXHVNX
ACGTURYKMSWBDHVACGTURYKMSWBZZZZZZZZZZZZZZZZZZDHVNACGTURYKMSWBDHVNXNZ',
        ],
        'protein: has special characters' => [
          'protein',
          '>Has special characters
ACGTURYKMSACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXWBDHVNX
ACGTURYKMSWBACGTURYKMSWB%#$%#$%#$^$%^%&^%&*&(*)()(!!#@#$#^DHVNXDHVNX',
        ],
        'protein: has numbers' => [
          'protein',
          '>Has numbers
ACGTURYKMSWBDHVACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXNX
ACGTURYKMSWACGTURYKMSWB13212324ACGTURYKMSWBDHVNX324443556DHVNXBDHVNX',
        ],
      ];
    }

    /**
     * Provide Valid FASTA entries to test.
     *
     * Declared static because PHPUnit 10+ requires static data providers; older
     * PHPUnit releases accept static providers as well. Each data set is keyed
     * by a descriptive name so that a failure report identifies the offending
     * fixture instead of a bare index. The header text inside each fixture is
     * test data and is kept exactly as written.
     *
     * @return array
     *   Named data sets, each a [type, FASTA text] pair.
     */
    public static function provideValidFasta() {
      return [
        'nucleotide: identifier description' => [
          'nucleotide',
          '>identifier description
ACGACTGTACGCGAGCTACGTACGTAGCATGCATCGATCGATCGATCGATCGATCGATC
AGCTAGTCAGCATCGATGCATGCATCGACGTAATCGAGCGTAGCGAGCTAGTCATACGT',
        ],
        'nucleotide: identifier' => [
          'nucleotide',
          '>identifier
ACGTACGATCGATCGATCGATCGATCGATCGACATGCTACGATCGATCGATCGATCGG',
        ],
        'nucleotide: multifasta description' => [
          'nucleotide',
          '>mutlifasta description
AGCTAGCATCGATCGATCAGCTAGCATCGATCGATCGACTAGCTAGCATCGATCGATC
CGATCGATCAGCTAGCTACGATCGATCGATCGATCGACTAGCTACGATCGATCGATCG
CGATCGACTAGCGTACGATCGATCGATCGATCGATCGATCGATCACGATCAGCTACGT
>multifasta description
GCTACGATCGATCAGCTATCGACTATCGACGTATCGATGGAGTCATGCAGTCATGCAG
ACGCTACGATCAGCGTACGATCGATCGATCGATCGATCGATCGATCGATCGGTGCGTC',
        ],
        'nucleotide: with spaces in sequence' => [
          'nucleotide',
          '>with spaces in sequence
ACGATCGATCGATCGATAGCTACGATCGATCGATCGATCAGCTAGCTACGATCGATCA
ACAGCATCAGCTAG ACGATCAGCTAGCTACGATCGATCG ACTGATCAGCATGCAT
AACGTACGATCAGCTAGCATGCAT ATCGATCAGCTAGCTACGATCGATCGATCAG',
        ],
        'nucleotide: masked sequence' => [
          'nucleotide',
          '>masked sequence
acgatcgatcgactagctacgatcgacatcgatcatgAACGTGTGGGGTGTGTGCAa
cagctagctagcatcgatcgatcgatcagctagcatgctacgagtcagcatcgtgca',
        ],
        'nucleotide: gi|gb pipe-delimited header' => [
          'nucleotide',
          '>gi|123456|gb|ABC12345.1|description [Genus species]
ACGATCGATCGATCGATAGCTACGATCGATCGATCGATCAGCTAGCTACGATCGATCA
ACGATCGATCGATCGATAGCTACGATCGATCGATCGATCAGCTAGCTACGATCGATCA',
        ],
        'protein: identifier description' => [
          'protein',
          '>identier description
SVSGIRKVQRAEGPATVLAIGTANPPNCIDQSTYADYYFRVTNSEHMTDLKKKFQRIC
ERTQIKNRHMYLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQP
MSKITHLIFCTTSGVALPGV',
        ],
        'protein: identifier' => [
          'protein',
          '>identifier
NKDARVLIVCSENTAVTFRGPSETDMDSLVGQALFADGAAAIIIGSDPVPEVEKPIFELV
STDQKLVPGSHGAIGGLLREVGLTFYLNKSVPDIISQ',
        ],
        'protein: multifasta description' => [
          'protein',
          '>multifasta description
ERTQIKNRHMYLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQP
MSKITHLIFCTTSGVALPGVDYELIVLLGLDPCVKRYMMYHQGCFAGGTVLRLAKDLAEN
NKDARVLIVCSENTAVTFRGPSETDMDSLVGQ
>multifasta description
NKDARVLIVCSENTAVTFRGPSETDMDSLVGQALFADGAAAIIIGSDPVPEVEKPIFELV
STDQKLVPGSHGAIGGLLREVGLTFYLNKSVPDIISQNINDALNKAFDPLGISDYNSIFW
IAHPGGRAILDQVEQKVNLKPEKMKATRDVLSNYGNMSSACV',
        ],
        'protein: with spaces in sequence' => [
          'protein',
          '>with spaces in sequence
AEGPATVLAIGTANPPNCI DQS TY ADYYFRVTNSEHMTDLKKKFQRIC
ERTQIKNRHMYLTEEILK ENPNMC AYKAPS LDAREDMMIREVPRVGKEAATKAIKEWGQP
MSKITHLIFCTTSGVALPGVDYEL IV',
        ],
        'protein: masked sequence' => [
          'protein',
          '>masked sequence
GGLLREVGLTFYLNKSVPDIISQNINDALsqngglrevknldynsifwNKAFDPLGISDYNSIFW
IAHPGGRAILDQVEQKVNLKPEKMKATRDVLSNYGNMSSAC',
        ],
        'protein: gi|gb pipe-delimited header' => [
          'protein',
          '>gi|123456|gb|ABC12345.1|description [Genus species]
ERTQIKNRHMYLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQP
MSKITHLIFCTTSGVALPGVDYELIVL',
        ],
        'protein: resveratrol synthase record' => [
          'protein',
          '>gi|166477|gb|AAA96434.1| resveratrol synthase [Arachis hypogaea]
MVSVSGIRKVQRAEGPATVLAIGTANPPNCIDQSTYADYYFRVTNSEHMTDLKKKFQRICERTQIKNRHM
YLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQPMSKITHLIFCTTSGVALPGV
DYELIVLLGLDPCVKRYMMYHQGCFAGGTVLRLAKDLAENNKDARVLIVCSENTAVTFRGPSETDMDSLV
GQALFADGAAAIIIGSDPVPEVEKPIFELVSTDQKLVPGSHGAIGGLLREVGLTFYLNKSVPDIISQNIN
DALNKAFDPLGISDYNSIFWIAHPGGRAILDQVEQKVNLKPEKMKATRDVLSNYGNMSSACVFFIMDLMR
KRSLEEGLKTTGEGLDWGVLFGFGPGLTIETVVLRSVAI',
        ],
      ];
    }

  }