assertFalse(validate_fasta_sequence($type, $sequence), "Failed to detect invalid FASTA sequence.");
  }

  /**
   * Confirms that validate_fasta_sequence() returns TRUE for valid input.
   *
   * @dataProvider provideValidFasta
   *
   * @param string $type
   *   First element of a dataset; the provider supplies 'nucleotide' or
   *   'protein'.
   * @param string $sequence
   *   Second element of a dataset; the FASTA text handed to the validator.
   */
  public function testValidFasta($type, $sequence) {
    $accepted = validate_fasta_sequence($type, $sequence);
    $this->assertTrue($accepted, "Unable to pass valid fasta sequence");
  }

  /**
   * Datasets of FASTA text that is meant to be rejected.
   *
   * Each dataset is a [type, FASTA text] pair. For both 'nucleotide' and
   * 'protein' the headers label the case: a second record with a space
   * after '>', incorrect characters, special characters, and numbers.
   *
   * NOTE(review): presumably consumed by the invalid-sequence test whose
   * header lies outside this view -- confirm. The lines of each record appear
   * separated by single spaces here; verify the whitespace inside these
   * literals against the upstream file before relying on them.
   *
   * @return array
   *   List of [type, FASTA text] pairs.
   */
  public function provideInvalidFasta() {
    $rejected = [
      // Nucleotide cases.
      ['nucleotide', '>good first record ATCGACTAGCTACGATCGACTAGCAGTCAGTACTGACGTACTACGATCGACTAGCATGCT GCATCGATCGATCGACTATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGACT > space before definition ATTTGGGTGGTTGGGGTTCCCCCACACCGTGGGGTTTGGCAAAACCCGTGGGGCCACACA CACACCCCTGGGGTGTGACCCGTAACGCGAATATGCGCGATATTACGGCGCGCGATATTA'],
      ['nucleotide', '>Has incorrect characters ACGTGCATGCATCGACACTACGACTACGACTACTAGCTCGACTGATCGACATCGATCGAT ACGTCAGCTACGGCGCGGCATGATCGAZZZZZZZZZZZZZZZZZZTCGCAACCCCTTTCA'],
      ['nucleotide', '>Has special characters AACGTACGATCGACTAGCTACGATCGATCGACTAGCTAGCATCGATCGATCGATCGATCG AATTGTCAACGT%%#^&^&%%$$#@@!#%^&*()@ACACGTAGCTAGGCCAACGTGCAAA'],
      ['nucleotide', '>Has numbers ACGTACGATCGATCGACTAGCTAGCATCGATCGATCGATCAGCATCGATCGATCGACTCG ACGTACGATCGATCGA2425345345ACGATCGACTAGCTAGCATCGATCGATCAGTCAG'],
      // Protein cases.
      ['protein', '>good first record ACGTURYKMSWBDACGTURYKMSWBDHACGTURYKMSWBDHVNXACGTURYKMSWBDHVNXVNXHVNX ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXX > space before identifier ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXX ACGTURYKMSWBDHVNACGTACGTURYKMACGTURYKMSWBDHVNXSWBDHVNXURYKMSWBDHVNXX'],
      ['protein', '>Has incorrect characters ACGTURYKMSWBDACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXHVNX ACGTURYKMSWBDHVACGTURYKMSWBZZZZZZZZZZZZZZZZZZDHVNACGTURYKMSWBDHVNXNZ'],
      ['protein', '>Has special characters ACGTURYKMSACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXWBDHVNX ACGTURYKMSWBACGTURYKMSWB%#$%#$%#$^$%^%&^%&*&(*)()(!!#@#$#^DHVNXDHVNX'],
      ['protein', '>Has numbers 
ACGTURYKMSWBDHVACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXNX ACGTURYKMSWACGTURYKMSWB13212324ACGTURYKMSWBDHVNX324443556DHVNXBDHVNX'],
    ];

    return $rejected;
  }

  /**
   * Datasets of FASTA text that is meant to be accepted.
   *
   * Each dataset is a [type, FASTA text] pair, matching the ($type, $sequence)
   * parameters of testValidFasta(). For both 'nucleotide' and 'protein' the
   * headers label the case: identifier with and without a description,
   * multi-record input, spaces inside the sequence, lower-case (masked)
   * residues, and pipe-delimited "gi|...|gb|..." style headers.
   *
   * NOTE(review): the lines of each record appear separated by single spaces
   * here; verify the whitespace inside these literals against the upstream
   * file before relying on them.
   *
   * @return array
   *   List of [type, FASTA text] pairs.
   */
  public function provideValidFasta() {
    $accepted = [
      // Nucleotide cases.
      ['nucleotide', '>identifier description ACGACTGTACGCGAGCTACGTACGTAGCATGCATCGATCGATCGATCGATCGATCGATC AGCTAGTCAGCATCGATGCATGCATCGACGTAATCGAGCGTAGCGAGCTAGTCATACGT'],
      ['nucleotide', '>identifier ACGTACGATCGATCGATCGATCGATCGATCGACATGCTACGATCGATCGATCGATCGG'],
      ['nucleotide', '>mutlifasta description AGCTAGCATCGATCGATCAGCTAGCATCGATCGATCGACTAGCTAGCATCGATCGATC CGATCGATCAGCTAGCTACGATCGATCGATCGATCGACTAGCTACGATCGATCGATCG CGATCGACTAGCGTACGATCGATCGATCGATCGATCGATCGATCACGATCAGCTACGT >multifasta description GCTACGATCGATCAGCTATCGACTATCGACGTATCGATGGAGTCATGCAGTCATGCAG ACGCTACGATCAGCGTACGATCGATCGATCGATCGATCGATCGATCGATCGGTGCGTC'],
      ['nucleotide', '>with spaces in sequence ACGATCGATCGATCGATAGCTACGATCGATCGATCGATCAGCTAGCTACGATCGATCA ACAGCATCAGCTAG ACGATCAGCTAGCTACGATCGATCG ACTGATCAGCATGCAT AACGTACGATCAGCTAGCATGCAT ATCGATCAGCTAGCTACGATCGATCGATCAG'],
      ['nucleotide', '>masked sequence acgatcgatcgactagctacgatcgacatcgatcatgAACGTGTGGGGTGTGTGCAa cagctagctagcatcgatcgatcgatcagctagcatgctacgagtcagcatcgtgca'],
      ['nucleotide', '>gi|123456|gb|ABC12345.1|description [Genus species] ACGATCGATCGATCGATAGCTACGATCGATCGATCGATCAGCTAGCTACGATCGATCA ACGATCGATCGATCGATAGCTACGATCGATCGATCGATCAGCTAGCTACGATCGATCA'],
      // Protein cases.
      ['protein', '>identier description SVSGIRKVQRAEGPATVLAIGTANPPNCIDQSTYADYYFRVTNSEHMTDLKKKFQRIC ERTQIKNRHMYLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQP MSKITHLIFCTTSGVALPGV'],
      ['protein', '>identifier NKDARVLIVCSENTAVTFRGPSETDMDSLVGQALFADGAAAIIIGSDPVPEVEKPIFELV STDQKLVPGSHGAIGGLLREVGLTFYLNKSVPDIISQ'],
      ['protein', '>multifasta description ERTQIKNRHMYLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQP MSKITHLIFCTTSGVALPGVDYELIVLLGLDPCVKRYMMYHQGCFAGGTVLRLAKDLAEN NKDARVLIVCSENTAVTFRGPSETDMDSLVGQ >multifasta description 
NKDARVLIVCSENTAVTFRGPSETDMDSLVGQALFADGAAAIIIGSDPVPEVEKPIFELV STDQKLVPGSHGAIGGLLREVGLTFYLNKSVPDIISQNINDALNKAFDPLGISDYNSIFW IAHPGGRAILDQVEQKVNLKPEKMKATRDVLSNYGNMSSACV'],
      ['protein', '>with spaces in sequence AEGPATVLAIGTANPPNCI DQS TY ADYYFRVTNSEHMTDLKKKFQRIC ERTQIKNRHMYLTEEILK ENPNMC AYKAPS LDAREDMMIREVPRVGKEAATKAIKEWGQP MSKITHLIFCTTSGVALPGVDYEL IV'],
      ['protein', '>masked sequence GGLLREVGLTFYLNKSVPDIISQNINDALsqngglrevknldynsifwNKAFDPLGISDYNSIFW IAHPGGRAILDQVEQKVNLKPEKMKATRDVLSNYGNMSSAC'],
      ['protein', '>gi|123456|gb|ABC12345.1|description [Genus species] ERTQIKNRHMYLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQP MSKITHLIFCTTSGVALPGVDYELIVL'],
      ['protein', '>gi|166477|gb|AAA96434.1| resveratrol synthase [Arachis hypogaea] MVSVSGIRKVQRAEGPATVLAIGTANPPNCIDQSTYADYYFRVTNSEHMTDLKKKFQRICERTQIKNRHM YLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQPMSKITHLIFCTTSGVALPGV DYELIVLLGLDPCVKRYMMYHQGCFAGGTVLRLAKDLAENNKDARVLIVCSENTAVTFRGPSETDMDSLV GQALFADGAAAIIIGSDPVPEVEKPIFELVSTDQKLVPGSHGAIGGLLREVGLTFYLNKSVPDIISQNIN DALNKAFDPLGISDYNSIFWIAHPGGRAILDQVEQKVNLKPEKMKATRDVLSNYGNMSSACVFFIMDLMR KRSLEEGLKTTGEGLDWGVLFGFGPGLTIETVVLRSVAI'],
    ];

    return $accepted;
  }

}