|
@@ -0,0 +1,149 @@
|
|
|
+<?php
|
|
|
+namespace Tests;
|
|
|
+
|
|
|
+use StatonLab\TripalTestSuite\DBTransaction;
|
|
|
+use StatonLab\TripalTestSuite\TripalTestCase;
|
|
|
+
|
|
|
/**
 * Tests validate_fasta_sequence() against valid and invalid FASTA input.
 *
 * Every fixture is a complete FASTA string paired with the query type
 * ('nucleotide' or 'protein') that is handed to the validator. Data sets are
 * keyed by name so a failing case is identified in the PHPUnit output instead
 * of being reported as an anonymous "data set #N".
 *
 * NOTE: the FASTA fixtures are multi-line single-quoted strings, so they must
 * stay flush with the left margin; any indentation added in front of a
 * sequence line would become part of the string under test.
 */
class ValidateFastaTest extends TripalTestCase {
  // Uncomment to auto start and rollback db transactions per test method.
  // use DBTransaction;

  /**
   * Test validate_fasta_sequence() detects invalid sequence.
   *
   * @param string $type
   *   The sequence type passed to the validator ('nucleotide' or 'protein').
   * @param string $sequence
   *   The FASTA text that is expected to be rejected.
   *
   * @dataProvider provideInvalidFasta
   */
  public function testInvalidFasta($type, $sequence) {
    $this->assertFalse(
      validate_fasta_sequence($type, $sequence),
      "Failed to detect invalid FASTA sequence."
    );
  }

  /**
   * Test validate_fasta_sequence() works for valid sequence.
   *
   * @param string $type
   *   The sequence type passed to the validator ('nucleotide' or 'protein').
   * @param string $sequence
   *   The FASTA text that is expected to be accepted.
   *
   * @dataProvider provideValidFasta
   */
  public function testValidFasta($type, $sequence) {
    $this->assertTrue(
      validate_fasta_sequence($type, $sequence),
      "Unable to pass valid fasta sequence"
    );
  }

  /**
   * Provide invalid fasta entries to test.
   *
   * @return array
   *   Named data sets, each a [type, FASTA string] pair consumed by
   *   testInvalidFasta().
   */
  public function provideInvalidFasta() {
    return [
      'nucleotide: space before definition' => [
        'nucleotide',
'>good first record
ATCGACTAGCTACGATCGACTAGCAGTCAGTACTGACGTACTACGATCGACTAGCATGCT
GCATCGATCGATCGACTATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGACT
> space before definition
ATTTGGGTGGTTGGGGTTCCCCCACACCGTGGGGTTTGGCAAAACCCGTGGGGCCACACA
CACACCCCTGGGGTGTGACCCGTAACGCGAATATGCGCGATATTACGGCGCGCGATATTA',
      ],
      'nucleotide: incorrect characters' => [
        'nucleotide',
'>Has incorrect characters
ACGTGCATGCATCGACACTACGACTACGACTACTAGCTCGACTGATCGACATCGATCGAT
ACGTCAGCTACGGCGCGGCATGATCGAZZZZZZZZZZZZZZZZZZTCGCAACCCCTTTCA',
      ],
      'nucleotide: special characters' => [
        'nucleotide',
'>Has special characters
AACGTACGATCGACTAGCTACGATCGATCGACTAGCTAGCATCGATCGATCGATCGATCG
AATTGTCAACGT%%#^&^&%%$$#@@!#%^&*()@ACACGTAGCTAGGCCAACGTGCAAA',
      ],
      'nucleotide: numbers' => [
        'nucleotide',
'>Has numbers
ACGTACGATCGATCGACTAGCTAGCATCGATCGATCGATCAGCATCGATCGATCGACTCG
ACGTACGATCGATCGA2425345345ACGATCGACTAGCTAGCATCGATCGATCAGTCAG',
      ],
      'protein: space before identifier' => [
        'protein',
'>good first record
ACGTURYKMSWBDACGTURYKMSWBDHACGTURYKMSWBDHVNXACGTURYKMSWBDHVNXVNXHVNX
ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXX
> space before identifier
ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXX
ACGTURYKMSWBDHVNACGTACGTURYKMACGTURYKMSWBDHVNXSWBDHVNXURYKMSWBDHVNXX',
      ],
      'protein: incorrect characters' => [
        'protein',
'>Has incorrect characters
ACGTURYKMSWBDACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXHVNX
ACGTURYKMSWBDHVACGTURYKMSWBZZZZZZZZZZZZZZZZZZDHVNACGTURYKMSWBDHVNXNZ',
      ],
      'protein: special characters' => [
        'protein',
'>Has special characters
ACGTURYKMSACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXWBDHVNX
ACGTURYKMSWBACGTURYKMSWB%#$%#$%#$^$%^%&^%&*&(*)()(!!#@#$#^DHVNXDHVNX',
      ],
      'protein: numbers' => [
        'protein',
'>Has numbers
ACGTURYKMSWBDHVACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHVNXXXNX
ACGTURYKMSWACGTURYKMSWB13212324ACGTURYKMSWBDHVNX324443556DHVNXBDHVNX',
      ],
    ];
  }

  /**
   * Provide Valid FASTA entries to test.
   *
   * @return array
   *   Named data sets, each a [type, FASTA string] pair consumed by
   *   testValidFasta().
   */
  public function provideValidFasta() {
    return [
      'nucleotide: identifier with description' => [
        'nucleotide',
'>identifier description
ACGACTGTACGCGAGCTACGTACGTAGCATGCATCGATCGATCGATCGATCGATCGATC
AGCTAGTCAGCATCGATGCATGCATCGACGTAATCGAGCGTAGCGAGCTAGTCATACGT',
      ],
      'nucleotide: identifier only' => [
        'nucleotide',
'>identifier
ACGTACGATCGATCGATCGATCGATCGATCGACATGCTACGATCGATCGATCGATCGG',
      ],
      'nucleotide: multiple records' => [
        'nucleotide',
'>mutlifasta description
AGCTAGCATCGATCGATCAGCTAGCATCGATCGATCGACTAGCTAGCATCGATCGATC
CGATCGATCAGCTAGCTACGATCGATCGATCGATCGACTAGCTACGATCGATCGATCG
CGATCGACTAGCGTACGATCGATCGATCGATCGATCGATCGATCACGATCAGCTACGT
>multifasta description
GCTACGATCGATCAGCTATCGACTATCGACGTATCGATGGAGTCATGCAGTCATGCAG
ACGCTACGATCAGCGTACGATCGATCGATCGATCGATCGATCGATCGATCGGTGCGTC',
      ],
      'nucleotide: spaces in sequence' => [
        'nucleotide',
'>with spaces in sequence
ACGATCGATCGATCGATAGCTACGATCGATCGATCGATCAGCTAGCTACGATCGATCA
ACAGCATCAGCTAG ACGATCAGCTAGCTACGATCGATCG ACTGATCAGCATGCAT
AACGTACGATCAGCTAGCATGCAT ATCGATCAGCTAGCTACGATCGATCGATCAG',
      ],
      'nucleotide: masked sequence' => [
        'nucleotide',
'>masked sequence
acgatcgatcgactagctacgatcgacatcgatcatgAACGTGTGGGGTGTGTGCAa
cagctagctagcatcgatcgatcgatcagctagcatgctacgagtcagcatcgtgca',
      ],
      'protein: identifier with description' => [
        'protein',
'>identier description
ACGTURYKMSWBDHACGTURYKACGTURYKMSWBACGTURYKNXMSWBDHVNXVNX
YKMSWACGTURYKMSWBDHVNXBDHVNXACGTURYACGTURYKMSWBDSWBDHATK',
      ],
      'protein: identifier only' => [
        'protein',
'>identifier
ACGTURYKMSWBACGTURYKMSWBDHVNXDACGTURYKMSWBDHVNXHVACGTURY
ACGTURYKMSWBDHVNACGTURYKMSWBDACGTURYKMSWBDHVNXHVACGTURXX',
      ],
      'protein: multiple records' => [
        'protein',
'>multifasta description
ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHACGTURYKMSWBDHVNXVNXXX
ACGTURYKMSWBDACGTURYKMSWBACGTURYKMSWBDHACGTURYKMSWBDHVNXVNXDHVNXHVNX
>multifasta description
ACGTURYKMSWBACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXDHVNX
ACGTURYKMSWBDHVNACGTURYKMSACGTURYKMSWBDACGTURYKMSWBDHVNXHVNXWBDHVNXX',
      ],
      'protein: spaces in sequence' => [
        'protein',
'>with spaces in sequence
ACGTURYKMSWBDACGTURY KMSWBDHACGTURYKMSWBDH VACGTURYKMSW BDHVNXNXVNXHVNX
 ACACGTURYKMSWBDHVGTURY K MSWBDHVNX ACGTURYKMSWBDH VACGTURYKMSWBDHVNXNX',
      ],
      'protein: masked sequence' => [
        'protein',
'>masked sequence
acgturykmswbdhvacgturykmswbdhvacgturykmswbdhvnacgturykmswbdhAXNvnxxnxnx
acgturykmswbacgturykmswbdhvnxacgtuTTUrykmswbdhvnxdacgturykmswbdhvnxhvnx',
      ],
    ];
  }

}
|