Browse Source

Fix amino acid regex.

Lacey Sanderson 6 years ago
parent
commit
37c955a866
2 changed files with 37 additions and 14 deletions
  1. 1 1
      api/blast_ui.api.inc
  2. 36 13
      tests/ValidateFastaTest.php

+ 1 - 1
api/blast_ui.api.inc

@@ -347,7 +347,7 @@ function validate_fasta_sequence($type, $sequence) {
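   // The nucleotide pattern includes the IUPAC ambiguity codes; the protein
   // pattern accepts only the 20 standard amino acid letters plus '*' and '-'.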
   $fastaSeqRegEx = ($type == 'nucleotide')
                    ? '/^[ATCGNUKMBVSWDYRHatcgnukmbvswdyrh\[\/\]\s\n\r]*$/'
-                   : '/^[acgturykmswbdhvnxACGTURYKMSWBDHVNX\*\-\s\n\r]*$/';
+                   : '/^[acdefghiklmnpqrstvwyACDEFGHIKLMNPQRSTVWY\*\-\s\n\r]*$/';
   $defRegEx      = '/^>\S.*/';
 
   // For each line of the sequence.

+ 36 - 13
tests/ValidateFastaTest.php

@@ -116,33 +116,56 @@ AACGTACGATCAGCTAGCATGCAT   ATCGATCAGCTAGCTACGATCGATCGATCAG'
 acgatcgatcgactagctacgatcgacatcgatcatgAACGTGTGGGGTGTGTGCAa
 cagctagctagcatcgatcgatcgatcagctagcatgctacgagtcagcatcgtgca'
       ],
-      ['protein',
+      ['nucleotide',
+'>gi|123456|gb|ABC12345.1|description [Genus species]
+ACGATCGATCGATCGATAGCTACGATCGATCGATCGATCAGCTAGCTACGATCGATCA
+ACGATCGATCGATCGATAGCTACGATCGATCGATCGATCAGCTAGCTACGATCGATCA',
+     ],
+     ['protein',
 '>identier description
-ACGTURYKMSWBDHACGTURYKACGTURYKMSWBACGTURYKNXMSWBDHVNXVNX
-YKMSWACGTURYKMSWBDHVNXBDHVNXACGTURYACGTURYKMSWBDSWBDHATK'
+SVSGIRKVQRAEGPATVLAIGTANPPNCIDQSTYADYYFRVTNSEHMTDLKKKFQRIC
+ERTQIKNRHMYLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQP
+MSKITHLIFCTTSGVALPGV'
       ],
       ['protein',
 '>identifier
-ACGTURYKMSWBACGTURYKMSWBDHVNXDACGTURYKMSWBDHVNXHVACGTURY
-ACGTURYKMSWBDHVNACGTURYKMSWBDACGTURYKMSWBDHVNXHVACGTURXX'
+NKDARVLIVCSENTAVTFRGPSETDMDSLVGQALFADGAAAIIIGSDPVPEVEKPIFELV
+STDQKLVPGSHGAIGGLLREVGLTFYLNKSVPDIISQ'
       ],
       ['protein',
 '>multifasta description
-ACGTURYKMSWBDHVNACGTURYKMSWBDHVNACGTURYKMSWBDHACGTURYKMSWBDHVNXVNXXX
-ACGTURYKMSWBDACGTURYKMSWBACGTURYKMSWBDHACGTURYKMSWBDHVNXVNXDHVNXHVNX
+ERTQIKNRHMYLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQP
+MSKITHLIFCTTSGVALPGVDYELIVLLGLDPCVKRYMMYHQGCFAGGTVLRLAKDLAEN
+NKDARVLIVCSENTAVTFRGPSETDMDSLVGQ
 >multifasta description
-ACGTURYKMSWBACGTURYKMSWBDHVNACGTURYKMSWBDHVACGTURYKMSWBDHVNXNXXDHVNX
-ACGTURYKMSWBDHVNACGTURYKMSACGTURYKMSWBDACGTURYKMSWBDHVNXHVNXWBDHVNXX'
+NKDARVLIVCSENTAVTFRGPSETDMDSLVGQALFADGAAAIIIGSDPVPEVEKPIFELV
+STDQKLVPGSHGAIGGLLREVGLTFYLNKSVPDIISQNINDALNKAFDPLGISDYNSIFW
+IAHPGGRAILDQVEQKVNLKPEKMKATRDVLSNYGNMSSACV'
       ],
       ['protein',
 '>with spaces in sequence
-ACGTURYKMSWBDACGTURY KMSWBDHACGTURYKMSWBDH VACGTURYKMSW   BDHVNXNXVNXHVNX
- ACACGTURYKMSWBDHVGTURY  K MSWBDHVNX ACGTURYKMSWBDH  VACGTURYKMSWBDHVNXNX'
+AEGPATVLAIGTANPPNCI     DQS   TY ADYYFRVTNSEHMTDLKKKFQRIC
+ERTQIKNRHMYLTEEILK ENPNMC  AYKAPS LDAREDMMIREVPRVGKEAATKAIKEWGQP
+MSKITHLIFCTTSGVALPGVDYEL  IV'
       ],
       ['protein',
 '>masked sequence
-acgturykmswbdhvacgturykmswbdhvacgturykmswbdhvnacgturykmswbdhAXNvnxxnxnx
-acgturykmswbacgturykmswbdhvnxacgtuTTUrykmswbdhvnxdacgturykmswbdhvnxhvnx'
+GGLLREVGLTFYLNKSVPDIISQNINDALsqngglrevknldynsifwNKAFDPLGISDYNSIFW
+IAHPGGRAILDQVEQKVNLKPEKMKATRDVLSNYGNMSSAC'
+      ],
+      ['protein',
+'>gi|123456|gb|ABC12345.1|description [Genus species]
+ERTQIKNRHMYLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQP
+MSKITHLIFCTTSGVALPGVDYELIVL'
+      ],
+      ['protein',
+'>gi|166477|gb|AAA96434.1| resveratrol synthase [Arachis hypogaea]
+MVSVSGIRKVQRAEGPATVLAIGTANPPNCIDQSTYADYYFRVTNSEHMTDLKKKFQRICERTQIKNRHM
+YLTEEILKENPNMCAYKAPSLDAREDMMIREVPRVGKEAATKAIKEWGQPMSKITHLIFCTTSGVALPGV
+DYELIVLLGLDPCVKRYMMYHQGCFAGGTVLRLAKDLAENNKDARVLIVCSENTAVTFRGPSETDMDSLV
+GQALFADGAAAIIIGSDPVPEVEKPIFELVSTDQKLVPGSHGAIGGLLREVGLTFYLNKSVPDIISQNIN
+DALNKAFDPLGISDYNSIFWIAHPGGRAILDQVEQKVNLKPEKMKATRDVLSNYGNMSSACVFFIMDLMR
+KRSLEEGLKTTGEGLDWGVLFGFGPGLTIETVVLRSVAI',
       ]
     ];
   }