blast_ui.blastn.inc 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. <?php
  2. /**
  3. * @file
  4. * Contains all functions for the Nucleotide BLAST
  5. */
  6. /**
  7. * Nucleotide BLAST Submission Form
  8. *
  9. * @see blast_nucleotide_form_validate()
  10. * @see blast_nucleotide_form_submit()
  11. */
  12. function blast_nucleotide_form($form, &$form_state) {
  13. // CSS support to the form
  14. $form['#attached']['css'] = array(
  15. drupal_get_path('module', 'blast_ui') . '/css/form.css',
  16. );
  17. // NUCLEOTIDE QUERY
  18. //.........................
  19. $form['query'] = array(
  20. '#type' => 'fieldset',
  21. '#title' => t('Enter Query Sequence'),
  22. '#description' => t('Enter one or more queries in the top text box or use the browse button to upload a file from your local disk. The file may contain a single sequence or a list of sequences. In both cases, the data must be in FASTA format. <a href="http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml" target="_blank">More information..</a> '),
  23. '#collapsible' => TRUE,
  24. '#collapsed' => FALSE,
  25. );
  26. $form['query']['example_sequence'] = array(
  27. '#type' => 'button',
  28. '#button_type'=> 'button',
  29. '#limit_validation_errors' => array(),
  30. '#value' => t('Example Sequence'),
  31. '#prefix' => '<div class="center">',
  32. '#suffix' => '</div>',
  33. '#validate' => array(),
  34. '#ajax' => array(
  35. 'callback' => 'ajax_nucleotide_text_area_callback',
  36. 'wrapper' => 'fasta_seq',
  37. 'method' => 'replace',
  38. 'effect' => 'fade',
  39. ),
  40. '#attributes' => array('onclick' => 'return false;'),
  41. );
  42. // Textfield for submitting a mult-FASTA query
  43. $form['query']['FASTA'] = array(
  44. '#type' => 'textarea',
  45. '#title' => t('Enter FASTA sequence(s)'),
  46. '#description'=>t('Enter query sequence(s) in the text area.'),
  47. '#prefix' => '<div id="fasta_seq">',
  48. '#suffix' => '</div>',
  49. );
  50. // Upload a file as an alternative to enter a query sequence
  51. $form['#attributes']['enctype'] = 'multipart/form-data';
  52. $form['query']['UPLOAD'] = array(
  53. '#prefix' => 'Or upload your query files: ',
  54. '#type' => 'file',
  55. '#description' => t('The file should be a plain-text FASTA file and not a .doc, .docx, etc. It cannot be greater than 10 Mb in size.'),
  56. );
  57. // BLAST DATABASE
  58. //.........................
  59. $form['DB'] = array(
  60. '#type' => 'fieldset',
  61. '#title' => t('Choose Search Set'),
  62. '#description' => t('Choose from one of the nucleotide BLAST databases listed below. You can also use the browse button to upload a file from your local disk. The file may contain a single sequence or a list of sequences. '),
  63. '#collapsible' => TRUE,
  64. '#collapsed' => FALSE,
  65. );
  66. $options = get_blast_database_options('n');
  67. $form['DB']['SELECT_DB'] = array(
  68. '#type' => 'select',
  69. '#title' => t('Nucleotide BLAST Databases:'),
  70. '#options' => $options,
  71. '#default_value' => 0,
  72. );
  73. // Upload a file as an alternative to enter a query sequence
  74. $form['#attributes']['enctype'] = 'multipart/form-data';
  75. $form['DB']['DBUPLOAD'] = array(
  76. '#prefix' => 'Or upload your own dataset: ',
  77. '#type' => 'file',
  78. '#description' => t('The file should be a plain-text FASTA file and not a .doc, .docx, etc. It cannot be greater than 10 Mb in size.'),
  79. );
  80. // ALGORITHM PARAMETERS
  81. //.........................
  82. $form['ALG'] = array(
  83. '#type' => 'fieldset',
  84. '#title' => t('Algorithm parameters'),
  85. '#collapsible' => TRUE,
  86. '#collapsed' => TRUE,
  87. );
  88. // General parameters
  89. $form['ALG']['GParam'] = array(
  90. '#type' => 'fieldset',
  91. '#title' => t('General parameters'),
  92. '#collapsible' => FALSE,
  93. );
  94. $form['ALG']['GParam']['maxTarget'] = array(
  95. '#type' => 'select',
  96. '#title' => t('Max target sequences:'),
  97. '#options' => array(
  98. 0 => t('10'),
  99. 1 => t('50'),
  100. 2 => t('100'),
  101. 3 => t('250'),
  102. 4 => t('500'),
  103. 5 => t('1000'),
  104. 6 => t('5000'),
  105. 7 => t('10000'),
  106. 8 => t('20000'),
  107. ),
  108. '#default_value' => 2,
  109. '#description' => t('Select the maximum number of aligned sequences to display'),
  110. );
  111. $form['ALG']['GParam']['shortQueries'] = array(
  112. '#type' => 'checkbox',
  113. '#title' => t('Automatically adjust parameters for short input sequences'),
  114. '#default_value' => TRUE,
  115. );
  116. $form['ALG']['GParam']['eVal'] = array(
  117. '#type' => 'textfield',
  118. '#title' => t('e-Value (Expected Threshold)'),
  119. '#default_value' => 10,
  120. '#size' => 12,
  121. '#maxlength' => 20,
  122. '#description' => t('Expected number of chance matches in a random model. This number should be give in a decimal format. <a href="http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml#expect" target="_blank">More Information</a> | <a href="https://www.youtube.com/watch?v=nO0wJgZRZJs" target="_blank">Expect value vedio tutorial</a>'),
  123. );
  124. $form['ALG']['GParam']['wordSize'] = array(
  125. '#type' => 'select',
  126. '#title' => t('Word size:'),
  127. '#options' => array(
  128. 0 => t('16'),
  129. 1 => t('20'),
  130. 2 => t('24'),
  131. 3 => t('28'),
  132. 4 => t('32'),
  133. 5 => t('48'),
  134. 6 => t('64'),
  135. 7 => t('128'),
  136. 8 => t('256'),
  137. ),
  138. '#default_value' => 3,
  139. '#description' => t('The length of the seed that initiates an alignment'),
  140. );
  141. $form['ALG']['GParam']['qRange'] = array(
  142. '#type' => 'textfield',
  143. '#title' => t('Max matches in a query range'),
  144. '#default_value' => 0,
  145. '#size' => 12,
  146. '#maxlength' => 20,
  147. '#description' => t('Limit the number of matches to a query range. This option is useful if many strong matches to one part of a query may prevent BLAST from presenting weaker matches to another part of the query.'),
  148. );
  149. // Scoring parameters
  150. $form['ALG']['SParam'] = array(
  151. '#type' => 'fieldset',
  152. '#title' => t('Scoring parameters'),
  153. '#collapsible' => FALSE,
  154. );
  155. $form['ALG']['SParam']['M&MScores'] = array(
  156. '#type' => 'select',
  157. '#title' => t('Match/Mismatch Scores:'),
  158. '#options' => array(
  159. 0 => t('1,-2'),
  160. 1 => t('1,-3'),
  161. 2 => t('1,-4'),
  162. 3 => t('2,-3'),
  163. 4 => t('4,-5'),
  164. 5 => t('1,-1'),
  165. ),
  166. '#default_value' => 0,
  167. '#description' => t('Reward and penalty for matching and mismatching bases.'),
  168. );
  169. $form['ALG']['SParam']['gapCost'] = array(
  170. '#type' => 'select',
  171. '#title' => t('Gap Costs:'),
  172. '#options' => array(
  173. 0 => t('Existence: 5 Extension: 2'),
  174. 1 => t('Existence: 2 Extension: 2'),
  175. 2 => t('Existence: 1 Extension: 2'),
  176. 3 => t('Existence: 0 Extension: 2'),
  177. 4 => t('Existence: 3 Extension: 1'),
  178. 5 => t('Existence: 2 Extension: 1'),
  179. 6 => t('Existence: 1 Extension: 1'),
  180. ),
  181. '#default_value' => 0,
  182. '#description' => t('Cost to create and extend a gap in an alignment. Linear costs are available only with megablast and are determined by the match/mismatch scores.'),
  183. );
  184. // Submit
  185. $form['submit'] = array(
  186. '#type' => 'submit',
  187. '#default_value' => ' BLAST ',
  188. );
  189. return $form;
  190. }
  191. /**
  192. * Form validation handler for blast_nucleotide_form().
  193. *
  194. * @see blast_nucleotide_form().
  195. */
  196. function blast_nucleotide_form_validate($form, &$form_state) {
  197. // Validate FASTA seq in textfield
  198. $fastaSeq = $form_state['input']['FASTA'];
  199. if (isset($fastaSeq)) {
  200. if (_validateFasta($fastaSeq)){
  201. form_set_error('nBLAST', t('Error: Failed to read the Blast query: Wrong format provided for FASTA nucleotide sequence'));
  202. }
  203. else {
  204. $form_state['qFlag'] = 'seqQuery';
  205. }
  206. }
  207. // Validate query upload
  208. $upQuery = file_save_upload('UPLOAD', array('file_validate_extensions' => array('txt fasta fa fna')), FILE_EXISTS_RENAME);
  209. if ($upQuery) {
  210. $upQuery_uri = $upQuery->uri;
  211. $form_state['upQuery_path'] = drupal_realpath($upQuery_uri);
  212. $upQuery_content = file_get_contents($form_state['upQuery_path']);
  213. if (_validateFasta($upQuery_content)){
  214. form_set_error('nBLAST', t('Error: Failed to upload the Blast query: Wrong format provided for FASTA nucleotide sequence'));
  215. }
  216. else {
  217. $form_state['qFlag'] = 'upQuery';
  218. }
  219. }
  220. // Validate blast database upload
  221. $upDB = file_save_upload('DBUPLOAD', array('file_validate_extensions' => array('txt fasta fa fna')), FILE_EXISTS_RENAME);
  222. if ($upDB) {
  223. $upDB_uri = $upDB->uri;
  224. $form_state['upDB_path'] = drupal_realpath($upDB_uri);
  225. $upDB_content = file_get_contents($form_state['upDB_path']);
  226. if (_validateFasta($upDB_content)){
  227. form_set_error('DB', t('Error: Failed to upload the Blast subject sequence file: Wrong format provided for FASTA nucleotide sequence'));
  228. }
  229. else {
  230. $form_state['dbFlag'] = 'upQuery';
  231. }
  232. }
  233. else {
  234. $form_state['dbFlag'] = 'blastdb';
  235. }
  236. }
  237. /**
  238. * Form submission handler for blast_nucleotide_form().
  239. *
  240. * @see blast_nucleotide_form().
  241. */
  242. function blast_nucleotide_form_submit($form, &$form_state) {
  243. $eVal = $form_state['values']['eVal'];
  244. $trgtKey = $form_state['values']['maxTarget'];
  245. $numAlign = $form['ALG']['GParam']['maxTarget']['#options'][$trgtKey];
  246. $wsKey = $form_state['values']['wordSize'];
  247. $wordSize = $form['ALG']['GParam']['wordSize']['#options'][$wsKey];
  248. // Expand Gap Cost key into open and extend penalties
  249. $gapKey = $form_state['values']['gapCost'];
  250. switch ($gapKey) {
  251. case 0:
  252. $gapOpen = 5;
  253. $gapExtend = 2;
  254. break;
  255. case 1:
  256. $gapOpen = 2;
  257. $gapExtend = 2;
  258. break;
  259. case 2:
  260. $gapOpen = 1;
  261. $gapExtend = 2;
  262. break;
  263. case 3:
  264. $gapOpen = 0;
  265. $gapExtend = 2;
  266. break;
  267. case 4:
  268. $gapOpen = 3;
  269. $gapExtend = 1;
  270. break;
  271. case 5:
  272. $gapOpen = 2;
  273. $gapExtend = 1;
  274. break;
  275. case 6:
  276. $gapOpen = 1;
  277. $gapExtend = 1;
  278. break;
  279. }
  280. // Epand Match/Mismatch option into penalty/reward values
  281. // @todo Amir: Is the switch supposed to be for $scoreKey?
  282. $scoreKey = $form_state['values']['M&MScores'];
  283. switch ($gapKey) {
  284. case 0:
  285. $penalty = -2;
  286. $reward = 1;
  287. break;
  288. case 1:
  289. $penalty = -3;
  290. $reward = 1;
  291. break;
  292. case 2:
  293. $penalty = -4;
  294. $reward = 1;
  295. break;
  296. case 3:
  297. $penalty = -3;
  298. $reward = 2;
  299. break;
  300. case 4:
  301. $penalty = -5;
  302. $reward = 4;
  303. break;
  304. case 5:
  305. $penalty = -1;
  306. $reward = 1;
  307. break;
  308. }
  309. // If the query was submitted via the texrfield then create a file containing it
  310. if ( isset($form_state['qFlag']) ) {
  311. if ( $form_state['qFlag'] == 'seqQuery' ) {
  312. $seq_content = $form_state['values']['FASTA'];
  313. $query = "/tmp/user__query_file.fasta";
  314. file_put_contents ( $query , $seq_content);
  315. }
  316. elseif ( $form_state['qFlag'] == 'upQuery' ) {
  317. $query = $form_state['upQuery_path'];
  318. }
  319. }
  320. // If the BLAST database was uploaded then use it to run the BLAST
  321. if ( $form_state['dbFlag'] == 'upQuery') {
  322. // Since we only support using the -db flag (not -subject) we need to create a
  323. // blast database for the FASTA uploaded.
  324. // NOTE: We can't support subject because we need to generate the ASN.1+ format
  325. // to provide multiple download type options from the same BLAST
  326. $blastdb_with_path = $form_state['upDB_path'];
  327. system("makeblastdb -in $blastdb_with_path -dbtype nucl -parse_seqids");
  328. //$blast_subj_cmd = "blastn -query $query -subject $subjectSeq -out sites/default/files/$subSeqOut.blastn.html -evalue $eVal -word_size $wordSize -gapopen $gapOpen -gapextend $gapExtend -penalty $penalty -reward $reward -num_alignments 100 -html";
  329. //system($blast_subj_cmd);
  330. }
  331. // Otherwise, we are using one of the website provided BLAST databases so form the
  332. // BLAST command accordingly
  333. elseif ($form_state['dbFlag'] == 'blastdb') {
  334. $selected_db = $form_state['values']['SELECT_DB'];
  335. $blastdb_node = node_load($selected_db);
  336. $blastdb_with_path = $blastdb_node->db_path;
  337. }
  338. // Actually submit the BLAST Tripal Job
  339. // NOTE: Tripal jobs needs to be executed from the command-line before it will be run!!
  340. $blastdb_with_suffix = $blastdb_with_path . '.nsq';
  341. if (is_readable($blastdb_with_suffix)) {
  342. global $user;
  343. $output_filestub = date('YMd_His');
  344. $job_args = array(
  345. 'program' => 'blastn',
  346. 'query' => $query,
  347. 'database' => $blastdb_with_path,
  348. 'output_filename' => $output_filestub,
  349. 'options' => array(
  350. 'evalue' => $eVal,
  351. 'word_size' => $wordSize,
  352. 'gapopen' => $gapOpen,
  353. 'gapextend' => $gapExtend,
  354. 'penalty' => $penalty,
  355. 'reward' => $reward
  356. )
  357. );
  358. $job_id = tripal_add_job("BLAST (blastn): $query",'blast_job','run_BLAST_tripal_job', $job_args, $user->uid);
  359. // Redirect to the BLAST results page
  360. drupal_goto("blast/report/$job_id");
  361. }
  362. else {
  363. $dbfile_uploaded_msg = ($form_state['dbFlag'] == 'upQuery') ? 'The BLAST database was submitted via user upload.' : 'Existing BLAST Database was chosen';
  364. tripal_report_error(
  365. 'blast_ui',
  366. TRIPAL_ERROR,
  367. "BLAST database %db unaccessible. $dbfile_uploaded_msg",
  368. array('%db' => $blastdb_with_path)
  369. );
  370. drupal_set_message('BLAST database unaccessible. Please contact the site administrator.','error');
  371. }
  372. }
  373. // call back function for example sequence
  374. function ajax_nucleotide_text_area_callback($form, $form_state) {
  375. $element = $form['query']['FASTA']; // Get example Protein sequence
  376. $element['#value'] = '>lipoxygenase Glyma15g03040
  377. TTTCGTATGA GATTAAAATG TGTGAAATTT TGTTTGATAG GACATGGGAA AGGAAAAGTT GGAAAGGCTA CAAATTTAAG AGGACAAGTG TCGTTACCAA CCTTGGGAGC TGGCGAAGAT GCATACGATG TTCATTTTGA ATGGGACAGT GACTTCGGAA TTCCCGGTGC ATTTTACATT AAGAACTTCA TGCAAGTTGA GTTCTATCTC AAGTCTCTAA CTCTCGAAGA CATTCCAAAC CACGGAACCA TTCACTTCGT ATGCAACTCC TGGGTTTACA ACTCAAAATC CTACCATTCT GATCGCATTT TCTTTGCCAA CAATGTAAGC TACTTAAATA CTGTTATACA TTGTCTAACA TCTTGTTAGA GTCTTGCATG ATGTGTACCG TTTATTGTTG TTGTTGAACT TTACCACATG GCATGGATGC AAAAGTTGTT ATACACATAA ATTATAATGC AGACATATCT TCCAAGCGAG ACACCGGCTC CACTTGTCAA GTACAGAGAA GAAGAATTGA AGAATGTAAG AGGGGATGGA ACTGGTGAGC GCAAGGAATG GGATAGGATC TATGATTATG ATGTCTACAA TGACTTGGGC GATCCAGATA AGGGTGAAAA GTATGCACGC CCCGTTCTTG GAGGTTCTGC CTTACCTTAC CCTCGCAGAG GAAGAACCGG AAGAGGAAAA ACTAGAAAAG GTTTCTCACT AGTCACTAAT TTATTACTTT TTAATGTTTG TTTTTAGGCA TCTTTTCTGA TGAAATGTAT ACTTTTGATG TTTTTTTGTT TTAGCATAAC TGAATTAGTA AAGTGTGTTG TGTTCCTTAG AAGTTAGAAA AGTACTAAGT ATAAGGTCTT TGAGTTGTCG TCTTTATCTT AACAGATCCC AACAGTGAGA AGCCCAGTGA TTTTGTTTAC CTTCCGAGAG ATGAAGCATT TGGTCACTTG AAGTCATCAG ATTTTCTCGT TTATGGAATC AAATCAGTGG CTCAAGACGT CTTGCCCGTG TTGACTGATG CGTTTGATGG CAATCTTTTG AGCCTTGAGT TTGATAACTT TGCTGAAGTG CGCAAACTCT ATGAAGGTGG AGTTACACTA CCTACAAACT TTCTTAGCAA GATCGCCCCT ATACCAGTGG TCAAGGAAAT TTTTCGAACT GATGGCGAAC AGTTCCTCAA GTATCCACCA CCTAAAGTGA TGCAGGGTAT GCTACATATT TTGAATATGT AGAATATTAT CAATATACTC CTGTTTTTAT TCAACATATT TAATCACATG GATGAATTTT TGAACTGTTA TTATTTGGTG AAGTGGATAA GTCTGCATGG ATGACTGATG AAGAATTTGC TAGAGAAACG ATTGCTGGTG TTAATCCAAA TGTCATTAAG ATTCTTGAGG TAACAAACTA CATGCAACCA AAAGCTTGGA TGCACTTTGG TTGTCAAATT TGAGATTGCT AACCTGATGT CATTGTGTGC TAAATATATT CCAGGAGTTC CCACCACGTA GCAAGCTAGA TTCTCAAGCC TACGGTGATC ATACCTCTAT AATAACAAAA CAACACTTGG AGCCTAACTT GGGTGGGCTC ACCGTTGAGC AGGTAATGAT ATTAGTATTT AGTTTCACAT TAAAGTTATT TCACTATAAT GTTTCTGCTA CTGCTTTGTA ACTATTTTTG TTTTTTTATT TTTTTATTGT GATTCCAATG AACAACAATC AACCAATTAA TTAATTAATT AATTATACTT GGTTGAACAG GCTATCCAGA GCAAGAAGTT GTTCATCTTA GATCACCATG ACTATCTCAT TCCATATTTG AGGAAAATAA ATGCAACTAC CACAAAGACT TACGCTACAA GAACCATATT TTTCTTGAAA AGTGATGGAA CCTTGACGCC ATTGGCCATT GAGTTAAGTA AGCCGCATCC TCAGGGTGAA GGATATGGTC CTGTTAGCGA AGTCTACGTG CCTTCGAGCG AGGGAGTTGA AGCTTACATT TGGTTACTGG CAAAGGCTTA TGTTGTTGTG AATGATTCGT GCTACCATCA ACTCGTCAGC CACTGGTATG ATATTGTCAA TTGGCAATGA GATGCATACA ATTAAATTTG CCAATATATT TATAAATAGA TGATTAACAT ATTAACGTGG ATGGCAGGCT AAACACTCAT GCGGTTGTTG AGCCATTCGT CATAGCAACA AACAGGCATC TGAGCGTGGT TCACCCTATT TACAAACTTT TGTTTCCTCA CTATCGTGAC ACCATGAACA TTAATTCACT TGCCCGCAAA TCCTTGGTCA ATGCAGACGG TATTATAGAG AAAACATTCT TGTGGGGTAG GTACTCTTTG GAAATGTCTG CTGTTATTTA CAAGGACTGG GTTTTCACTG ATCAAGCATT GCCTAATGAT CTTGTCAAGA GGTATGTAAT CTTGTAAAGT TTATTAACAA ATTAGAAATA TATATTGCTG AAAAAATAAA GAATCTAAAT CTATTTAAGT CATCAATGAC ATAAGAAGAA AAAAAGGCCT CAGGCTTGAT ATAACTCTTG TGCTAATCAA TGTTATTTGG GTGCTTTTAA TTCAGAGGAG TTGCGGTTAA GGATCCATCT GCTCCCCATG GAGTTCGGCT TTTGATTGAG GACTATCCTT ATGCTTCTGA TGGGCTAGAG ATATGGGATG CCATCAAGTC CTGGGTGCAT GAATATGTCT CATTCTACTA CAAGTCAGAT GCAGCAATTC AACAAGATCC CGAGCTCCAA GCTTGGTGGA AAGAACTTGT CCAAGTGGGT CATGGTGATT TGAAAGATAA GCCATGGTGG CAAAAGATGC AAACTCGTGA AGAGTTGATT GAAGCCTCGG CTACCCTCGT ATGGATCGCT TCAGCCCTTC ATGCAGCTGT TAACTTTGGA CAGTATCCAT ATGGAGGTTT AATCCTAAAC AGGCCAACCA TTAGCAGGAG ATTCATGCCT GAGAAAGGGT CTGCTGAGTA TGCAGCTTTG GCTAAGAACC CTGAGAAGGA GTTTTTGAAG ACTATTACTG GCAAGAAGGA GACCCTCATT GACCTTACAA TTATAGAAAT CTTGTCAAGG CACACATCTG ATGAGTTCTA CCTTGGGGAG AGAGATGGTG GTGACTATTG GACTTCTGAT GCTGGTCCAC TAGAGGCCTT CAAGAGGTTT GGAAAGAAGC TTCAAGAGAT TGAACAAAAG CTTATACAGA AGAACAAGGA CGAGACGTTG AGAAACCGCA GTGGGCCGGC TAAAATGCCT TACACTTTGC TCTATCCTTC TAGTGAGGAG GGATTGACTT TCAGAGGAAT TCCCAACAGT ATCTCCATCT AGGGGCTCTA TGGTTGTTGC AGTACTTTCC CTGGTTTTCA ATAAAAGTGA ATAGAAGGAA TATGATTCCT TCATCTACCT ATGTATGGTT AAGTTATGTA TTTGGGGTGA TTGAATGTTC TCGTGTTTGC TGTTAAGTGT TAATGAAGAA ATTCTGTTTG TATAAATTGC TGTTAAGTGT TCTCGCGCAT GTTTCAACAA TGATAGCTGG CATGGCAAAG TGCAAACTAG AATAAATCAT CATCTTATGT AAACGATTTC CATTACAGCA AGGGTTGTCC TTAAAATTTC ATTGCCCGTG TATTCATATT TTTGCATTGC CTCAAAGTTA CCACTTCCTC ACCTTAGTAT CTATTTTCTT AAAG';
  378. return $element;
  379. }
  380. /**
  381. * FASTA validating parser
  382. *
  383. * @param $sequence
  384. * A string of characters to be validated. A sequence in FASTA format begins with a single-line description, followed by lines of sequence data.
  385. * The description line is distinguished from the sequence data by a greater-than (">") symbol in the first column.
  386. * The word following the ">" symbol is the identifier of the sequence, and the rest of the line is the description (both are optional).
  387. * There should be no space between the ">" and the first letter of the identifier. The sequence ends if another line starting with a ">" appears;
  388. * this indicates the start of another sequence.
  389. *
  390. * @return
  391. * Return a boolean. 1 if the sequence does not pass the format valifation stage and 0 otherwise.
  392. *
  393. */
  394. function _validateFasta($sequence) {
  395. $fastaIdRegEx = '/^>.*(\\n|\\r)/';
  396. $fastaSeqRegEx = '/[^acgntuACGNTU\n\r]/';
  397. if ( preg_match($fastaSeqRegEx,$sequence) && !(preg_match($fastaIdRegEx,$sequence)) ) {
  398. $flag = 1;
  399. } else {
  400. $flag = 0;
  401. }
  402. return $flag;
  403. }