blast_ui.blastn.inc 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. <?php
  2. /**
  3. * @file
  4. * Contains all functions for the Nucleotide BLAST
  5. */
  /**
   * Form constructor for the Nucleotide BLAST (blastn) submission form.
   *
   * The form has three sections:
   *  - the query: pasted FASTA text or an uploaded FASTA file,
   *  - the search set: an existing nucleotide BLAST database or an uploaded
   *    FASTA file,
   *  - the algorithm parameters (collapsed by default).
   *
   * The select elements under "Algorithm parameters" are keyed by option index,
   * not by the value itself; blast_nucleotide_form_submit() maps the submitted
   * index back to the actual parameter value.
   *
   * @param array $form
   *   The form array to extend.
   * @param array $form_state
   *   The current form state (not modified here).
   *
   * @return array
   *   The completed form array.
   *
   * @see blast_nucleotide_form_validate()
   * @see blast_nucleotide_form_submit()
   */
  function blast_nucleotide_form($form, &$form_state) {

    // CSS support to the form.
    $form['#attached']['css'] = array(
      drupal_get_path('module', 'blast_ui') . '/css/form.css',
    );

    // Both managed_file elements below need a multipart form; set it once.
    $form['#attributes']['enctype'] = 'multipart/form-data';

    // The query upload and the database upload accept the same kind of file, so
    // they share one description and one set of validators.
    $max_upload_size = file_upload_max_size();
    $fasta_upload_description = t(
      'The file should be a plain-text FASTA (.fasta, .fna, .fa) file. In other words, it cannot have formatting as is the case with MS Word (.doc, .docx) or Rich Text Format (.rtf). It cannot be greater than %max_size in size. <strong>Don\'t forget to press the Upload button before attempting to submit your BLAST.</strong>',
      array(
        '%max_size' => round($max_upload_size / 1024 / 1024, 1) . 'MB',
      )
    );
    $fasta_upload_validators = array(
      'file_validate_extensions' => array('fasta fna fa'),
      'file_validate_size' => array($max_upload_size),
    );

    // NUCLEOTIDE QUERY
    //.........................
    $form['query'] = array(
      '#type' => 'fieldset',
      '#title' => t('Enter Query Sequence'),
      '#description' => t('Enter one or more queries in the top text box or use the browse button to upload a file from your local disk. The file may contain a single sequence or a list of sequences. In both cases, the data must be in FASTA format. <a href="http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml" target="_blank">More information..</a> '),
      '#collapsible' => TRUE,
      '#collapsed' => FALSE,
    );

    // Button that fills the query text area with an example sequence via AJAX.
    // Validation is skipped and the onclick handler returns false so that
    // clicking it never performs a regular form submission.
    $form['query']['example_sequence'] = array(
      '#type' => 'button',
      '#button_type' => 'button',
      '#limit_validation_errors' => array(),
      '#value' => t('Example Sequence'),
      '#prefix' => '<div class="center">',
      '#suffix' => '</div>',
      '#validate' => array(),
      '#ajax' => array(
        'callback' => 'ajax_nucleotide_text_area_callback',
        'wrapper' => 'fasta_seq',
        'method' => 'replace',
        'effect' => 'fade',
      ),
      '#attributes' => array('onclick' => 'return false;'),
    );

    // Text area for submitting a multi-FASTA query. The wrapper div is the
    // target replaced by the example sequence AJAX callback.
    $form['query']['FASTA'] = array(
      '#type' => 'textarea',
      '#title' => t('Enter FASTA sequence(s)'),
      '#description' => t('Enter query sequence(s) in the text area.'),
      '#prefix' => '<div id="fasta_seq">',
      '#suffix' => '</div>',
    );

    // Upload a file as an alternative to entering a query sequence.
    $form['query']['UPLOAD'] = array(
      '#title' => t('Or upload your own query FASTA: '),
      '#type' => 'managed_file',
      '#description' => $fasta_upload_description,
      '#upload_validators' => $fasta_upload_validators,
    );

    // BLAST DATABASE
    //.........................
    $form['DB'] = array(
      '#type' => 'fieldset',
      '#title' => t('Choose Search Set'),
      '#description' => t('Choose from one of the nucleotide BLAST databases listed below. You can also use the browse button to upload a file from your local disk. The file may contain a single sequence or a list of sequences. '),
      '#collapsible' => TRUE,
      '#collapsed' => FALSE,
    );

    $options = get_blast_database_options('n');
    $form['DB']['SELECT_DB'] = array(
      '#type' => 'select',
      '#title' => t('Nucleotide BLAST Databases:'),
      '#options' => $options,
      '#default_value' => 0,
    );

    // Upload a file as an alternative to selecting an existing BLAST database.
    $form['DB']['DBUPLOAD'] = array(
      '#title' => t('Or upload your own dataset: '),
      '#type' => 'managed_file',
      '#description' => $fasta_upload_description,
      '#upload_validators' => $fasta_upload_validators,
    );

    // ALGORITHM PARAMETERS
    //.........................
    $form['ALG'] = array(
      '#type' => 'fieldset',
      '#title' => t('Algorithm parameters'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );

    // General parameters.
    $form['ALG']['GParam'] = array(
      '#type' => 'fieldset',
      '#title' => t('General parameters'),
      '#collapsible' => FALSE,
    );

    $form['ALG']['GParam']['maxTarget'] = array(
      '#type' => 'select',
      '#title' => t('Max target sequences:'),
      '#options' => array(
        0 => t('10'),
        1 => t('50'),
        2 => t('100'),
        3 => t('250'),
        4 => t('500'),
        5 => t('1000'),
        6 => t('5000'),
        7 => t('10000'),
        8 => t('20000'),
      ),
      '#default_value' => 2,
      '#description' => t('Select the maximum number of aligned sequences to display'),
    );

    $form['ALG']['GParam']['shortQueries'] = array(
      '#type' => 'checkbox',
      '#title' => t('Automatically adjust parameters for short input sequences'),
      '#default_value' => TRUE,
    );

    $form['ALG']['GParam']['eVal'] = array(
      '#type' => 'textfield',
      '#title' => t('e-Value (Expected Threshold)'),
      '#default_value' => 10,
      '#size' => 12,
      '#maxlength' => 20,
      '#description' => t('Expected number of chance matches in a random model. This number should be given in a decimal format. <a href="http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml#expect" target="_blank">More Information</a> | <a href="https://www.youtube.com/watch?v=nO0wJgZRZJs" target="_blank">Expect value video tutorial</a>'),
    );

    $form['ALG']['GParam']['wordSize'] = array(
      '#type' => 'select',
      '#title' => t('Word size:'),
      '#options' => array(
        0 => t('16'),
        1 => t('20'),
        2 => t('24'),
        3 => t('28'),
        4 => t('32'),
        5 => t('48'),
        6 => t('64'),
        7 => t('128'),
        8 => t('256'),
      ),
      '#default_value' => 3,
      '#description' => t('The length of the seed that initiates an alignment'),
    );

    $form['ALG']['GParam']['qRange'] = array(
      '#type' => 'textfield',
      '#title' => t('Max matches in a query range'),
      '#default_value' => 0,
      '#size' => 12,
      '#maxlength' => 20,
      '#description' => t('Limit the number of matches to a query range. This option is useful if many strong matches to one part of a query may prevent BLAST from presenting weaker matches to another part of the query.'),
    );

    // Scoring parameters.
    $form['ALG']['SParam'] = array(
      '#type' => 'fieldset',
      '#title' => t('Scoring parameters'),
      '#collapsible' => FALSE,
    );

    $form['ALG']['SParam']['M&MScores'] = array(
      '#type' => 'select',
      '#title' => t('Match/Mismatch Scores:'),
      '#options' => array(
        0 => t('1,-2'),
        1 => t('1,-3'),
        2 => t('1,-4'),
        3 => t('2,-3'),
        4 => t('4,-5'),
        5 => t('1,-1'),
      ),
      '#default_value' => 0,
      '#description' => t('Reward and penalty for matching and mismatching bases.'),
    );

    $form['ALG']['SParam']['gapCost'] = array(
      '#type' => 'select',
      '#title' => t('Gap Costs:'),
      '#options' => array(
        0 => t('Existence: 5 Extension: 2'),
        1 => t('Existence: 2 Extension: 2'),
        2 => t('Existence: 1 Extension: 2'),
        3 => t('Existence: 0 Extension: 2'),
        4 => t('Existence: 3 Extension: 1'),
        5 => t('Existence: 2 Extension: 1'),
        6 => t('Existence: 1 Extension: 1'),
      ),
      '#default_value' => 0,
      '#description' => t('Cost to create and extend a gap in an alignment. Linear costs are available only with megablast and are determined by the match/mismatch scores.'),
    );

    // Submit.
    $form['submit'] = array(
      '#type' => 'submit',
      '#default_value' => ' BLAST ',
    );

    return $form;
  }
  /**
   * Form validation handler for blast_nucleotide_form().
   *
   * Determines where the query and the database come from and records the
   * result in $form_state for the submit handler:
   *  - $form_state['qFlag'] is 'upQuery' (uploaded file, path stored in
   *    $form_state['upQuery_path']) or 'seqQuery' (sequence typed in the
   *    text area).
   *  - $form_state['dbFlag'] is 'upDB' (uploaded file, path stored in
   *    $form_state['upDB_path']) or 'blastdb' (database chosen from the list).
   *
   * An uploaded file takes precedence over the text area / select list. A form
   * error is set when neither source is provided or when the typed sequence is
   * rejected by _validateFasta().
   *
   * @param array $form
   *   The form being validated.
   * @param array $form_state
   *   The current form state; receives the flags described above.
   *
   * @see blast_nucleotide_form()
   */
  function blast_nucleotide_form_validate($form, &$form_state) {
    $values = $form_state['values'];

    // Validate Query
    //----------------
    // @todo: We are currently not validating uploaded files are valid FASTA.
    // If file_load() yields an object then a query file was uploaded.
    $query_file = file_load($values['UPLOAD']);
    if (is_object($query_file)) {
      $form_state['qFlag'] = 'upQuery';
      $form_state['upQuery_path'] = drupal_realpath($query_file->uri);
    }
    // Otherwise fall back to the sequence entered in the text area. The
    // processed value is checked because that is what the submit handler
    // writes to the query file.
    elseif (!empty($values['FASTA'])) {
      // _validateFasta() returns a truthy value when the sequence is NOT valid.
      if (_validateFasta($values['FASTA'])) {
        // Flag the text area itself so the offending field is highlighted.
        form_set_error('FASTA', t('You need to provide a valid FASTA sequence for the query.'));
      }
      else {
        $form_state['qFlag'] = 'seqQuery';
      }
    }
    // Otherwise they didn't enter a query!!
    else {
      form_set_error('query', t('No query sequence given. Only raw sequence or sequence of type FASTA can be read. Enter sequence in the box provided or upload a plain text file.'));
    }

    // Validate Database
    //-------------------
    // @todo: We are currently not validating uploaded files are valid FASTA.
    // If file_load() yields an object then a database file was uploaded.
    $db_file = file_load($values['DBUPLOAD']);
    if (is_object($db_file)) {
      $form_state['dbFlag'] = 'upDB';
      $form_state['upDB_path'] = drupal_realpath($db_file->uri);
    }
    // Otherwise check whether a database was chosen from the list instead.
    elseif (!empty($values['SELECT_DB'])) {
      $form_state['dbFlag'] = 'blastdb';
    }
    // Otherwise they didn't select a database!!
    else {
      form_set_error('DB', t('No database selected. Either choose a database from the list or upload one of your own.'));
    }
  }
  /**
   * Form submission handler for blast_nucleotide_form().
   *
   * Converts the submitted option indexes into blastn parameters, prepares the
   * query file and the BLAST database, then queues a Tripal job and redirects
   * to the report page for that job.
   *
   * Relies on the 'qFlag' / 'dbFlag' markers (and the matching 'upQuery_path' /
   * 'upDB_path' entries) that blast_nucleotide_form_validate() stores in
   * $form_state.
   *
   * @param array $form
   *   The submitted form; the word size option labels are read from it.
   * @param array $form_state
   *   The current form state.
   *
   * @see blast_nucleotide_form()
   * @see blast_nucleotide_form_validate()
   */
  function blast_nucleotide_form_submit($form, &$form_state) {
    $error = FALSE;
    $values = $form_state['values'];

    $eVal = $values['eVal'];

    // The select stores the option index; the option label is the word size.
    $wsKey = $values['wordSize'];
    $wordSize = $form['ALG']['GParam']['wordSize']['#options'][$wsKey];

    // @todo: the 'Max target sequences' selection (maxTarget) is not forwarded
    // to the BLAST job yet.

    // Expand the Gap Cost key into open and extend penalties. Entries are
    // array(open, extend) and follow the order of the gapCost select options.
    $gap_costs = array(
      0 => array(5, 2),
      1 => array(2, 2),
      2 => array(1, 2),
      3 => array(0, 2),
      4 => array(3, 1),
      5 => array(2, 1),
      6 => array(1, 1),
    );
    $gapKey = $values['gapCost'];
    // Fall back to the form default (option 0) for an unknown key.
    list($gapOpen, $gapExtend) = isset($gap_costs[$gapKey]) ? $gap_costs[$gapKey] : $gap_costs[0];

    // Expand the Match/Mismatch key into reward/penalty values. Entries are
    // array(reward, penalty) and follow the order of the M&MScores options.
    // This lookup must use the score key, not the gap cost key.
    $match_scores = array(
      0 => array(1, -2),
      1 => array(1, -3),
      2 => array(1, -4),
      3 => array(2, -3),
      4 => array(4, -5),
      5 => array(1, -1),
    );
    $scoreKey = $values['M&MScores'];
    list($reward, $penalty) = isset($match_scores[$scoreKey]) ? $match_scores[$scoreKey] : $match_scores[0];

    // Determine the query file. If the query was submitted via the text area
    // then create a file containing it; otherwise use the uploaded file.
    $query = NULL;
    if (isset($form_state['qFlag'])) {
      if ($form_state['qFlag'] == 'seqQuery') {
        $query = '/tmp/' . date('YMd_His') . '_query.fasta';
        if (file_put_contents($query, $values['FASTA']) === FALSE) {
          // Without a query file there is nothing to BLAST.
          drupal_set_message('Unable to save your query sequence. Please contact the site administrator.', 'error');
          return;
        }
      }
      elseif ($form_state['qFlag'] == 'upQuery') {
        $query = $form_state['upQuery_path'];
      }
    }

    $dbFlag = isset($form_state['dbFlag']) ? $form_state['dbFlag'] : NULL;
    $blastdb_with_path = NULL;

    // If the BLAST database was uploaded then use it to run the BLAST.
    if ($dbFlag == 'upDB') {
      // Since we only support using the -db flag (not -subject) we need to
      // create a blast database for the FASTA uploaded.
      // NOTE: We can't support subject because we need to generate the ASN.1+
      // format to provide multiple download type options from the same BLAST.
      $blastdb_with_path = $form_state['upDB_path'];
      $result = NULL;
      // The path comes from a user upload, so quote it for the shell.
      exec('makeblastdb -in ' . escapeshellarg($blastdb_with_path) . ' -dbtype nucl -parse_seqids 2>&1', $result);

      // Check that the BLAST database was made correctly.
      $result = implode('<br />', $result);
      if (preg_match('/Error/', $result)) {
        drupal_set_message(
          'Unable to generate a BLAST database from your uploaded FASTA sequence. Please check that your file is a valid FASTA file and that if your sequence headers include pipes (i.e.: | ) they adhere to '
          . l('NCBI standards.', 'http://www.ncbi.nlm.nih.gov/books/NBK21097/table/A632/?report=objectonly', array('attributes' => array('target' => '_blank'))),
          'error'
        );
        $error = TRUE;
      }
    }
    // Otherwise, we are using one of the website provided BLAST databases, so
    // look up its path on the selected node.
    elseif ($dbFlag == 'blastdb') {
      $selected_db = $values['SELECT_DB'];
      $blastdb_node = node_load($selected_db);
      $blastdb_with_path = $blastdb_node->db_path;
    }

    // Actually submit the BLAST Tripal Job.
    // NOTE: Tripal jobs needs to be executed from the command-line before it
    // will be run!!
    // The presence of a readable .nsq file is used as the check that the
    // nucleotide database exists.
    $blastdb_with_suffix = $blastdb_with_path . '.nsq';
    if (is_readable($blastdb_with_suffix)) {
      global $user;
      $output_filestub = date('YMd_His');
      $job_args = array(
        'program' => 'blastn',
        'query' => $query,
        'database' => $blastdb_with_path,
        'output_filename' => $output_filestub,
        'options' => array(
          'evalue' => $eVal,
          'word_size' => $wordSize,
          'gapopen' => $gapOpen,
          'gapextend' => $gapExtend,
          'penalty' => $penalty,
          'reward' => $reward
        )
      );
      $job_id = tripal_add_job("BLAST (blastn): $query", 'blast_job', 'run_BLAST_tripal_job', $job_args, $user->uid);

      // Redirect to the BLAST results page.
      drupal_goto("blast/report/$job_id");
    }
    // We check if $error is set to TRUE because if so then the error has
    // already been reported.
    elseif (!$error) {
      $dbfile_uploaded_msg = ($dbFlag == 'upDB') ? 'The BLAST database was submitted via user upload.' : 'Existing BLAST Database was chosen';
      tripal_report_error(
        'blast_ui',
        TRIPAL_ERROR,
        "BLAST database %db unaccessible. $dbfile_uploaded_msg",
        array('%db' => $blastdb_with_path)
      );
      drupal_set_message('BLAST database unaccessible. Please contact the site administrator.', 'error');
    }
  }
  /**
   * AJAX callback for the "Example Sequence" button of the nucleotide form.
   *
   * Takes the query text area element from the form, sets its value to an
   * example nucleotide sequence in FASTA format and returns the element.
   *
   * @param array $form
   *   The form; must contain the $form['query']['FASTA'] element.
   * @param array $form_state
   *   The current form state (unused).
   *
   * @return array
   *   The FASTA text area element with '#value' set to the example sequence.
   */
  function ajax_nucleotide_text_area_callback($form, $form_state) {
    // Example nucleotide query: a header line followed by the sequence data.
    $example_fasta = '>lipoxygenase Glyma15g03040
TTTCGTATGA GATTAAAATG TGTGAAATTT TGTTTGATAG GACATGGGAA AGGAAAAGTT GGAAAGGCTA CAAATTTAAG AGGACAAGTG TCGTTACCAA CCTTGGGAGC TGGCGAAGAT GCATACGATG TTCATTTTGA ATGGGACAGT GACTTCGGAA TTCCCGGTGC ATTTTACATT AAGAACTTCA TGCAAGTTGA GTTCTATCTC AAGTCTCTAA CTCTCGAAGA CATTCCAAAC CACGGAACCA TTCACTTCGT ATGCAACTCC TGGGTTTACA ACTCAAAATC CTACCATTCT GATCGCATTT TCTTTGCCAA CAATGTAAGC TACTTAAATA CTGTTATACA TTGTCTAACA TCTTGTTAGA GTCTTGCATG ATGTGTACCG TTTATTGTTG TTGTTGAACT TTACCACATG GCATGGATGC AAAAGTTGTT ATACACATAA ATTATAATGC AGACATATCT TCCAAGCGAG ACACCGGCTC CACTTGTCAA GTACAGAGAA GAAGAATTGA AGAATGTAAG AGGGGATGGA ACTGGTGAGC GCAAGGAATG GGATAGGATC TATGATTATG ATGTCTACAA TGACTTGGGC GATCCAGATA AGGGTGAAAA GTATGCACGC CCCGTTCTTG GAGGTTCTGC CTTACCTTAC CCTCGCAGAG GAAGAACCGG AAGAGGAAAA ACTAGAAAAG GTTTCTCACT AGTCACTAAT TTATTACTTT TTAATGTTTG TTTTTAGGCA TCTTTTCTGA TGAAATGTAT ACTTTTGATG TTTTTTTGTT TTAGCATAAC TGAATTAGTA AAGTGTGTTG TGTTCCTTAG AAGTTAGAAA AGTACTAAGT ATAAGGTCTT TGAGTTGTCG TCTTTATCTT AACAGATCCC AACAGTGAGA AGCCCAGTGA TTTTGTTTAC CTTCCGAGAG ATGAAGCATT TGGTCACTTG AAGTCATCAG ATTTTCTCGT TTATGGAATC AAATCAGTGG CTCAAGACGT CTTGCCCGTG TTGACTGATG CGTTTGATGG CAATCTTTTG AGCCTTGAGT TTGATAACTT TGCTGAAGTG CGCAAACTCT ATGAAGGTGG AGTTACACTA CCTACAAACT TTCTTAGCAA GATCGCCCCT ATACCAGTGG TCAAGGAAAT TTTTCGAACT GATGGCGAAC AGTTCCTCAA GTATCCACCA CCTAAAGTGA TGCAGGGTAT GCTACATATT TTGAATATGT AGAATATTAT CAATATACTC CTGTTTTTAT TCAACATATT TAATCACATG GATGAATTTT TGAACTGTTA TTATTTGGTG AAGTGGATAA GTCTGCATGG ATGACTGATG AAGAATTTGC TAGAGAAACG ATTGCTGGTG TTAATCCAAA TGTCATTAAG ATTCTTGAGG TAACAAACTA CATGCAACCA AAAGCTTGGA TGCACTTTGG TTGTCAAATT TGAGATTGCT AACCTGATGT CATTGTGTGC TAAATATATT CCAGGAGTTC CCACCACGTA GCAAGCTAGA TTCTCAAGCC TACGGTGATC ATACCTCTAT AATAACAAAA CAACACTTGG AGCCTAACTT GGGTGGGCTC ACCGTTGAGC AGGTAATGAT ATTAGTATTT AGTTTCACAT TAAAGTTATT TCACTATAAT GTTTCTGCTA CTGCTTTGTA ACTATTTTTG TTTTTTTATT TTTTTATTGT GATTCCAATG AACAACAATC AACCAATTAA TTAATTAATT AATTATACTT GGTTGAACAG GCTATCCAGA GCAAGAAGTT GTTCATCTTA GATCACCATG ACTATCTCAT 
TCCATATTTG AGGAAAATAA ATGCAACTAC CACAAAGACT TACGCTACAA GAACCATATT TTTCTTGAAA AGTGATGGAA CCTTGACGCC ATTGGCCATT GAGTTAAGTA AGCCGCATCC TCAGGGTGAA GGATATGGTC CTGTTAGCGA AGTCTACGTG CCTTCGAGCG AGGGAGTTGA AGCTTACATT TGGTTACTGG CAAAGGCTTA TGTTGTTGTG AATGATTCGT GCTACCATCA ACTCGTCAGC CACTGGTATG ATATTGTCAA TTGGCAATGA GATGCATACA ATTAAATTTG CCAATATATT TATAAATAGA TGATTAACAT ATTAACGTGG ATGGCAGGCT AAACACTCAT GCGGTTGTTG AGCCATTCGT CATAGCAACA AACAGGCATC TGAGCGTGGT TCACCCTATT TACAAACTTT TGTTTCCTCA CTATCGTGAC ACCATGAACA TTAATTCACT TGCCCGCAAA TCCTTGGTCA ATGCAGACGG TATTATAGAG AAAACATTCT TGTGGGGTAG GTACTCTTTG GAAATGTCTG CTGTTATTTA CAAGGACTGG GTTTTCACTG ATCAAGCATT GCCTAATGAT CTTGTCAAGA GGTATGTAAT CTTGTAAAGT TTATTAACAA ATTAGAAATA TATATTGCTG AAAAAATAAA GAATCTAAAT CTATTTAAGT CATCAATGAC ATAAGAAGAA AAAAAGGCCT CAGGCTTGAT ATAACTCTTG TGCTAATCAA TGTTATTTGG GTGCTTTTAA TTCAGAGGAG TTGCGGTTAA GGATCCATCT GCTCCCCATG GAGTTCGGCT TTTGATTGAG GACTATCCTT ATGCTTCTGA TGGGCTAGAG ATATGGGATG CCATCAAGTC CTGGGTGCAT GAATATGTCT CATTCTACTA CAAGTCAGAT GCAGCAATTC AACAAGATCC CGAGCTCCAA GCTTGGTGGA AAGAACTTGT CCAAGTGGGT CATGGTGATT TGAAAGATAA GCCATGGTGG CAAAAGATGC AAACTCGTGA AGAGTTGATT GAAGCCTCGG CTACCCTCGT ATGGATCGCT TCAGCCCTTC ATGCAGCTGT TAACTTTGGA CAGTATCCAT ATGGAGGTTT AATCCTAAAC AGGCCAACCA TTAGCAGGAG ATTCATGCCT GAGAAAGGGT CTGCTGAGTA TGCAGCTTTG GCTAAGAACC CTGAGAAGGA GTTTTTGAAG ACTATTACTG GCAAGAAGGA GACCCTCATT GACCTTACAA TTATAGAAAT CTTGTCAAGG CACACATCTG ATGAGTTCTA CCTTGGGGAG AGAGATGGTG GTGACTATTG GACTTCTGAT GCTGGTCCAC TAGAGGCCTT CAAGAGGTTT GGAAAGAAGC TTCAAGAGAT TGAACAAAAG CTTATACAGA AGAACAAGGA CGAGACGTTG AGAAACCGCA GTGGGCCGGC TAAAATGCCT TACACTTTGC TCTATCCTTC TAGTGAGGAG GGATTGACTT TCAGAGGAAT TCCCAACAGT ATCTCCATCT AGGGGCTCTA TGGTTGTTGC AGTACTTTCC CTGGTTTTCA ATAAAAGTGA ATAGAAGGAA TATGATTCCT TCATCTACCT ATGTATGGTT AAGTTATGTA TTTGGGGTGA TTGAATGTTC TCGTGTTTGC TGTTAAGTGT TAATGAAGAA ATTCTGTTTG TATAAATTGC TGTTAAGTGT TCTCGCGCAT GTTTCAACAA TGATAGCTGG CATGGCAAAG TGCAAACTAG AATAAATCAT CATCTTATGT AAACGATTTC CATTACAGCA AGGGTTGTCC 
TTAAAATTTC ATTGCCCGTG TATTCATATT TTTGCATTGC CTCAAAGTTA CCACTTCCTC ACCTTAGTAT CTATTTTCTT AAAG';

    // $form is a by-value copy here, so changing the element only affects the
    // render array handed back to the AJAX framework.
    $textarea = $form['query']['FASTA'];
    $textarea['#value'] = $example_fasta;

    return $textarea;
  }
  /**
   * FASTA validating parser for nucleotide sequences.
   *
   * A sequence in FASTA format begins with a single-line description, followed
   * by lines of sequence data. The description line is distinguished from the
   * sequence data by a greater-than (">") symbol in the first column. The
   * sequence ends if another line starting with a ">" appears; this indicates
   * the start of another sequence. Raw sequence data without any description
   * line is accepted as well.
   *
   * Every line that is not a description line must consist only of IUPAC
   * nucleotide codes (upper or lower case) and whitespace, and the input must
   * contain at least one residue. Description lines themselves are not
   * inspected.
   *
   * @param $sequence
   *   A string of characters to be validated.
   *
   * @return
   *   An integer used as a boolean: 1 if the sequence does NOT pass validation
   *   and 0 if it is valid.
   */
  function _validateFasta($sequence) {
    // Anything other than an IUPAC nucleotide code or whitespace is invalid.
    $invalidCharRegEx = '/[^ACGTUNRYSWKMBDHV\s]/i';
    // Used to confirm that there is actual sequence data, not just headers.
    $residueRegEx = '/[ACGTUNRYSWKMBDHV]/i';

    $hasResidues = FALSE;
    foreach (preg_split('/\r\n|\r|\n/', (string) $sequence) as $line) {
      // Description lines may contain arbitrary text, so skip them.
      if (strncmp($line, '>', 1) === 0) {
        continue;
      }
      if (preg_match($invalidCharRegEx, $line)) {
        return 1;
      }
      if (!$hasResidues && preg_match($residueRegEx, $line)) {
        $hasResidues = TRUE;
      }
    }

    // Input consisting solely of description lines and/or blank lines has
    // nothing to search with.
    return $hasResidues ? 0 : 1;
  }