blast_ui.blastn.inc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. <?php
  2. /**
  3. * @file
  4. * Contains all functions for the Nucleotide BLAST
  5. */
  /**
   * Nucleotide BLAST Submission Form
   *
   * Builds the blastn submission form. It consists of three fieldsets:
   *  - query: a FASTA textarea (with an AJAX "show example" checkbox) or a
   *    managed-file upload of the query sequence(s);
   *  - DB: a select list of nucleotide BLAST databases or a managed-file upload
   *    of a FASTA file to BLAST against;
   *  - ALG: general and scoring parameters. The select elements use numeric
   *    option keys which the submit handler translates into actual values.
   *
   * @param $form
   *   The form array being built.
   * @param $form_state
   *   The current state of the form.
   *
   * @return
   *   The completed form array.
   *
   * @see blast_nucleotide_form_validate()
   * @see blast_nucleotide_form_submit()
   */
  function blast_nucleotide_form($form, &$form_state) {

    // CSS support to the form
    $form['#attached']['css'] = array(
      drupal_get_path('module', 'blast_ui') . '/css/form.css',
    );

    // Both the query and the database can be supplied as file uploads, so the
    // form has to be submitted as multipart data. Setting this once is enough.
    $form['#attributes']['enctype'] = 'multipart/form-data';

    // The query and database upload elements share the same help text and
    // validators, so build them once.
    $upload_description = t('The file should be a plain-text FASTA
      (.fasta, .fna, .fa) file. In other words, it cannot have formatting as is the
      case with MS Word (.doc, .docx) or Rich Text Format (.rtf). It cannot be greater
      than %max_size in size. <strong>Don\'t forget to press the Upload button before
      attempting to submit your BLAST.</strong>',
      array(
        '%max_size' => round(file_upload_max_size() / 1024 / 1024,1) . 'MB'
      )
    );
    $upload_validators = array(
      'file_validate_extensions' => array('fasta fna fa'),
      'file_validate_size' => array(file_upload_max_size()),
    );

    // Add the sequence type to the form
    $form['sequence_type'] = array(
      '#type' => 'hidden',
      '#value' => 'nucleotide'
    );

    // NUCLEOTIDE QUERY
    //.........................

    $form['query'] = array(
      '#type' => 'fieldset',
      '#title' => t('Enter Query Sequence'),
      '#description' => t('Enter one or more queries in the top text box or use '
        . 'the browse button to upload a file from your local disk. The file may '
        . 'contain a single sequence or a list of sequences. In both cases, the '
        . 'data must be in <a href="@fasta-format-url" target="_blank">FASTA format</a>. ',
        array(
          '@fasta-format-url' => 'http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml'
        )
      ),
      '#collapsible' => TRUE,
      '#collapsed' => FALSE,
    );

    // Checkbox to show an example. The AJAX callback replaces the wrapper
    // around the FASTA textarea defined below.
    $form['query']['example_sequence'] = array(
      '#type' => 'checkbox',
      '#title' => t('Show an Example Sequence'),
      '#prefix' => '<span style="float: right;">',
      '#suffix' => '</span>',
      '#ajax' => array(
        'callback' => 'ajax_blast_ui_example_sequence_callback',
        'wrapper'  => 'fasta-textarea',
        'method'   => 'replace',
        'effect'   => 'fade',
      ),
    );

    // Textarea for submitting a multi-FASTA query
    $form['query']['FASTA'] = array(
      '#type' => 'textarea',
      '#title' => t('Enter FASTA sequence(s)'),
      '#description'=>t('Enter query sequence(s) in the text area.'),
      '#prefix' => '<div id="fasta-textarea">',
      '#suffix' => '</div>',
    );

    // Upload a file as an alternative to enter a query sequence
    $form['query']['UPLOAD'] = array(
      '#title' => t('Or upload your own query FASTA: '),
      '#type' => 'managed_file',
      '#description' => $upload_description,
      '#upload_validators' => $upload_validators,
    );

    // BLAST DATABASE
    //.........................

    $form['DB'] = array(
      '#type' => 'fieldset',
      '#title' => t('Choose Search Set'),
      '#description' => t('Choose from one of the nucleotide BLAST databases listed below. You can also use the browse button to upload a file from your local disk. The file may contain a single sequence or a list of sequences. '),
      '#collapsible' => TRUE,
      '#collapsed' => FALSE,
    );

    $options = get_blast_database_options('n');
    $form['DB']['SELECT_DB'] = array(
      '#type' => 'select',
      '#title' => t('Nucleotide BLAST Databases:'),
      '#options' => $options,
      '#default_value' => 0,
    );

    // Upload a file as an alternative to selecting an existing BLAST database
    $form['DB']['DBUPLOAD'] = array(
      '#title' => t('Or upload your own dataset: '),
      '#type' => 'managed_file',
      '#description' => $upload_description,
      '#upload_validators' => $upload_validators,
    );

    // ALGORITHM PARAMETERS
    //.........................

    $form['ALG'] = array(
      '#type' => 'fieldset',
      '#title' => t('Algorithm parameters'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );

    // General parameters
    $form['ALG']['GParam'] = array(
      '#type' => 'fieldset',
      '#title' => t('General parameters'),
      '#collapsible' => FALSE,
    );

    $form['ALG']['GParam']['maxTarget'] = array(
      '#type' => 'select',
      '#title' => t('Max target sequences:'),
      '#options' => array(
        0 => t('10'),
        1 => t('50'),
        2 => t('100'),
        3 => t('250'),
        4 => t('500'),
        5 => t('1000'),
        6 => t('5000'),
        7 => t('10000'),
        8 => t('20000'),
      ),
      '#default_value' => 2,
      '#description' => t('Select the maximum number of aligned sequences to display'),
    );

    $form['ALG']['GParam']['shortQueries'] = array(
      '#type' => 'checkbox',
      '#title' => t('Automatically adjust parameters for short input sequences'),
      '#default_value' => TRUE,
    );

    $form['ALG']['GParam']['eVal'] = array(
      '#type' => 'textfield',
      '#title' => t('e-Value (Expected Threshold)'),
      '#default_value' => 10,
      '#size' => 12,
      '#maxlength' => 20,
      '#description' => t('Expected number of chance matches in a random model. This number should be given in a decimal format. <a href="http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml#expect" target="_blank">More Information</a> | <a href="https://www.youtube.com/watch?v=nO0wJgZRZJs" target="_blank">Expect value video tutorial</a>'),
    );

    // The submit handler reads the word size back out of these #options using
    // the submitted key.
    $form['ALG']['GParam']['wordSize'] = array(
      '#type' => 'select',
      '#title' => t('Word size:'),
      '#options' => array(
        0 => t('16'),
        1 => t('20'),
        2 => t('24'),
        3 => t('28'),
        4 => t('32'),
        5 => t('48'),
        6 => t('64'),
        7 => t('128'),
        8 => t('256'),
      ),
      '#default_value' => 3,
      '#description' => t('The length of the seed that initiates an alignment'),
    );

    $form['ALG']['GParam']['qRange'] = array(
      '#type' => 'textfield',
      '#title' => t('Max matches in a query range'),
      '#default_value' => 0,
      '#size' => 12,
      '#maxlength' => 20,
      '#description' => t('Limit the number of matches to a query range. This option is useful if many strong matches to one part of a query may prevent BLAST from presenting weaker matches to another part of the query.'),
    );

    // Scoring parameters
    $form['ALG']['SParam'] = array(
      '#type' => 'fieldset',
      '#title' => t('Scoring parameters'),
      '#collapsible' => FALSE,
    );

    // The option keys of the next two selects are expanded into
    // reward/penalty and gap open/extend values by the submit handler, so the
    // key order must stay in sync with it.
    $form['ALG']['SParam']['M&MScores'] = array(
      '#type' => 'select',
      '#title' => t('Match/Mismatch Scores:'),
      '#options' => array(
        0 => t('1,-2'),
        1 => t('1,-3'),
        2 => t('1,-4'),
        3 => t('2,-3'),
        4 => t('4,-5'),
        5 => t('1,-1'),
      ),
      '#default_value' => 0,
      '#description' => t('Reward and penalty for matching and mismatching bases.'),
    );

    $form['ALG']['SParam']['gapCost'] = array(
      '#type' => 'select',
      '#title' => t('Gap Costs:'),
      '#options' => array(
        0 => t('Existence: 5 Extension: 2'),
        1 => t('Existence: 2 Extension: 2'),
        2 => t('Existence: 1 Extension: 2'),
        3 => t('Existence: 0 Extension: 2'),
        4 => t('Existence: 3 Extension: 1'),
        5 => t('Existence: 2 Extension: 1'),
        6 => t('Existence: 1 Extension: 1'),
      ),
      '#default_value' => 0,
      '#description' => t('Cost to create and extend a gap in an alignment. Linear costs are available only with megablast and are determined by the match/mismatch scores.'),
    );

    // Submit
    $form['submit'] = array(
      '#type' => 'submit',
      '#default_value' => ' BLAST ',
    );

    return $form;
  }
  /**
   * Form validation handler for blast_nucleotide_form().
   *
   * Determines where the query and the database come from and records the
   * result in $form_state for the submit handler:
   *  - $form_state['qFlag'] is 'upQuery' (with 'upQuery_path') when a query file
   *    was uploaded, or 'seqQuery' when a sequence was typed in the textarea.
   *  - $form_state['dbFlag'] is 'upDB' (with 'upDB_path') when a FASTA database
   *    was uploaded, or 'blastdb' when one was chosen from the select list.
   * A form error is set when either the query or the database is missing, or
   * when the typed query fails _validateFasta().
   *
   * @param $form
   *   The form array.
   * @param $form_state
   *   The current state of the form; flags and paths are added to it.
   *
   * @see blast_nucleotide_form().
   */
  function blast_nucleotide_form_validate($form, &$form_state) {

    // Validate Query
    //----------------
    // @todo: We are currently not validating uploaded files are valid FASTA.
    // First check to see if we have an upload & if so then validate it.
    $file = file_load($form_state['values']['UPLOAD']);

    // If the $file is populated then this is a newly uploaded, temporary file.
    if (is_object($file)) {
      $form_state['qFlag'] = 'upQuery';
      $form_state['upQuery_path'] = drupal_realpath($file->uri);
    }
    // Otherwise there was no file uploaded.
    // Check if there was a query sequence entered in the textarea.
    elseif (!empty($form_state['input']['FASTA'])) {
      // _validateFasta() returns a truthy value when the sequence is INVALID.
      if (_validateFasta($form_state['input']['FASTA'])) {
        // The error is attached to the 'FASTA' textarea, which is the element
        // that holds the offending value, so that it gets highlighted.
        form_set_error('FASTA', t('You need to provide a valid FASTA sequence
        for the query.'));
      }
      else {
        $form_state['qFlag'] = 'seqQuery';
      }
    }
    // Otherwise they didn't enter a query!!
    else {
      form_set_error('query', t('No query sequence given. Only raw sequence or
      sequence of type FASTA can be read. Enter sequence in the box provided or
      upload a plain text file.'));
    }

    // Validate Database
    //-------------------
    // @todo: We are currently not validating uploaded files are valid FASTA.
    // First check to see if we have an upload & if so then validate it.
    $file = file_load($form_state['values']['DBUPLOAD']);

    // If the $file is populated then this is a newly uploaded, temporary file.
    if (is_object($file)) {
      $form_state['dbFlag'] = 'upDB';
      $form_state['upDB_path'] = drupal_realpath($file->uri);
    }
    // Otherwise there was no file uploaded
    // Check if there was a database chosen from the list instead.
    // An empty value (such as the select's default of 0) counts as "nothing
    // selected".
    elseif (!empty($form_state['values']['SELECT_DB'])) {
      $form_state['dbFlag'] = 'blastdb';
    }
    // Otherwise they didn't select a database!!
    else {
      form_set_error('DB', t('No database selected. Either choose a database from the list or upload one of your own.'));
    }
  }
  /**
   * Form submission handler for blast_nucleotide_form().
   *
   * Expands the submitted option keys into blastn parameters, prepares the
   * query file and the BLAST database (building one with makeblastdb when the
   * user uploaded a FASTA file), then queues a Tripal job and redirects to the
   * report page for that job.
   *
   * Relies on the 'qFlag'/'upQuery_path' and 'dbFlag'/'upDB_path' entries that
   * blast_nucleotide_form_validate() stores in $form_state.
   *
   * @param $form
   *   The form array; the word size is read from its select #options.
   * @param $form_state
   *   The current state of the form.
   *
   * @see blast_nucleotide_form().
   */
  function blast_nucleotide_form_submit($form, &$form_state) {

    $error = FALSE;
    $values = $form_state['values'];

    $eVal = $values['eVal'];

    // NOTE(review): the form also collects 'maxTarget', but it is not forwarded
    // to the BLAST job below. Confirm whether it should be added to 'options'.

    $wsKey = $values['wordSize'];
    $wordSize = $form['ALG']['GParam']['wordSize']['#options'][$wsKey];

    // Expand Gap Cost key into open and extend penalties.
    // Keys match the 'gapCost' select options: key => array(open, extend).
    $gap_costs = array(
      0 => array(5, 2),
      1 => array(2, 2),
      2 => array(1, 2),
      3 => array(0, 2),
      4 => array(3, 1),
      5 => array(2, 1),
      6 => array(1, 1),
    );
    $gapKey = $values['gapCost'];
    // Fall back to the form default (key 0) rather than leaving the values
    // undefined if an unexpected key is submitted.
    if (!isset($gap_costs[$gapKey])) {
      $gapKey = 0;
    }
    list($gapOpen, $gapExtend) = $gap_costs[$gapKey];

    // Expand Match/Mismatch option into reward/penalty values.
    // Keys match the 'M&MScores' select options: key => array(reward, penalty).
    // This lookup must be driven by the match/mismatch key, not the gap cost
    // key, otherwise the scores silently follow the gap cost selection.
    $match_scores = array(
      0 => array(1, -2),
      1 => array(1, -3),
      2 => array(1, -4),
      3 => array(2, -3),
      4 => array(4, -5),
      5 => array(1, -1),
    );
    $scoreKey = $values['M&MScores'];
    if (!isset($match_scores[$scoreKey])) {
      $scoreKey = 0;
    }
    list($reward, $penalty) = $match_scores[$scoreKey];

    // If the query was submitted via the textarea then create a file containing
    // it; otherwise use the uploaded file directly.
    $query = NULL;
    $qFlag = isset($form_state['qFlag']) ? $form_state['qFlag'] : NULL;
    if ($qFlag == 'seqQuery') {
      $seq_content = $values['FASTA'];
      $query = '/tmp/' . date('YMd_His') . '_query.fasta';
      file_put_contents($query, $seq_content);
    }
    elseif ($qFlag == 'upQuery') {
      $query = $form_state['upQuery_path'];
    }

    // Validation should always have set a query source; never queue a job
    // without one.
    if ($query === NULL) {
      drupal_set_message(t('No query sequence was available to BLAST. Please enter or upload a query and try again.'), 'error');
      return;
    }

    $blastdb_with_path = NULL;
    $dbFlag = isset($form_state['dbFlag']) ? $form_state['dbFlag'] : NULL;

    // If the BLAST database was uploaded then use it to run the BLAST
    if ($dbFlag == 'upDB') {

      // Since we only support using the -db flag (not -subject) we need to create a
      // blast database for the FASTA uploaded.
      // NOTE: We can't support subject because we need to generate the ASN.1+ format
      // to provide multiple download type options from the same BLAST
      $blastdb_with_path = $form_state['upDB_path'];
      $result = array();
      // The path derives from a user-supplied file name, so it must be escaped
      // before being handed to the shell.
      exec('makeblastdb -in ' . escapeshellarg($blastdb_with_path) . ' -dbtype nucl -parse_seqids 2>&1', $result);

      // Check that the BLAST database was made correctly.
      $result = implode('<br />', $result);
      if (preg_match('/Error/', $result)) {
        drupal_set_message('Unable to generate a BLAST database from your uploaded
        FASTA sequence. Please check that your file is a valid FASTA file and that if
        your sequence headers include pipes (i.e.: | ) they adhere to '
          . l('NCBI standards.', 'http://www.ncbi.nlm.nih.gov/books/NBK21097/table/A632/?report=objectonly', array('attributes' => array('target' => '_blank'))),
          'error'
        );
        $error = TRUE;
      }
    }
    // Otherwise, we are using one of the website provided BLAST databases so form the
    // BLAST command accordingly
    elseif ($dbFlag == 'blastdb') {
      $selected_db = $values['SELECT_DB'];
      $blastdb_node = node_load($selected_db);
      // node_load() returns FALSE for an unknown node; in that case the path
      // stays empty and the "unaccessible" error below is reported.
      if ($blastdb_node && isset($blastdb_node->db_path)) {
        $blastdb_with_path = $blastdb_node->db_path;
      }
    }

    // Actually submit the BLAST Tripal Job
    // NOTE: Tripal jobs needs to be executed from the command-line before it will be run!!
    // A nucleotide BLAST database is considered usable when its .nsq file is
    // readable.
    $db_ready = !empty($blastdb_with_path) && is_readable($blastdb_with_path . '.nsq');
    if ($db_ready) {
      global $user;

      $output_filestub = date('YMd_His');
      $job_args = array(
        'program' => 'blastn',
        'query' => $query,
        'database' => $blastdb_with_path,
        'output_filename' => $output_filestub,
        'options' => array(
          'evalue' => $eVal,
          'word_size' => $wordSize,
          'gapopen' => $gapOpen,
          'gapextend' => $gapExtend,
          'penalty' => $penalty,
          'reward' => $reward
        )
      );
      $job_id = tripal_add_job("BLAST (blastn): $query",'blast_job','run_BLAST_tripal_job', $job_args, $user->uid);

      // Redirect to the BLAST results page
      drupal_goto("blast/report/$job_id");
    }
    // We check if $error is set to TRUE because if so then the error has already
    // been reported.
    elseif (!$error) {
      $dbfile_uploaded_msg = ($dbFlag == 'upDB') ? 'The BLAST database was submitted via user upload.' : 'Existing BLAST Database was chosen';
      tripal_report_error(
        'blast_ui',
        TRIPAL_ERROR,
        "BLAST database %db unaccessible. $dbfile_uploaded_msg",
        array('%db' => $blastdb_with_path)
      );
      drupal_set_message('BLAST database unaccessible. Please contact the site administrator.','error');
    }
  }
  /**
   * FASTA validating parser
   *
   * Accepts either a raw nucleotide sequence or one or more FASTA records. The
   * input is checked line by line:
   *  - blank lines are ignored;
   *  - lines with ">" in the first column are description lines and may
   *    contain anything;
   *  - every other line must consist solely of IUPAC nucleotide codes
   *    (A, C, G, T, U, R, Y, K, M, S, W, B, D, H, V, N; case-insensitive).
   *    Trailing whitespace on a line is tolerated.
   * Validating per line matters: checking only that the input starts with a
   * description line would let arbitrary characters through in the sequence
   * lines of any FASTA record.
   *
   * @param $sequence
   *   A string of characters to be validated. A sequence in FASTA format begins
   *   with a single-line description, followed by lines of sequence data. The
   *   description line is distinguished from the sequence data by a
   *   greater-than (">") symbol in the first column. The sequence ends if
   *   another line starting with a ">" appears; this indicates the start of
   *   another sequence.
   *
   * @return
   *   1 if the sequence does not pass the format validation stage (including
   *   when it contains no sequence data at all) and 0 otherwise.
   */
  function _validateFasta($sequence) {
    $residue_pattern = '/^[ACGTURYKMSWBDHVN]+$/i';
    $has_residues = FALSE;

    // Accept Unix, Windows and old Mac line endings.
    $lines = preg_split('/\r\n|\r|\n/', (string) $sequence);
    foreach ($lines as $line) {
      $line = rtrim($line);

      // Skip blank lines.
      if ($line === '') {
        continue;
      }

      // Description lines are free text.
      if ($line[0] === '>') {
        continue;
      }

      // Anything else must be pure sequence data.
      if (!preg_match($residue_pattern, $line)) {
        return 1;
      }
      $has_residues = TRUE;
    }

    // Descriptions without any sequence data are not a usable query.
    return $has_residues ? 0 : 1;
  }