blast_ui.blastn.inc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. <?php
  2. /**
  3. * @file
  4. * Contains all functions for the Nucleotide BLAST
  5. */
  6. /**
  7. * Nucleotide BLAST Submission Form
  8. *
  9. * @see blast_nucleotide_form_validate()
  10. * @see blast_nucleotide_form_submit()
  11. */
  12. function blast_nucleotide_form($form, &$form_state) {
  13. // NUCLEOTIDE QUERY
  14. //.........................
  15. $form['query'] = array(
  16. '#type' => 'fieldset',
  17. '#title' => t('Enter Query Sequence'),
  18. '#description' => t('Enter one or more queries in the top text box or use the browse button to upload a file from your local disk. The file may contain a single sequence or a list of sequences. In both cases, the data must be in FASTA format. <a href="http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml" target="_blank">More information..</a> '),
  19. '#collapsible' => TRUE,
  20. '#collapsed' => FALSE,
  21. );
  22. // Textfield for submitting a mult-FASTA query
  23. $form['query']['FASTA'] = array(
  24. '#type' => 'textarea',
  25. '#title' => t('Enter FASTA sequence(s)'),
  26. '#description'=>t('Enter query sequence(s) in the text area.'),
  27. );
  28. // Upload a file as an alternative to enter a query sequence
  29. $form['#attributes']['enctype'] = 'multipart/form-data';
  30. $form['query']['UPLOAD'] = array(
  31. '#prefix' => 'Or upload your query files: ',
  32. '#type' => 'file',
  33. '#description' => t('The file should be a plain-text FASTA file and not a .doc, .docx, etc. It cannot be greater than 10 Mb in size.'),
  34. );
  35. // BLAST DATABASE
  36. //.........................
  37. $form['DB'] = array(
  38. '#type' => 'fieldset',
  39. '#title' => t('Choose Search Set'),
  40. '#description' => t('Choose from one of the nucleotide BLAST databases listed below. You can also use the browse button to upload a file from your local disk. The file may contain a single sequence or a list of sequences. '),
  41. '#collapsible' => TRUE,
  42. '#collapsed' => FALSE,
  43. );
  44. $options = get_blast_database_options('n');
  45. $form['DB']['SELECT_DB'] = array(
  46. '#type' => 'select',
  47. '#title' => t('Nucleotide BLAST Databases:'),
  48. '#options' => $options,
  49. '#default_value' => 0,
  50. );
  51. // Upload a file as an alternative to enter a query sequence
  52. $form['#attributes']['enctype'] = 'multipart/form-data';
  53. $form['DB']['DBUPLOAD'] = array(
  54. '#prefix' => 'Or upload your own dataset: ',
  55. '#type' => 'file',
  56. '#description' => t('The file should be a plain-text FASTA file and not a .doc, .docx, etc. It cannot be greater than 10 Mb in size.'),
  57. );
  58. // ALGORITHM PARAMETERS
  59. //.........................
  60. $form['ALG'] = array(
  61. '#type' => 'fieldset',
  62. '#title' => t('Algorithm parameters'),
  63. '#collapsible' => TRUE,
  64. '#collapsed' => TRUE,
  65. );
  66. // General parameters
  67. $form['ALG']['GParam'] = array(
  68. '#type' => 'fieldset',
  69. '#title' => t('General parameters'),
  70. '#collapsible' => FALSE,
  71. );
  72. $form['ALG']['GParam']['maxTarget'] = array(
  73. '#type' => 'select',
  74. '#title' => t('Max target sequences:'),
  75. '#options' => array(
  76. 0 => t('10'),
  77. 1 => t('50'),
  78. 2 => t('100'),
  79. 3 => t('250'),
  80. 4 => t('500'),
  81. 5 => t('1000'),
  82. 6 => t('5000'),
  83. 7 => t('10000'),
  84. 8 => t('20000'),
  85. ),
  86. '#default_value' => 2,
  87. '#description' => t('Select the maximum number of aligned sequences to display'),
  88. );
  89. $form['ALG']['GParam']['shortQueries'] = array(
  90. '#type' => 'checkbox',
  91. '#title' => t('Automatically adjust parameters for short input sequences'),
  92. '#default_value' => TRUE,
  93. );
  94. $form['ALG']['GParam']['eVal'] = array(
  95. '#type' => 'textfield',
  96. '#title' => t('e-Value (Expected Threshold)'),
  97. '#default_value' => 10,
  98. '#size' => 12,
  99. '#maxlength' => 20,
  100. '#description' => t('Expected number of chance matches in a random model. This number should be give in a decimal format. <a href="http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml#expect" target="_blank">More Information</a> | <a href="https://www.youtube.com/watch?v=nO0wJgZRZJs" target="_blank">Expect value vedio tutorial</a>'),
  101. );
  102. $form['ALG']['GParam']['wordSize'] = array(
  103. '#type' => 'select',
  104. '#title' => t('Word size:'),
  105. '#options' => array(
  106. 0 => t('16'),
  107. 1 => t('20'),
  108. 2 => t('24'),
  109. 3 => t('28'),
  110. 4 => t('32'),
  111. 5 => t('48'),
  112. 6 => t('64'),
  113. 7 => t('128'),
  114. 8 => t('256'),
  115. ),
  116. '#default_value' => 3,
  117. '#description' => t('The length of the seed that initiates an alignment'),
  118. );
  119. $form['ALG']['GParam']['qRange'] = array(
  120. '#type' => 'textfield',
  121. '#title' => t('Max matches in a query range'),
  122. '#default_value' => 0,
  123. '#size' => 12,
  124. '#maxlength' => 20,
  125. '#description' => t('Limit the number of matches to a query range. This option is useful if many strong matches to one part of a query may prevent BLAST from presenting weaker matches to another part of the query.'),
  126. );
  127. // Scoring parameters
  128. $form['ALG']['SParam'] = array(
  129. '#type' => 'fieldset',
  130. '#title' => t('Scoring parameters'),
  131. '#collapsible' => FALSE,
  132. );
  133. $form['ALG']['SParam']['M&MScores'] = array(
  134. '#type' => 'select',
  135. '#title' => t('Match/Mismatch Scores:'),
  136. '#options' => array(
  137. 0 => t('1,-2'),
  138. 1 => t('1,-3'),
  139. 2 => t('1,-4'),
  140. 3 => t('2,-3'),
  141. 4 => t('4,-5'),
  142. 5 => t('1,-1'),
  143. ),
  144. '#default_value' => 0,
  145. '#description' => t('Reward and penalty for matching and mismatching bases.'),
  146. );
  147. $form['ALG']['SParam']['gapCost'] = array(
  148. '#type' => 'select',
  149. '#title' => t('Gap Costs:'),
  150. '#options' => array(
  151. 0 => t('Existence: 5 Extension: 2'),
  152. 1 => t('Existence: 2 Extension: 2'),
  153. 2 => t('Existence: 1 Extension: 2'),
  154. 3 => t('Existence: 0 Extension: 2'),
  155. 4 => t('Existence: 3 Extension: 1'),
  156. 5 => t('Existence: 2 Extension: 1'),
  157. 6 => t('Existence: 1 Extension: 1'),
  158. ),
  159. '#default_value' => 0,
  160. '#description' => t('Cost to create and extend a gap in an alignment. Linear costs are available only with megablast and are determined by the match/mismatch scores.'),
  161. );
  162. // Submit
  163. $form['submit'] = array(
  164. '#type' => 'submit',
  165. '#default_value' => ' BLAST ',
  166. );
  167. return $form;
  168. }
  169. /**
  170. * Form validation handler for blast_nucleotide_form().
  171. *
  172. * @see blast_nucleotide_form().
  173. */
  174. function blast_nucleotide_form_validate($form, &$form_state) {
  175. // Validate FASTA seq in textfield
  176. $fastaSeq = $form_state['input']['FASTA'];
  177. if (isset($fastaSeq)) {
  178. if (_validateFasta($fastaSeq)){
  179. form_set_error('nBLAST', t('Error: Failed to read the Blast query: Wrong format provided for FASTA nucleotide sequence'));
  180. }
  181. else {
  182. $form_state['qFlag'] = 'seqQuery';
  183. }
  184. }
  185. // Validate query upload
  186. $upQuery = file_save_upload('UPLOAD', array('file_validate_extensions' => array('txt fasta fa fna')), FILE_EXISTS_RENAME);
  187. if ($upQuery) {
  188. $upQuery_uri = $upQuery->uri;
  189. $form_state['upQuery_path'] = drupal_realpath($upQuery_uri);
  190. $upQuery_content = file_get_contents($form_state['upQuery_path']);
  191. if (_validateFasta($upQuery_content)){
  192. form_set_error('nBLAST', t('Error: Failed to upload the Blast query: Wrong format provided for FASTA nucleotide sequence'));
  193. }
  194. else {
  195. $form_state['qFlag'] = 'upQuery';
  196. }
  197. }
  198. // Validate blast database upload
  199. $upDB = file_save_upload('DBUPLOAD', array('file_validate_extensions' => array('txt fasta fa fna')), FILE_EXISTS_RENAME);
  200. if ($upDB) {
  201. $upDB_uri = $upDB->uri;
  202. $form_state['upDB_path'] = drupal_realpath($upDB_uri);
  203. $upDB_content = file_get_contents($form_state['upDB_path']);
  204. if (_validateFasta($upDB_content)){
  205. form_set_error('DB', t('Error: Failed to upload the Blast subject sequence file: Wrong format provided for FASTA nucleotide sequence'));
  206. }
  207. else {
  208. $form_state['dbFlag'] = 'upQuery';
  209. }
  210. }
  211. else {
  212. $form_state['dbFlag'] = 'blastdb';
  213. }
  214. }
  215. /**
  216. * Form submission handler for blast_nucleotide_form().
  217. *
  218. * @see blast_nucleotide_form().
  219. */
  220. function blast_nucleotide_form_submit($form, &$form_state) {
  221. $eVal = $form_state['values']['eVal'];
  222. $trgtKey = $form_state['values']['maxTarget'];
  223. $numAlign = $form['ALG']['GParam']['maxTarget']['#options'][$trgtKey];
  224. $wsKey = $form_state['values']['wordSize'];
  225. $wordSize = $form['ALG']['GParam']['wordSize']['#options'][$wsKey];
  226. // Expand Gap Cost key into open and extend penalties
  227. $gapKey = $form_state['values']['gapCost'];
  228. switch ($gapKey) {
  229. case 0:
  230. $gapOpen = 5;
  231. $gapExtend = 2;
  232. break;
  233. case 1:
  234. $gapOpen = 2;
  235. $gapExtend = 2;
  236. break;
  237. case 2:
  238. $gapOpen = 1;
  239. $gapExtend = 2;
  240. break;
  241. case 3:
  242. $gapOpen = 0;
  243. $gapExtend = 2;
  244. break;
  245. case 4:
  246. $gapOpen = 3;
  247. $gapExtend = 1;
  248. break;
  249. case 5:
  250. $gapOpen = 2;
  251. $gapExtend = 1;
  252. break;
  253. case 6:
  254. $gapOpen = 1;
  255. $gapExtend = 1;
  256. break;
  257. }
  258. // Epand Match/Mismatch option into penalty/reward values
  259. // @todo Amir: Is the switch supposed to be for $scoreKey?
  260. $scoreKey = $form_state['values']['M&MScores'];
  261. switch ($gapKey) {
  262. case 0:
  263. $penalty = -2;
  264. $reward = 1;
  265. break;
  266. case 1:
  267. $penalty = -3;
  268. $reward = 1;
  269. break;
  270. case 2:
  271. $penalty = -4;
  272. $reward = 1;
  273. break;
  274. case 3:
  275. $penalty = -3;
  276. $reward = 2;
  277. break;
  278. case 4:
  279. $penalty = -5;
  280. $reward = 4;
  281. break;
  282. case 5:
  283. $penalty = -1;
  284. $reward = 1;
  285. break;
  286. }
  287. // If the query was submitted via the texrfield then create a file containing it
  288. if ( isset($form_state['qFlag']) ) {
  289. if ( $form_state['qFlag'] == 'seqQuery' ) {
  290. $seq_content = $form_state['values']['FASTA'];
  291. $query = "/tmp/user__query_file.fasta";
  292. file_put_contents ( $query , $seq_content);
  293. }
  294. elseif ( $form_state['qFlag'] == 'upQuery' ) {
  295. $query = $form_state['upQuery_path'];
  296. }
  297. }
  298. // If the BLAST database was uploaded then use it to run the BLAST
  299. if ( $form_state['dbFlag'] == 'upQuery') {
  300. // Since we only support using the -db flag (not -subject) we need to create a
  301. // blast database for the FASTA uploaded.
  302. // NOTE: We can't support subject because we need to generate the ASN.1+ format
  303. // to provide multiple download type options from the same BLAST
  304. $blastdb_with_path = $form_state['upDB_path'];
  305. system("makeblastdb -in $blastdb_with_path -dbtype nucl -parse_seqids");
  306. //$blast_subj_cmd = "blastn -query $query -subject $subjectSeq -out sites/default/files/$subSeqOut.blastn.html -evalue $eVal -word_size $wordSize -gapopen $gapOpen -gapextend $gapExtend -penalty $penalty -reward $reward -num_alignments 100 -html";
  307. //system($blast_subj_cmd);
  308. }
  309. // Otherwise, we are using one of the website provided BLAST databases so form the
  310. // BLAST command accordingly
  311. elseif ($form_state['dbFlag'] == 'blastdb') {
  312. $selected_db = $form_state['values']['SELECT_DB'];
  313. $blastdb_node = node_load($selected_db);
  314. $blastdb_with_path = $blastdb_node->db_path;
  315. }
  316. // Actually submit the BLAST Tripal Job
  317. // NOTE: Tripal jobs needs to be executed from the command-line before it will be run!!
  318. $blastdb_with_suffix = $blastdb_with_path . '.nsq';
  319. if (is_readable($blastdb_with_suffix)) {
  320. global $user;
  321. $output_filestub = date('YMd_His');
  322. $job_args = array(
  323. 'program' => 'blastn',
  324. 'query' => $query,
  325. 'database' => $blastdb_with_path,
  326. 'output_filename' => $output_filestub,
  327. 'options' => array(
  328. 'evalue' => $eVal,
  329. 'word_size' => $wordSize,
  330. 'gapopen' => $gapOpen,
  331. 'gapextend' => $gapExtend,
  332. 'penalty' => $penalty,
  333. 'reward' => $reward
  334. )
  335. );
  336. $job_id = tripal_add_job("BLAST (blastn): $query",'blast_job','run_BLAST_tripal_job', $job_args, $user->uid);
  337. // Redirect to the BLAST results page
  338. drupal_goto("blast/report/$job_id");
  339. }
  340. else {
  341. $dbfile_uploaded_msg = ($form_state['dbFlag'] == 'upQuery') ? 'The BLAST database was submitted via user upload.' : 'Existing BLAST Database was chosen';
  342. tripal_report_error(
  343. 'blast_ui',
  344. TRIPAL_ERROR,
  345. "BLAST database %db unaccessible. $dbfile_uploaded_msg",
  346. array('%db' => $blastdb_with_path)
  347. );
  348. drupal_set_message('BLAST database unaccessible. Please contact the site administrator.','error');
  349. }
  350. }
  351. /**
  352. * FASTA validating parser
  353. *
  354. * @param $sequence
  355. * A string of characters to be validated. A sequence in FASTA format begins with a single-line description, followed by lines of sequence data.
  356. * The description line is distinguished from the sequence data by a greater-than (">") symbol in the first column.
  357. * The word following the ">" symbol is the identifier of the sequence, and the rest of the line is the description (both are optional).
  358. * There should be no space between the ">" and the first letter of the identifier. The sequence ends if another line starting with a ">" appears;
  359. * this indicates the start of another sequence.
  360. *
  361. * @return
  362. * Return a boolean. 1 if the sequence does not pass the format valifation stage and 0 otherwise.
  363. *
  364. */
  365. function _validateFasta($sequence) {
  366. $fastaIdRegEx = '/^>.*(\\n|\\r)/';
  367. $fastaSeqRegEx = '/[^acgntuACGNTU\n\r]/';
  368. if ( preg_match($fastaSeqRegEx,$sequence) && !(preg_match($fastaIdRegEx,$sequence)) ) {
  369. $flag = 1;
  370. } else {
  371. $flag = 0;
  372. }
  373. return $flag;
  374. }