blast_ui.blastn.inc 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. <?php
  2. function blast_nucleotide_form($form, &$form_state) {
  3. //Nucleotide BLAST
  4. $form['nBLAST'] = array(
  5. '#type' => 'fieldset',
  6. '#title' => t('Standard Nucleotide BLAST'),
  7. '#collapsible' => TRUE,
  8. '#collapsed' => FALSE,
  9. );
  10. $form['nBLAST']['FASTA'] = array(
  11. '#type' => 'textarea',
  12. '#title' => t('Enter FASTA sequence(s)'),
  13. '#description'=>t('Enter query sequence(s) in the text area.'),
  14. );
  15. // Upload a file as an alternative to enter a query sequence
  16. $form['#attributes']['enctype'] = 'multipart/form-data';
  17. $form['nBLAST']['UPLOAD'] = array(
  18. '#prefix' => 'Or upload your query files: ',
  19. '#type' => 'file',
  20. '#description' => t('Please give a text a FASTA file, not a MS-Word or other document, you can upload up to 10 Mb.'),
  21. );
  22. //DATABASE//
  23. $form['DB'] = array(
  24. '#type' => 'fieldset',
  25. '#title' => t('BLAST Assembled KnowPulse Genomes'),
  26. '#collapsible' => TRUE,
  27. '#collapsed' => FALSE,
  28. );
  29. $options = _DB_options();
  30. $form['DB']['SELECT_DB'] = array(
  31. '#type' => 'select',
  32. '#title' => t('KnowPulse Databases:'),
  33. '#options' => $options,
  34. '#default_value' => t('Select a database'),
  35. );
  36. // Upload a file as an alternative to enter a query sequence
  37. $form['#attributes']['enctype'] = 'multipart/form-data';
  38. $form['DB']['DBUPLOAD'] = array(
  39. '#prefix' => 'Or upload your own dataset: ',
  40. '#type' => 'file',
  41. '#description' => t('Please give a text or a FASTA file, not a MS-Word or other document, you can upload up to 10 Mb.'),
  42. );
  43. //Algorithm parameters
  44. $form['ALG'] = array(
  45. '#type' => 'fieldset',
  46. '#title' => t('Algorithm parameters'),
  47. '#collapsible' => TRUE,
  48. '#collapsed' => TRUE,
  49. );
  50. //General parameters
  51. $form['ALG']['GParam'] = array(
  52. '#type' => 'fieldset',
  53. '#title' => t('General parameters'),
  54. '#collapsible' => FALSE,
  55. );
  56. $form['ALG']['GParam']['maxTarget'] = array(
  57. '#type' => 'select',
  58. '#title' => t('Max target sequences:'),
  59. '#options' => array(
  60. 0 => t('10'),
  61. 1 => t('50'),
  62. 2 => t('100'),
  63. 3 => t('250'),
  64. 4 => t('500'),
  65. 5 => t('1000'),
  66. 6 => t('5000'),
  67. 7 => t('10000'),
  68. 8 => t('20000'),
  69. ),
  70. '#default_value' => 2,
  71. '#description' => t('Select the maximum number of aligned sequences to display'),
  72. );
  73. $form['ALG']['GParam']['shortQueries'] = array(
  74. '#type' => 'checkbox',
  75. '#title' => t('Automatically adjust parameters for short input sequences'),
  76. '#default_value' => TRUE,
  77. );
  78. $form['ALG']['GParam']['eVal'] = array(
  79. '#type' => 'textfield',
  80. '#title' => t('Expect threshold'),
  81. '#default_value' => 10,
  82. '#size' => 12,
  83. '#maxlength' => 20,
  84. '#description' => t('Expected number of chance matches in a random model.'),
  85. );
  86. $form['ALG']['GParam']['wordSize'] = array(
  87. '#type' => 'select',
  88. '#title' => t('Word size:'),
  89. '#options' => array(
  90. 0 => t('16'),
  91. 1 => t('20'),
  92. 2 => t('24'),
  93. 3 => t('28'),
  94. 4 => t('32'),
  95. 5 => t('48'),
  96. 6 => t('64'),
  97. 7 => t('128'),
  98. 8 => t('256'),
  99. ),
  100. '#default_value' => 3,
  101. '#description' => t('The length of the seed that initiates an alignment'),
  102. );
  103. $form['ALG']['GParam']['qRange'] = array(
  104. '#type' => 'textfield',
  105. '#title' => t('Max matches in a query range'),
  106. '#default_value' => 0,
  107. '#size' => 12,
  108. '#maxlength' => 20,
  109. '#description' => t('Limit the number of matches to a query range. This option is useful if many strong matches to one part of a query may prevent BLAST from presenting weaker matches to another part of the query.'),
  110. );
  111. // Scoring parameters
  112. $form['ALG']['SParam'] = array(
  113. '#type' => 'fieldset',
  114. '#title' => t('Scoring parameters'),
  115. '#collapsible' => FALSE,
  116. );
  117. $form['ALG']['SParam']['M&MScores'] = array(
  118. '#type' => 'select',
  119. '#title' => t('Match/Mismatch Scores:'),
  120. '#options' => array(
  121. 0 => t('1,-2'),
  122. 1 => t('1,-3'),
  123. 2 => t('1,-4'),
  124. 3 => t('2,-3'),
  125. 4 => t('4,-5'),
  126. 5 => t('1,-1'),
  127. ),
  128. '#default_value' => 0,
  129. '#description' => t('Reward and penalty for matching and mismatching bases.'),
  130. );
  131. $form['ALG']['SParam']['gapCost'] = array(
  132. '#type' => 'select',
  133. '#title' => t('Gap Costs:'),
  134. '#options' => array(
  135. 0 => t('Existence: 5 Extension: 2'),
  136. 1 => t('Existence: 2 Extension: 2'),
  137. 2 => t('Existence: 1 Extension: 2'),
  138. 3 => t('Existence: 0 Extension: 2'),
  139. 4 => t('Existence: 3 Extension: 1'),
  140. 5 => t('Existence: 2 Extension: 1'),
  141. 6 => t('Existence: 1 Extension: 1'),
  142. ),
  143. '#default_value' => 0,
  144. '#description' => t('Cost to create and extend a gap in an alignment. Linear costs are available only with megablast and are determined by the match/mismatch scores.'),
  145. );
  146. //Submit
  147. $form['submit'] = array(
  148. '#type' => 'submit',
  149. '#default_value' => ' BLAST ',
  150. );
  151. return $form;
  152. }
  153. function blast_nucleotide_form_validate($form, &$form_state) {
  154. $fastaSeq = $form_state['input']['FASTA'];
  155. if (isset($fastaSeq)) {
  156. if(_validateFasta($fastaSeq)){
  157. form_set_error('nBLAST', t('Error: Failed to read the Blast query: Wrong format provided for FASTA nucleotide sequence'));
  158. } else {
  159. $form_state['flag'] = 'seqQuery';
  160. }
  161. }
  162. $upQuery = file_save_upload('UPLOAD', array('file_validate_extensions' => array('txt fasta fa fna')), FILE_EXISTS_RENAME);
  163. if ($upQuery) {
  164. $upQuery_uri = $upQuery->uri;
  165. $form_state['upQuery_path'] = drupal_realpath($upQuery_uri);
  166. $upQuery_content = file_get_contents($form_state['upQuery_path']);
  167. if(_validateFasta($upQuery_content)){
  168. form_set_error('nBLAST', t('Error: Failed to upload the Blast query: Wrong format provided for FASTA nucleotide sequence'));
  169. } else {
  170. $form_state['qFlag'] = 'upQuery';
  171. }
  172. }
  173. $upDB = file_save_upload('DBUPLOAD', array('file_validate_extensions' => array('txt fasta fa fna')), FILE_EXISTS_RENAME);
  174. if ($upDB) {
  175. $upDB_uri = $upDB->uri;
  176. $form_state['upDB_path'] = drupal_realpath($upDB_uri);
  177. $upDB_content = file_get_contents($form_state['upDB_path']);
  178. if(_validateFasta($upDB_content)){
  179. form_set_error('DB', t('Error: Failed to upload the Blast subject sequence file: Wrong format provided for FASTA nucleotide sequence'));
  180. } else {
  181. $form_state['dbFlag'] = 'upQuery';
  182. }
  183. } else {
  184. $form_state['dbFlag'] = 'blastdb';
  185. }
  186. }
  187. function blast_nucleotide_form_submit($form, &$form_state) {
  188. exec("export BLASTDB=<>");
  189. $eVal = $form_state['values']['eVal'];
  190. $trgtKey = $form_state['values']['maxTarget'];
  191. $numAlign = $form['ALG']['GParam']['maxTarget']['#options'][$trgtKey];
  192. $wsKey = $form_state['values']['wordSize'];
  193. $wordSize = $form['ALG']['GParam']['wordSize']['#options'][$wsKey];
  194. $gapKey = $form_state['values']['gapCost'];
  195. switch ($gapKey) {
  196. case 0:
  197. $gapOpen = 5;
  198. $gapExtend = 2;
  199. break;
  200. case 1:
  201. $gapOpen = 2;
  202. $gapExtend = 2;
  203. break;
  204. case 2:
  205. $gapOpen = 1;
  206. $gapExtend = 2;
  207. break;
  208. case 3:
  209. $gapOpen = 0;
  210. $gapExtend = 2;
  211. break;
  212. case 4:
  213. $gapOpen = 3;
  214. $gapExtend = 1;
  215. break;
  216. case 5:
  217. $gapOpen = 2;
  218. $gapExtend = 1;
  219. break;
  220. case 6:
  221. $gapOpen = 1;
  222. $gapExtend = 1;
  223. break;
  224. }
  225. $scoreKey = $form_state['values']['M&MScores'];
  226. switch ($gapKey) {
  227. case 0:
  228. $penalty = -2;
  229. $reward = 1;
  230. break;
  231. case 1:
  232. $penalty = -3;
  233. $reward = 1;
  234. break;
  235. case 2:
  236. $penalty = -4;
  237. $reward = 1;
  238. break;
  239. case 3:
  240. $penalty = -3;
  241. $reward = 2;
  242. break;
  243. case 4:
  244. $penalty = -5;
  245. $reward = 4;
  246. break;
  247. case 5:
  248. $penalty = -1;
  249. $reward = 1;
  250. break;
  251. }
  252. if ( isset($form_state['qFlag']) ) {
  253. if ( $form_state['qFlag'] == 'seqQuery' ) {
  254. $query = $form_state['values']['FASTA'];
  255. } elseif ( $form_state['qFlag'] == 'upQuery' ) {
  256. $query = $form_state['upQuery_path'];
  257. }
  258. }
  259. if ( $form_state['dbFlag'] == 'upQuery') {
  260. $subjectSeq = $form_state['upDB_path'];
  261. $subSeqOut = drupal_basename($form_state['upDB_path']) . rand(0, 10000);
  262. $blast_query_cmd = "blastn -query $query -subject $subjectSeq -out /tmp/$subSeqOut.blastn.html -evalue $eVal -word_size $wordSize -gapopen $gapOpen -gapextend $gapExtend -penalty $penalty -reward $reward -num_alignments 100 -html";
  263. system($blast_query_cmd);
  264. } elseif ($form_state['dbFlag'] == 'blastdb') {
  265. $selected_db = $form_state['values']['SELECT_DB'];
  266. $blastdb = $form['DB']['SELECT_DB']['#options'][$selected_db];
  267. $subSeqOut = drupal_basename($form_state['upQuery_path']). rand(0, 10000);
  268. $blast_db_cmd = "blastn -task blastn -query $query -db /$blastdb -out /tmp/$subSeqOut.blastn.html -evalue $eVal -word_size $wordSize -gapopen $gapOpen -gapextend $gapExtend -penalty $penalty -reward $reward -num_alignments 100 -html";
  269. system($blast_cmd);
  270. }
  271. }
  272. function _validateFasta($sequence) {
  273. $fastaIdRegEx = '/^>.*(\\n|\\r)/';
  274. $fastaSeqRegEx = '/[^acgntuACGNTU\n\r]/';
  275. if ( preg_match($fastaSeqRegEx,$sequence) && !(preg_match($fastaIdRegEx,$sequence)) ) {
  276. $flag = 1;
  277. } else {
  278. $flag = 0;
  279. }
  280. return $flag;
  281. }
  282. function _DB_options() {
  283. $type = 'blastdb';
  284. $nodes = node_load_multiple(array(), array('type'=> $type));
  285. foreach ($nodes as $node) {
  286. if ( isset($node) ) {
  287. $options[$node->nid] = $node->db_name;
  288. }
  289. }
  290. return $options;
  291. }