blast_ui.api.inc 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. <?php
  2. /**
  3. * @file
  4. * Contains more generally applicable functions as well as some meant to help developers
  5. * Plug-in to the BLAST UI functionality
  6. */
  7. /**
  8. * Get a specific BlastDB.
  9. *
  10. * @param $identifiers
  11. * An array of identifiers used to determine which BLAST DB to retrieve.
  12. *
  13. * @return
  14. * A fully-loaded BLAST DB Node
  15. */
  16. function get_blast_database($identifiers) {
  17. $node = FALSE;
  18. if (isset($identifiers['nid'])) {
  19. $node = node_load($identifiers['nid']);
  20. }
  21. elseif (isset($identifiers['name'])) {
  22. $nid = db_query('SELECT nid FROM {blastdb} WHERE name=:name', array(':name' => $identifiers['name']))->fetchField();
  23. $node = node_load($nid);
  24. } elseif (isset($identifiers['path'])) {
  25. $nid = db_query('SELECT nid FROM {blastdb} WHERE path LIKE :path', array(':path' => db_like($identifiers['path']) . '%'))->fetchField();
  26. $node = node_load($nid);
  27. }
  28. return $node;
  29. }
  30. /**
  31. * Returns a list BLAST DATABASE options
  32. *
  33. * @param $type
  34. * The type of BLAST dabases to restrict the list to (ie: n: nucleotide or p: protein)
  35. *
  36. * @return
  37. * An array where the nid is the key and the value is the human-readable name of the option
  38. */
  39. function get_blast_database_options($type) {
  40. global $user;
  41. // Use the Entity API to get a list of BLAST Nodes to load
  42. // We use this function in order respect node access control so that
  43. // administrators can use this module in combination with a node access module
  44. // of their choice to limit access to specific BLAST databases.
  45. $query = new EntityFieldQuery();
  46. $query->entityCondition('entity_type', 'node')
  47. // Restrict to BLASTDB nodes.
  48. ->entityCondition('bundle', 'blastdb')
  49. // Restrict to Published nodes.
  50. ->propertyCondition('status', 1)
  51. // Restrict to nodes the current user has permission to view.
  52. ->addTag('node_access');
  53. $entities = $query->execute();
  54. // Get all BlastDB nodes
  55. $nodes = node_load_multiple(array_keys($entities['node']));
  56. // Support obsolete database type n/p
  57. $obs_type = '';
  58. if ($type == 'protein') {
  59. $obs_type = 'p';
  60. }
  61. else {
  62. $obs_type = 'n';
  63. }
  64. $options = array();
  65. foreach ($nodes as $node) {
  66. if ( isset($node) && isset($node->db_dbtype) ) {
  67. if ( ($node->db_dbtype == $type) OR ($node->db_dbtype == $obs_type) ) {
  68. $options[$node->nid] = $node->db_name;
  69. }
  70. }
  71. }
  72. asort($options);
  73. $options[0] = 'Select a Dataset';
  74. return $options;
  75. }
  76. /**
  77. * Run BLAST (should be called from the command-line)
  78. *
  79. * @param $program
  80. * Which BLAST program to run (ie: 'blastn', 'tblastn', tblastx', 'blastp','blastx')
  81. * @param $query
  82. * The full path and filename of the query FASTA file
  83. * @param $database
  84. * The full path and filename prefix (excluding .nhr, .nin, .nsq, etc.)
  85. * @param $output_filestub
  86. * The filename (not including path) to give the results. Should not include file type suffix
  87. * @param $options
  88. * An array of additional option where the key is the name of the option used by
  89. * BLAST (ie: 'num_alignments') and the value is relates to this particular
  90. * BLAST job (ie: 250)
  91. */
  92. function run_BLAST_tripal_job($program, $query, $database, $output_filestub, $options, $job_id = NULL) {
  93. $output_file = file_directory_temp() . DIRECTORY_SEPARATOR . $output_filestub . '.blast.asn';
  94. $output_dir = variable_get('file_public_path', conf_path() . '/files')
  95. . DIRECTORY_SEPARATOR . 'tripal' . DIRECTORY_SEPARATOR . 'tripal_blast';
  96. $output_file_xml = $output_dir . DIRECTORY_SEPARATOR . $output_filestub . '.blast.xml';
  97. $output_file_tsv = $output_dir . DIRECTORY_SEPARATOR . $output_filestub . '.blast.tsv';
  98. $output_file_html = $output_dir . DIRECTORY_SEPARATOR . $output_filestub . '.blast.html';
  99. print "\nExecuting $program\n\n";
  100. print "Query: $query\n";
  101. print "Database: $database\n";
  102. print "Results File: $output_file\n";
  103. print "Options:\n";
  104. // Allow administrators to use an absolute path for these commands.
  105. // Defaults to using $PATH.
  106. $blast_path = variable_get('blast_path', '');
  107. $blast_threads = variable_get('blast_threads', '');
  108. // Strip the extension off the BLAST target
  109. $database = preg_replace("/(.*)\.[pn]\w\w/", '$1', $database);
  110. // The executables:
  111. $program = $blast_path . $program;
  112. $blast_formatter_command = $blast_path . 'blast_formatter';
  113. $blast_cmd = "$program -query '$query' -db '$database' -out '$output_file' -outfmt=11";
  114. if (!empty($options)) {
  115. foreach ($options as $opt => $val) {
  116. if ($val) {
  117. print "\t$opt: $val\n";
  118. $blast_cmd .= " -$opt $val";
  119. }
  120. }
  121. }
  122. // Setting the value of threads by admin page
  123. $blast_cmd .= " -num_threads ".$blast_threads;
  124. print "\nExecuting the following BLAST command:\n" . $blast_cmd . "\n";
  125. system($blast_cmd);
  126. if (!file_exists($output_file)) {
  127. tripal_report_error(
  128. 'blast_ui',
  129. TRIPAL_ERROR,
  130. "BLAST did not complete successfully as is implied by the lack of output file (%file). The command run was @command",
  131. array('%file' => $output_file, '@command' => $blast_cmd),
  132. array('print' => TRUE)
  133. );
  134. return FALSE;
  135. }
  136. print "\nGenerating additional download formats...\n";
  137. print "\tXML\n";
  138. $format_cmd = "$blast_formatter_command -archive $output_file -outfmt 5 -out $output_file_xml";
  139. print "\nExecuting $format_cmd\n\n";
  140. system($format_cmd);
  141. if (!file_exists($output_file_xml)) {
  142. tripal_report_error(
  143. 'blast_ui',
  144. TRIPAL_ERROR,
  145. "Unable to convert BLAST ASN.1 archive to XML (%archive => %file).",
  146. array('%archive' => $output_file, '%file' => $output_file_xml),
  147. array('print' => TRUE)
  148. );
  149. }
  150. print "\tTab-delimited\n";
  151. system("$blast_formatter_command -archive $output_file -outfmt 7 -out $output_file_tsv");
  152. if (!file_exists($output_file_tsv)) {
  153. tripal_report_error(
  154. 'blast_ui',
  155. TRIPAL_WARNING,
  156. "Unable to convert BLAST ASN.1 archive to Tabular Output (%archive => %file).",
  157. array('%archive' => $output_file, '%file' => $output_file_tsv),
  158. array('print' => TRUE)
  159. );
  160. }
  161. print "\tHTML (includes alignments)\n";
  162. system("$blast_formatter_command -archive $output_file -outfmt 0 -out $output_file_html -html");
  163. if (!file_exists($output_file_tsv)) {
  164. tripal_report_error(
  165. 'blast_ui',
  166. TRIPAL_WARNING,
  167. "Unable to convert BLAST ASN.1 archive to HTML Output (%archive => %file).",
  168. array('%archive' => $output_file, '%file' => $output_file_html),
  169. array('print' => TRUE)
  170. );
  171. }
  172. print "\nDone!\n";
  173. }
  174. /**
  175. * FASTA validating parser
  176. *
  177. * A sequence in FASTA format begins with a single-line description, followed
  178. * by lines of sequence data.The description line is distinguished from the
  179. * sequence data by a greater-than (">") symbol in the first column. The word
  180. * following the ">" symbol is the identifier of the sequence, and the rest of
  181. * the line is the description (both are optional). There should be no space
  182. * between the ">" and the first letter of the identifier. The sequence ends
  183. * if another line starting with a ">" appears which indicates the start of
  184. * another sequence.
  185. *
  186. * @param $type
  187. * The type of sequence to be validated (ie: either nucleotide or protein).
  188. * @param $sequence
  189. * A string of characters to be validated.
  190. *
  191. * @return
  192. * Return a boolean. 1 if the sequence does not pass the format valifation stage and 0 otherwise.
  193. *
  194. */
  195. function validate_fasta_sequence($type, $sequence) {
  196. if ($type == 'nucleotide') {
  197. $fastaIdRegEx = '/^>.*(\\n|\\r)/';
  198. $fastaSeqRegEx = '/[^acgntuACGNTU\n\r]/';
  199. if ( preg_match($fastaSeqRegEx,$sequence) && !(preg_match($fastaIdRegEx,$sequence)) ) {
  200. return TRUE;
  201. } else {
  202. return FALSE;
  203. }
  204. } elseif ($type == 'protein') {
  205. $fastaIdRegEx = '/^>.*(\\n|\\r)/';
  206. $fastaSeqRegEx = '/[^acgturykmswbdhvnxACGTURYKMSWBDHVNX\*\-\n\r]/';
  207. if ( preg_match($fastaSeqRegEx,$sequence) && !(preg_match($fastaIdRegEx,$sequence)) ) {
  208. return TRUE;
  209. } else {
  210. return FALSE;
  211. }
  212. }
  213. return FALSE;
  214. }
  215. /**
  216. * Retrieve the regex to capture the Link-out Accession from the Hit Def.
  217. *
  218. * @param $nid
  219. * The node ID of the BLAST database the hit is from.
  220. * @param $options
  221. * An array of options that can be passed to this function. Supported
  222. * options include:
  223. * -
  224. *
  225. * @return
  226. * A PHP regex for use with preg_match to cature the Link-out Accession.
  227. */
  228. function get_blastdb_linkout_regex($node, $options = array()) {
  229. if (empty($node->linkout->regex)) {
  230. switch ($node->linkout->regex_type) {
  231. case 'default':
  232. $regex = '/^(\S+).*/';
  233. break;
  234. case 'genbank':
  235. $regex = '/^gb\|([^\|])*\|.*/';
  236. break;
  237. case 'embl':
  238. $regex = '/^embl\|([^\|])*\|.*/';
  239. break;
  240. case 'swissprot':
  241. $regex = '/^sp\|([^\|])*\|.*/';
  242. break;
  243. }
  244. }
  245. else {
  246. $regex = $node->linkout->regex;
  247. }
  248. return $regex;
  249. }
  250. /**
  251. * Return a list of recent blast jobs to be displayed to the user.
  252. *
  253. * NOTE: The calling function will be expected to do the rendering.
  254. *
  255. * @return
  256. * An array of recent jobs.
  257. */
  258. function get_recent_blast_jobs($programs = array()) {
  259. $filter_jobs = !empty($programs);
  260. // Retrieve any recent jobs from the session variable.
  261. $sid = session_id();
  262. if (isset($_SESSION['all_jobs'][$sid])) {
  263. $jobs = array();
  264. foreach ($_SESSION['all_jobs'][$sid] as $job_id => $job) {
  265. $add = TRUE;
  266. // @TODO: Check that the results are still available.
  267. // This is meant to replace the arbitrary only show jobs executed less than 48 hrs ago.
  268. // Remove jobs from the list that are not of the correct program.
  269. if ($filter_jobs AND !in_array($job['program'], $programs)) {
  270. $add = FALSE;
  271. }
  272. if ($add) {
  273. // Format the query information for display.
  274. // Easiest case: if there is only one query header then show it.
  275. if (sizeof($job['query_defs']) == 1 AND isset($job['query_defs'][0])) {
  276. $job['query_info'] = $job['query_defs'][0];
  277. }
  278. // If we have at least one header then show that along with the count of queries.
  279. elseif (isset($job['query_defs'][0])) {
  280. $job['query_info'] = sizeof($job['query_defs']) . ' queries including "' . $job['query_defs'][0] . '"';
  281. }
  282. // If they provided a sequence with no header.
  283. elseif (empty($job['query_defs'])) {
  284. $job['query_info'] = 'Unnamed Query';
  285. }
  286. // At the very least show the count of queries.
  287. else {
  288. $job['query_info'] = sizeof($job['query_defs']) . ' queries';
  289. }
  290. $jobs[$job_id] = $job;
  291. }
  292. }
  293. return $jobs;
  294. }
  295. else {
  296. return array();
  297. }
  298. }
  299. /**
  300. * Generate an image of HSPs for a given hit.
  301. *
  302. * history:
  303. * 09/23/10 Carson created
  304. * 04/16/12 eksc adapted into POPcorn code
  305. * 03/12/15 deepak Adapted code into Tripal BLAST
  306. * 10/23/15 lacey Fixed deepak's code to be suitable for Drupal.
  307. *
  308. * @param $acc
  309. * target name
  310. * @param $name
  311. * query name, false if none
  312. * @param $tsize
  313. * target size
  314. * @param $qsize
  315. * query size
  316. * @param $hits
  317. * each hit represented in URL as: targetstart_targetend_hspstart_hspend;
  318. * @param $score
  319. * score for each hit
  320. *
  321. * @returm
  322. * A base64 encoded image representing the hit information.
  323. */
  324. function generate_blast_hit_image($acc = '', $scores, $hits, $tsize, $qsize, $name, $hit_name) {
  325. $tok = strtok($hits, ";");
  326. $b_hits = Array();
  327. while ($tok !== false) {
  328. $b_hits[] = $tok;
  329. $tok = strtok(";");
  330. }
  331. // extract score information from score param
  332. $tokscr = strtok($scores, ";");
  333. $b_scores = Array();
  334. while ($tokscr !== false) {
  335. $b_scores[] = $tokscr;
  336. $tokscr = strtok(";");
  337. }
  338. // image measurements
  339. $height = 200 + (count($b_hits) * 16);
  340. $width = 520;
  341. $img = imagecreatetruecolor($width, $height);
  342. $white = imagecolorallocate($img, 255, 255, 255);
  343. $black = imagecolorallocate($img, 0, 0, 0);
  344. $darkgray = imagecolorallocate($img, 100, 100, 100);
  345. $strong = imagecolorallocatealpha($img, 202, 0, 0, 15);
  346. $moderate = imagecolorallocatealpha($img, 204, 102, 0, 20);
  347. $present = imagecolorallocatealpha($img, 204, 204, 0, 35);
  348. $weak = imagecolorallocatealpha($img, 102, 204, 0, 50);
  349. $gray = imagecolorallocate($img, 190, 190, 190);
  350. $lightgray = $white; //imagecolorallocate($img, 230, 230, 230);
  351. imagefill($img, 0, 0, $lightgray);
  352. // Target coordinates
  353. $maxlength = 300;
  354. $t_length = ($tsize > $qsize)
  355. ? $maxlength : $maxlength - 50;
  356. $q_length = ($qsize > $tsize)
  357. ? $maxlength : $maxlength - 50;
  358. $tnormal = $t_length / $tsize;
  359. $qnormal = $q_length / $qsize;
  360. $t_ystart = 30;
  361. $t_yend = $t_ystart + 20;
  362. $t_xstart = 50;
  363. $t_xend = $t_xstart + $t_length;
  364. $t_center = $t_xstart + ($t_length / 2);
  365. // Target labels
  366. $warn = '"'. $hit_name . '"';
  367. imagestring($img, 5, $t_xstart, $t_ystart-20, $acc.$warn, $black);
  368. imagestring($img, 3, 5, $t_ystart+2, "Target", $black);
  369. // Draw bar representing target
  370. imagefilledrectangle($img, $t_xstart, $t_ystart, $t_xend, $t_yend, $gray);
  371. imagerectangle($img, $t_xstart, $t_ystart, $t_xend, $t_yend, $darkgray);
  372. // query coordinates
  373. $q_maxheight = 250;
  374. $q_ystart = $t_yend + 100;
  375. $q_yend = $q_ystart + 20;
  376. $q_xstart = $t_center - $q_length / 2;
  377. $q_xend = $q_xstart + $q_length;
  378. $q_center = ($q_xend + $q_xstart) / 2;
  379. $q_xwidth = $q_xend - $q_xstart;
  380. // Query labels
  381. imagestring($img, 5, $q_xstart, $q_yend+2, $name, $black);
  382. imagestring($img, 3, $q_xstart, $q_ystart+2, 'Query', $black);
  383. // Draw bar representing query
  384. imagefilledrectangle($img, $q_xstart, $q_ystart, $q_xend, $q_yend, $gray);
  385. imagerectangle($img ,$q_xstart, $q_ystart, $q_xend, $q_yend, $darkgray);
  386. // HSP bars will start here
  387. $hsp_bary = $q_yend + 20;
  388. // Draw solids for HSP alignments
  389. for ($ii=count($b_hits)-1; $ii>=0; $ii--) {
  390. // alignment
  391. $cur_hit = $b_hits[$ii];
  392. $cur_score = intval($b_scores[$ii]);
  393. // set color according to score
  394. $cur_color = $darkgray;
  395. if ($cur_score > 200) {
  396. $cur_color = $strong;
  397. }
  398. else if ($cur_score > 80 && $cur_score <= 200) {
  399. $cur_color = $moderate;
  400. }
  401. else if ($cur_score > 50 && $cur_score <= 80) {
  402. $cur_color = $present;
  403. }
  404. else if ($cur_score > 40 && $cur_score <= 50) {
  405. $cur_color = $weak;
  406. }
  407. $t_start = $tnormal * intval(strtok($cur_hit, "_")) + $t_xstart;
  408. $t_end = $tnormal * intval(strtok("_")) + $t_xstart;
  409. $q_start = $qnormal * intval(strtok("_")) + $q_xstart;
  410. $q_end = $qnormal * intval(strtok("_")) + $q_xstart;
  411. $hit1_array = array($t_start, $t_yend, $t_end, $t_yend, $q_end,
  412. $q_ystart, $q_start, $q_ystart);
  413. // HSP coords
  414. imagefilledpolygon($img, $hit1_array, 4, $cur_color);
  415. }//each hit
  416. // Draw lines over fills for HSP alignments
  417. for ($ii=0; $ii<count($b_hits); $ii++) {
  418. // alignment
  419. $cur_hit = $b_hits[$ii];
  420. $t_start = $tnormal * intval(strtok($cur_hit, "_")) + $t_xstart;
  421. $t_end = $tnormal * intval(strtok("_")) + $t_xstart;
  422. $q_start = $qnormal * intval(strtok("_")) + $q_xstart;
  423. $q_end = $qnormal * intval(strtok("_")) + $q_xstart;
  424. $hit1_array = array($t_start, $t_yend, $t_end, $t_yend, $q_end, $q_ystart,
  425. $q_start, $q_ystart,);
  426. imagerectangle($img, $t_start, $t_ystart, $t_end, $t_yend, $black);
  427. imagerectangle($img, $q_start, $q_ystart, $q_end, $q_yend, $black);
  428. imagepolygon ($img, $hit1_array, 4, $black);
  429. // show HSP
  430. imagestring($img, 3, 2, $hsp_bary, ($acc ."HSP" . ($ii + 1)), $black);
  431. $cur_score = intval($b_scores[$ii]);
  432. // set color according to score
  433. $cur_color = $darkgray;
  434. if ($cur_score > 200) {
  435. $cur_color = $strong;
  436. }
  437. else if ($cur_score > 80 && $cur_score <= 200) {
  438. $cur_color = $moderate;
  439. }
  440. else if ($cur_score > 50 && $cur_score <= 80) {
  441. $cur_color = $present;
  442. }
  443. else if ($cur_score > 40 && $cur_score <= 50) {
  444. $cur_color = $weak;
  445. }
  446. imagefilledrectangle($img, $q_start, $hsp_bary, $q_end, $hsp_bary+10, $cur_color);
  447. $hsp_bary += 15;
  448. }//each hit
  449. // Draw the key
  450. $xchart = 390;
  451. $ychart = 10;
  452. $fontsize = 4;
  453. $yinc = 20;
  454. $ywidth = 7;
  455. $xinc = 10;
  456. imagestring($img, 5, $xchart, $ychart - 5, "Bit Scores", $black);
  457. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 1) + $ywidth, ">= 200" , $black);
  458. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 2) + $ywidth, "80 - 200" , $black);
  459. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 3) + $ywidth, "50 - 80" , $black);
  460. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 4) + $ywidth, "40 - 50" , $black);
  461. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 5) + $ywidth, "< 40" , $black);
  462. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 1) + $xinc, $xchart + $yinc, $ychart + ($yinc * 2), $strong);
  463. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 2) + $xinc, $xchart + $yinc, $ychart + ($yinc * 3), $moderate);
  464. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 3) + $xinc, $xchart + $yinc, $ychart + ($yinc * 4), $present);
  465. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 4) + $xinc, $xchart + $yinc, $ychart + ($yinc * 5), $weak);
  466. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 5) + $xinc, $xchart + $yinc, $ychart + ($yinc * 6), $darkgray);
  467. // Now, we have a completed image resource and need to change it to an actual image
  468. // that can be displayed. This is done using imagepng() but unfortuatly that function
  469. // either saves the image to a file or outputs it directly to the screen. Thus, we use
  470. // the following code to capture it and base64 encode it.
  471. ob_start(); // Start buffering the output
  472. imagepng($img, null, 0, PNG_NO_FILTER);
  473. $b64_img = base64_encode(ob_get_contents()); // Get what we've just outputted and base64 it
  474. imagedestroy($img);
  475. ob_end_clean();
  476. return $b64_img;
  477. }