blast_ui.api.inc 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809
  1. <?php
  2. /**
  3. * @file
  4. * Contains more generally applicable functions as well as some meant to help developers
  5. * Plug-in to the BLAST UI functionality
  6. */
  7. /**
  8. * Get a specific BlastDB.
  9. *
  10. * @param $identifiers
  11. * An array of identifiers used to determine which BLAST DB to retrieve.
  12. *
  13. * @return
  14. * A fully-loaded BLAST DB Node
  15. */
  16. function get_blast_database($identifiers) {
  17. $node = FALSE;
  18. if (isset($identifiers['nid'])) {
  19. $node = node_load($identifiers['nid']);
  20. }
  21. elseif (isset($identifiers['name'])) {
  22. $nid = db_query('SELECT nid FROM {blastdb} WHERE name=:name', array(':name' => $identifiers['name']))->fetchField();
  23. $node = node_load($nid);
  24. } elseif (isset($identifiers['path'])) {
  25. $nid = db_query('SELECT nid FROM {blastdb} WHERE path LIKE :path', array(':path' => db_like($identifiers['path']) . '%'))->fetchField();
  26. $node = node_load($nid);
  27. }
  28. return $node;
  29. }
  30. /**
  31. * Returns a list BLAST DATABASE options
  32. *
  33. * @param $type
  34. * The type of BLAST dabases to restrict the list to (ie: n: nucleotide or p: protein)
  35. *
  36. * @return
  37. * An array where the nid is the key and the value is the human-readable name of the option
  38. */
  39. function get_blast_database_options($type) {
  40. global $user;
  41. // Use the Entity API to get a list of BLAST Nodes to load
  42. // We use this function in order respect node access control so that
  43. // administrators can use this module in combination with a node access module
  44. // of their choice to limit access to specific BLAST databases.
  45. $query = new EntityFieldQuery();
  46. $query->entityCondition('entity_type', 'node')
  47. // Restrict to BLASTDB nodes.
  48. ->entityCondition('bundle', 'blastdb')
  49. // Restrict to Published nodes.
  50. ->propertyCondition('status', 1)
  51. // Restrict to nodes the current user has permission to view.
  52. ->addTag('node_access');
  53. $entities = $query->execute();
  54. // Get all BlastDB nodes
  55. $nodes = node_load_multiple(array_keys($entities['node']));
  56. // Support obsolete database type n/p
  57. $obs_type = '';
  58. if ($type == 'protein') {
  59. $obs_type = 'p';
  60. }
  61. else {
  62. $obs_type = 'n';
  63. }
  64. $options = array();
  65. foreach ($nodes as $node) {
  66. if ( isset($node) && isset($node->db_dbtype) ) {
  67. if ( ($node->db_dbtype == $type) OR ($node->db_dbtype == $obs_type) ) {
  68. $options[$node->nid] = $node->db_name;
  69. }
  70. }
  71. }
  72. asort($options);
  73. $options[0] = 'Select a Dataset';
  74. return $options;
  75. }
  76. /**
  77. * Retrieve all the information for a blast job in a standardized node-like format.
  78. *
  79. * @param $job_id
  80. * The non-encoded tripal job_id.
  81. * @retun
  82. * An object describing the blast job.
  83. */
  84. function get_BLAST_job($job_id) {
  85. $blastjob = db_query('SELECT * FROM blastjob WHERE job_id=:id', array(':id' => $job_id))->fetchObject();
  86. $tripal_job = tripal_get_job($job_id);
  87. $job = new stdClass();
  88. $job->job_id = $job_id;
  89. $job->program = $blastjob->blast_program;
  90. $job->options = unserialize($blastjob->options);
  91. $job->date_submitted = $tripal_job->submit_date;
  92. $job->date_started = $tripal_job->start_time;
  93. $job->date_completed = $tripal_job->end_time;
  94. // TARGET BLAST DATABASE.
  95. // If a provided blast database was used then load details.
  96. if ($blastjob->target_blastdb ) {
  97. $job->blastdb = get_blast_database(array('nid' => $blastjob->target_blastdb));
  98. }
  99. // Otherwise the user uploaded their own database so provide what information we can.
  100. else {
  101. $job->blastdb = new stdClass();
  102. $job->blastdb->db_name = 'User Uploaded';
  103. $job->blastdb->db_path = $blastjob->target_file;
  104. $job->blastdb->linkout = new stdClass();
  105. $job->blastdb->linkout->none = TRUE;
  106. if ($job->program == 'blastp' OR $job->program == 'tblastn') {
  107. $job->blastdb->db_dbtype = 'protein';
  108. }
  109. else {
  110. $job->blastdb->db_dbtype = 'nucleotide';
  111. }
  112. }
  113. // FILES.
  114. $job->files = new stdClass();
  115. $job->files->query = $blastjob->query_file;
  116. $job->files->target = $blastjob->target_file;
  117. $job->files->result = new stdClass();
  118. $job->files->result->archive = $blastjob->result_filestub . '.asn';
  119. $job->files->result->xml = $blastjob->result_filestub . '.xml';
  120. $job->files->result->tsv = $blastjob->result_filestub . '.tsv';
  121. $job->files->result->html = $blastjob->result_filestub . '.html';
  122. $job->files->result->gff = $blastjob->result_filestub . '.gff';
  123. return $job;
  124. }
  125. /**
  126. * Run BLAST (should be called from the command-line)
  127. *
  128. * @param $program
  129. * Which BLAST program to run (ie: 'blastn', 'tblastn', tblastx', 'blastp','blastx')
  130. * @param $query
  131. * The full path and filename of the query FASTA file
  132. * @param $database
  133. * The full path and filename prefix (excluding .nhr, .nin, .nsq, etc.)
  134. * @param $output_filestub
  135. * The filename (not including path) to give the results. Should not include file type suffix
  136. * @param $options
  137. * An array of additional option where the key is the name of the option used by
  138. * BLAST (ie: 'num_alignments') and the value is relates to this particular
  139. * BLAST job (ie: 250)
  140. */
  141. function run_BLAST_tripal_job($program, $query, $database, $output_filestub, $options, $job_id = NULL) {
  142. $output_file = $output_filestub . '.asn';
  143. $output_file_xml = $output_filestub . '.xml';
  144. $output_file_tsv = $output_filestub . '.tsv';
  145. $output_file_html = $output_filestub . '.html';
  146. $output_file_gff = $output_filestub . '.gff';
  147. print "\nExecuting $program\n\n";
  148. print "Query: $query\n";
  149. print "Database: $database\n";
  150. print "Results File: $output_file\n";
  151. print "Options:\n";
  152. // Allow administrators to use an absolute path for these commands.
  153. // Defaults to using $PATH.
  154. $blast_path = variable_get('blast_path', '');
  155. $blast_threads = variable_get('blast_threads', 1);
  156. // Strip the extension off the BLAST target
  157. $database = preg_replace("/(.*)\.[pn]\w\w/", '$1', $database);
  158. // The executables:
  159. $program = $blast_path . $program;
  160. $blast_formatter_command = $blast_path . 'blast_formatter';
  161. $blast_cmd = "$program -query '$query' -db '$database' -out '$output_file' -outfmt=11";
  162. if (!empty($options)) {
  163. foreach ($options as $opt => $val) {
  164. if ($val) {
  165. print "\t$opt: $val\n";
  166. $blast_cmd .= " -$opt $val";
  167. }
  168. }
  169. }
  170. // Setting the value of threads by admin page
  171. $blast_cmd .= " -num_threads ".$blast_threads;
  172. print "\nExecuting the following BLAST command:\n" . $blast_cmd . "\n";
  173. system($blast_cmd);
  174. if (!file_exists($output_file)) {
  175. tripal_report_error(
  176. 'blast_ui',
  177. TRIPAL_ERROR,
  178. "BLAST did not complete successfully as is implied by the lack of output file (%file). The command run was @command",
  179. array('%file' => $output_file, '@command' => $blast_cmd),
  180. array('print' => TRUE)
  181. );
  182. return FALSE;
  183. }
  184. print "\nGenerating additional download formats...\n";
  185. print "\tXML\n";
  186. $format_cmd = "$blast_formatter_command -archive $output_file -outfmt 5 -out $output_file_xml";
  187. print "\nExecuting $format_cmd\n\n";
  188. system($format_cmd);
  189. if (!file_exists($output_file_xml)) {
  190. tripal_report_error(
  191. 'blast_ui',
  192. TRIPAL_ERROR,
  193. "Unable to convert BLAST ASN.1 archive to XML (%archive => %file).",
  194. array('%archive' => $output_file, '%file' => $output_file_xml),
  195. array('print' => TRUE)
  196. );
  197. }
  198. print "\tTab-delimited\n";
  199. system("$blast_formatter_command -archive $output_file -outfmt 7 -out $output_file_tsv");
  200. if (!file_exists($output_file_tsv)) {
  201. tripal_report_error(
  202. 'blast_ui',
  203. TRIPAL_WARNING,
  204. "Unable to convert BLAST ASN.1 archive to Tabular Output (%archive => %file).",
  205. array('%archive' => $output_file, '%file' => $output_file_tsv),
  206. array('print' => TRUE)
  207. );
  208. }
  209. print "\tGFF\n";
  210. convert_tsv2gff3($output_file_tsv,$output_file_gff);
  211. if (!file_exists($output_file_gff)) {
  212. tripal_report_error(
  213. 'blast_ui',
  214. TRIPAL_WARNING,
  215. "Unable to convert BLAST Tabular Output to GFF Output (%archive => %file).",
  216. array('%archive' => $output_file, '%file' => $output_file_gff),
  217. array('print' => TRUE)
  218. );
  219. }
  220. print "\tHTML (includes alignments)\n";
  221. system("$blast_formatter_command -archive $output_file -outfmt 0 -out $output_file_html -html");
  222. if (!file_exists($output_file_tsv)) {
  223. tripal_report_error(
  224. 'blast_ui',
  225. TRIPAL_WARNING,
  226. "Unable to convert BLAST ASN.1 archive to HTML Output (%archive => %file).",
  227. array('%archive' => $output_file, '%file' => $output_file_html),
  228. array('print' => TRUE)
  229. );
  230. }
  231. print "\nDone!\n";
  232. }
  233. /**
  234. * FASTA validating parser
  235. *
  236. * A sequence in FASTA format begins with a single-line description, followed
  237. * by lines of sequence data.The description line is distinguished from the
  238. * sequence data by a greater-than (">") symbol in the first column. The word
  239. * following the ">" symbol is the identifier of the sequence, and the rest of
  240. * the line is the description (both are optional). There should be no space
  241. * between the ">" and the first letter of the identifier. The sequence ends
  242. * if another line starting with a ">" appears which indicates the start of
  243. * another sequence.
  244. *
  245. * @param $type
  246. * The type of sequence to be validated (ie: either nucleotide or protein).
  247. * @param $sequence
  248. * A string of characters to be validated.
  249. *
  250. * @return
  251. * Return a boolean. 1 if the sequence does not pass the format valifation stage and 0 otherwise.
  252. *
  253. */
  254. function validate_fasta_sequence($type, $sequence) {
  255. if ($type == 'nucleotide') {
  256. $fastaIdRegEx = '/^>.*(\\n|\\r)/';
  257. $fastaSeqRegEx = '/[^ATCGNUKMBVSWDYRHatcgnukmbvswdyrh\n\r]/'; //Includes IUPAC codes.
  258. if ( preg_match($fastaSeqRegEx,$sequence) && !(preg_match($fastaIdRegEx,$sequence)) ) {
  259. return TRUE;
  260. } else {
  261. return FALSE;
  262. }
  263. } elseif ($type == 'protein') {
  264. $fastaIdRegEx = '/^>.*(\\n|\\r)/';
  265. $fastaSeqRegEx = '/[^ABCDEFGHIKLMNPQRSTUVWYZXabcdefghiklmnpqrstuvwyzx\*\-\n\r]/';
  266. if ( preg_match($fastaSeqRegEx,$sequence) && !(preg_match($fastaIdRegEx,$sequence)) ) {
  267. return TRUE;
  268. } else {
  269. return FALSE;
  270. }
  271. }
  272. return FALSE;
  273. }
  274. /**
  275. * Retrieve the regex to capture the Link-out Accession from the Hit Def.
  276. *
  277. * @param $nid
  278. * The node ID of the BLAST database the hit is from.
  279. * @param $options
  280. * An array of options that can be passed to this function. Supported
  281. * options include:
  282. * -
  283. *
  284. * @return
  285. * A PHP regex for use with preg_match to cature the Link-out Accession.
  286. */
  287. function get_blastdb_linkout_regex($node, $options = array()) {
  288. if (empty($node->linkout->regex)) {
  289. switch ($node->linkout->regex_type) {
  290. case 'default':
  291. $regex = '/^(\S+).*/';
  292. break;
  293. case 'genbank':
  294. $regex = '/^gb\|([^\|])*\|.*/';
  295. break;
  296. case 'embl':
  297. $regex = '/^embl\|([^\|])*\|.*/';
  298. break;
  299. case 'swissprot':
  300. $regex = '/^sp\|([^\|])*\|.*/';
  301. break;
  302. }
  303. }
  304. else {
  305. $regex = $node->linkout->regex;
  306. }
  307. return $regex;
  308. }
  309. /**
  310. * Return a list of recent blast jobs to be displayed to the user.
  311. *
  312. * @param $programs
  313. * An array of blast programs you want jobs to be displayed for (ie: blastn, blastx, tblastn, blastp)
  314. *
  315. * @return
  316. * An array of recent jobs.
  317. */
  318. function get_recent_blast_jobs($programs = array()) {
  319. $filter_jobs = !empty($programs);
  320. // Retrieve any recent jobs from the session variable.
  321. if (isset($_SESSION['blast_jobs'])) {
  322. $jobs = array();
  323. foreach ($_SESSION['blast_jobs'] as $job_secret) {
  324. $add = TRUE;
  325. $job_id = blast_ui_reveal_secret($job_secret);
  326. $job = get_BLAST_job($job_id);
  327. // @TODO: Check that the results are still available.
  328. // This is meant to replace the arbitrary only show jobs executed less than 48 hrs ago.
  329. // Remove jobs from the list that are not of the correct program.
  330. if ($filter_jobs AND !in_array($job->program, $programs)) {
  331. $add = FALSE;
  332. }
  333. if ($add) {
  334. $job->query_summary = format_query_headers($job->files->query);
  335. $jobs[] = $job;
  336. }
  337. }
  338. return $jobs;
  339. }
  340. else {
  341. return array();
  342. }
  343. }
  344. /**
  345. * Retrieve the number of recent jobs.
  346. */
  347. function get_number_of_recent_jobs() {
  348. if (isset($_SESSION['blast_jobs'])) {
  349. return sizeof($_SESSION['blast_jobs']);
  350. }
  351. return 0;
  352. }
  353. /**
  354. * Summarize a fasta file based on it's headers.
  355. *
  356. * @param $file
  357. * The full path to the FASTA file.
  358. *
  359. * @return
  360. * A string describing the number of sequences and often including the first query header.
  361. */
  362. function format_query_headers($file) {
  363. $headers = array();
  364. exec('grep ">" '.$file, $headers);
  365. // Easiest case: if there is only one query header then show it.
  366. if (sizeof($headers) == 1 AND isset($headers[0])) {
  367. return ltrim($headers[0], '>');
  368. }
  369. // If we have at least one header then show that along with the count of queries.
  370. elseif (isset($headers[0])) {
  371. return sizeof($headers) . ' queries including "' . ltrim($headers[0], '>') . '"';
  372. }
  373. // If they provided a sequence with no header.
  374. elseif (empty($headers)) {
  375. return 'Unnamed Query';
  376. }
  377. // At the very least show the count of queries.
  378. else {
  379. return sizeof($headers) . ' queries';
  380. }
  381. }
  382. /**
  383. * Sort recent blast jobs by the date they were submitted.
  384. * Ascending is in order of submission.
  385. *
  386. * THIS FUNCTION SHOULD BY USED BY USORT().
  387. */
  388. function sort_blast_jobs_by_date_submitted_asc($a, $b) {
  389. return ($a->date_submitted - $b->date_submitted);
  390. }
  391. /**
  392. * Sort recent blast jobs by the date they were submitted.
  393. * Descending is most recent first.
  394. *
  395. * THIS FUNCTION SHOULD BY USED BY USORT().
  396. */
  397. function sort_blast_jobs_by_date_submitted_desc($a, $b) {
  398. return ($b->date_submitted - $a->date_submitted);
  399. }
  400. /**
  401. * Generate an image of HSPs for a given hit.
  402. *
  403. * history:
  404. * 09/23/10 Carson created
  405. * 04/16/12 eksc adapted into POPcorn code
  406. * 03/12/15 deepak Adapted code into Tripal BLAST
  407. * 10/23/15 lacey Fixed deepak's code to be suitable for Drupal.
  408. *
  409. * @param $acc
  410. * target name
  411. * @param $name
  412. * query name, false if none
  413. * @param $tsize
  414. * target size
  415. * @param $qsize
  416. * query size
  417. * @param $hits
  418. * each hit represented in URL as: targetstart_targetend_hspstart_hspend;
  419. * @param $score
  420. * score for each hit
  421. *
  422. * @returm
  423. * A base64 encoded image representing the hit information.
  424. */
  425. function generate_blast_hit_image($acc = '', $scores, $hits, $tsize, $qsize, $name, $hit_name) {
  426. $tok = strtok($hits, ";");
  427. $b_hits = Array();
  428. while ($tok !== false) {
  429. $b_hits[] = $tok;
  430. $tok = strtok(";");
  431. }
  432. // extract score information from score param
  433. $tokscr = strtok($scores, ";");
  434. $b_scores = Array();
  435. while ($tokscr !== false) {
  436. $b_scores[] = $tokscr;
  437. $tokscr = strtok(";");
  438. }
  439. // image measurements
  440. $height = 200 + (count($b_hits) * 16);
  441. $width = 520;
  442. $img = imagecreatetruecolor($width, $height);
  443. $white = imagecolorallocate($img, 255, 255, 255);
  444. $black = imagecolorallocate($img, 0, 0, 0);
  445. $darkgray = imagecolorallocate($img, 100, 100, 100);
  446. $strong = imagecolorallocatealpha($img, 202, 0, 0, 15);
  447. $moderate = imagecolorallocatealpha($img, 204, 102, 0, 20);
  448. $present = imagecolorallocatealpha($img, 204, 204, 0, 35);
  449. $weak = imagecolorallocatealpha($img, 102, 204, 0, 50);
  450. $gray = imagecolorallocate($img, 190, 190, 190);
  451. $lightgray = $white; //imagecolorallocate($img, 230, 230, 230);
  452. imagefill($img, 0, 0, $lightgray);
  453. // Target coordinates
  454. $maxlength = 300;
  455. $t_length = ($tsize > $qsize)
  456. ? $maxlength : $maxlength - 50;
  457. $q_length = ($qsize > $tsize)
  458. ? $maxlength : $maxlength - 50;
  459. $tnormal = $t_length / $tsize;
  460. $qnormal = $q_length / $qsize;
  461. $t_ystart = 30;
  462. $t_yend = $t_ystart + 20;
  463. $t_xstart = 50;
  464. $t_xend = $t_xstart + $t_length;
  465. $t_center = $t_xstart + ($t_length / 2);
  466. // Target labels
  467. $warn = '"'. $hit_name . '"';
  468. imagestring($img, 5, $t_xstart, $t_ystart-20, $acc.$warn, $black);
  469. imagestring($img, 3, 5, $t_ystart+2, "Target", $black);
  470. // Draw bar representing target
  471. imagefilledrectangle($img, $t_xstart, $t_ystart, $t_xend, $t_yend, $gray);
  472. imagerectangle($img, $t_xstart, $t_ystart, $t_xend, $t_yend, $darkgray);
  473. // query coordinates
  474. $q_maxheight = 250;
  475. $q_ystart = $t_yend + 100;
  476. $q_yend = $q_ystart + 20;
  477. $q_xstart = $t_center - $q_length / 2;
  478. $q_xend = $q_xstart + $q_length;
  479. $q_center = ($q_xend + $q_xstart) / 2;
  480. $q_xwidth = $q_xend - $q_xstart;
  481. // Query labels
  482. imagestring($img, 5, $q_xstart, $q_yend+2, $name, $black);
  483. imagestring($img, 3, $q_xstart, $q_ystart+2, 'Query', $black);
  484. // Draw bar representing query
  485. imagefilledrectangle($img, $q_xstart, $q_ystart, $q_xend, $q_yend, $gray);
  486. imagerectangle($img ,$q_xstart, $q_ystart, $q_xend, $q_yend, $darkgray);
  487. // HSP bars will start here
  488. $hsp_bary = $q_yend + 20;
  489. // Draw solids for HSP alignments
  490. for ($ii=count($b_hits)-1; $ii>=0; $ii--) {
  491. // alignment
  492. $cur_hit = $b_hits[$ii];
  493. $cur_score = intval($b_scores[$ii]);
  494. // set color according to score
  495. $cur_color = $darkgray;
  496. if ($cur_score > 200) {
  497. $cur_color = $strong;
  498. }
  499. else if ($cur_score > 80 && $cur_score <= 200) {
  500. $cur_color = $moderate;
  501. }
  502. else if ($cur_score > 50 && $cur_score <= 80) {
  503. $cur_color = $present;
  504. }
  505. else if ($cur_score > 40 && $cur_score <= 50) {
  506. $cur_color = $weak;
  507. }
  508. $t_start = $tnormal * intval(strtok($cur_hit, "_")) + $t_xstart;
  509. $t_end = $tnormal * intval(strtok("_")) + $t_xstart;
  510. $q_start = $qnormal * intval(strtok("_")) + $q_xstart;
  511. $q_end = $qnormal * intval(strtok("_")) + $q_xstart;
  512. $hit1_array = array($t_start, $t_yend, $t_end, $t_yend, $q_end,
  513. $q_ystart, $q_start, $q_ystart);
  514. // HSP coords
  515. imagefilledpolygon($img, $hit1_array, 4, $cur_color);
  516. }//each hit
  517. // Draw lines over fills for HSP alignments
  518. for ($ii=0; $ii<count($b_hits); $ii++) {
  519. // alignment
  520. $cur_hit = $b_hits[$ii];
  521. $t_start = $tnormal * intval(strtok($cur_hit, "_")) + $t_xstart;
  522. $t_end = $tnormal * intval(strtok("_")) + $t_xstart;
  523. $q_start = $qnormal * intval(strtok("_")) + $q_xstart;
  524. $q_end = $qnormal * intval(strtok("_")) + $q_xstart;
  525. $hit1_array = array($t_start, $t_yend, $t_end, $t_yend, $q_end, $q_ystart,
  526. $q_start, $q_ystart,);
  527. imagerectangle($img, $t_start, $t_ystart, $t_end, $t_yend, $black);
  528. imagerectangle($img, $q_start, $q_ystart, $q_end, $q_yend, $black);
  529. imagepolygon ($img, $hit1_array, 4, $black);
  530. // show HSP
  531. imagestring($img, 3, 2, $hsp_bary, ($acc ."HSP" . ($ii + 1)), $black);
  532. $cur_score = intval($b_scores[$ii]);
  533. // set color according to score
  534. $cur_color = $darkgray;
  535. if ($cur_score > 200) {
  536. $cur_color = $strong;
  537. }
  538. else if ($cur_score > 80 && $cur_score <= 200) {
  539. $cur_color = $moderate;
  540. }
  541. else if ($cur_score > 50 && $cur_score <= 80) {
  542. $cur_color = $present;
  543. }
  544. else if ($cur_score > 40 && $cur_score <= 50) {
  545. $cur_color = $weak;
  546. }
  547. imagefilledrectangle($img, $q_start, $hsp_bary, $q_end, $hsp_bary+10, $cur_color);
  548. $hsp_bary += 15;
  549. }//each hit
  550. // Draw the key
  551. $xchart = 390;
  552. $ychart = 10;
  553. $fontsize = 4;
  554. $yinc = 20;
  555. $ywidth = 7;
  556. $xinc = 10;
  557. imagestring($img, 5, $xchart, $ychart - 5, "Bit Scores", $black);
  558. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 1) + $ywidth, ">= 200" , $black);
  559. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 2) + $ywidth, "80 - 200" , $black);
  560. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 3) + $ywidth, "50 - 80" , $black);
  561. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 4) + $ywidth, "40 - 50" , $black);
  562. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 5) + $ywidth, "< 40" , $black);
  563. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 1) + $xinc, $xchart + $yinc, $ychart + ($yinc * 2), $strong);
  564. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 2) + $xinc, $xchart + $yinc, $ychart + ($yinc * 3), $moderate);
  565. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 3) + $xinc, $xchart + $yinc, $ychart + ($yinc * 4), $present);
  566. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 4) + $xinc, $xchart + $yinc, $ychart + ($yinc * 5), $weak);
  567. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 5) + $xinc, $xchart + $yinc, $ychart + ($yinc * 6), $darkgray);
  568. // Now, we have a completed image resource and need to change it to an actual image
  569. // that can be displayed. This is done using imagepng() but unfortuatly that function
  570. // either saves the image to a file or outputs it directly to the screen. Thus, we use
  571. // the following code to capture it and base64 encode it.
  572. ob_start(); // Start buffering the output
  573. imagepng($img, null, 0, PNG_NO_FILTER);
  574. $b64_img = base64_encode(ob_get_contents()); // Get what we've just outputted and base64 it
  575. imagedestroy($img);
  576. ob_end_clean();
  577. return $b64_img;
  578. }
  579. /**
  580. * Convert tsv blast output to gff output file.
  581. *
  582. * Created by Sofia Robb
  583. * 09/15/2016
  584. *
  585. * The subject (hit) will be the source feature.
  586. * The query will be the target.
  587. *
  588. * @todo: find a more efficient way since currently the first loop stores all the blast
  589. * results into an array and then the second loop prints them.
  590. *
  591. * @param $blast_tsv
  592. * The name of the blast tsv output file.
  593. * @param $blast_gff
  594. * The name of the blast gff output file.
  595. */
  596. function convert_tsv2gff3($blast_tsv,$blast_gff){
  597. // Open a new file for writting the gff.
  598. $gff = fopen($blast_gff,"w");
  599. fwrite($gff,"##gff-version 3\n");
  600. // Open the TSV file to read from.
  601. $tsv = fopen($blast_tsv, "r") or die("Unable to open tsv file!");
  602. // For each line in the TSV file...
  603. $results = array();
  604. while(!feof($tsv)) {
  605. $line = fgets($tsv);
  606. $line = rtrim($line);
  607. // Skip the line if it's empty.
  608. if (preg_match('/^#/',$line) or preg_match('/^\s*$/',$line)){
  609. continue;
  610. }
  611. // Each line has the following parts:
  612. // 0: query id,
  613. // 1: subject id,
  614. // 2: % identity,
  615. // 3: alignment length,
  616. // 4: mismatches,
  617. // 5: gap opens,
  618. // 6: q. start,
  619. // 7: q. end,
  620. // 8: s. start,
  621. // 9: s. end,
  622. // 10: evalue,
  623. // 11: bit score
  624. $parts = preg_split('/\t/', $line);
  625. // Assign the important parts of the time to readable variables.
  626. $hitname = $parts[1];
  627. $queryId = $parts[0];
  628. $subjectStart = $parts[8];
  629. $subjectEnd = $parts[9];
  630. $queryStart = $part[6];
  631. $queryEnd = $parts[7];
  632. $eval = $parts[10];
  633. $hspInfo = "$queryId,$subjectStart,$subjectEnd,$queryStart,$queryEnd";
  634. $results[$hitname][$hspInfo] = $eval;
  635. } // end tsv file while
  636. $IDs = array();
  637. $count = 0;
  638. $last_s = NULL;
  639. // Need to go thru each line of tsv to find the first and last hsp of a hit.
  640. // Need to get the smallest and largest coordinate for the parent feature
  641. foreach ($results as $s => $hspInfoArray) {
  642. $count++;
  643. $hsp = 0;
  644. foreach ($hspInfoArray as $hspInfoStr => $e) {
  645. list($q,$ss,$se,$qs,$qe) = preg_split('/,/',$hspInfoStr);
  646. if ($s != NULL and $s != $last_s ) {
  647. $hsp=0;
  648. }
  649. $IDs["$s,$q"]['count']=$count;
  650. $q_strand = '+';
  651. if ($qs > $qe) {
  652. list($qs,$qe) = array($qe,$qs);
  653. $q_strand = '-';
  654. }
  655. $IDs["$s,$q"]['strand']='+';
  656. list($start,$end) = array($ss,$se);
  657. if($ss > $se) {
  658. list($start,$end) = array($se,$ss);
  659. $IDs["$s,$q"]['strand']='-';
  660. }
  661. if (!array_key_exists('SS',$IDs["$s,$q"]) or $ss < $IDs["$s,$q"]['SS']) {
  662. $IDs["$s,$q"]['SS'] = $ss;
  663. }
  664. if (!array_key_exists('SE',$IDs["$s,$q"]) or $se > $IDs["$s,$q"]['SE']) {
  665. $IDs["$s,$q"]['SE'] = $se;
  666. }
  667. if (!array_key_exists('E',$IDs["$s,$q"]) or $e < $IDs["$s,$q"]['E']) {
  668. $IDs["$s,$q"]['E'] = $e;
  669. }
  670. $hsp++;
  671. $IDs["$s,$q"]['HSPs'][] = join("\t", array($s, "BLASTRESULT" , "match_part" , $start , $end , $e , $IDs["$s,$q"]['strand'] , '.' , "ID=$s.$count.$hsp;Parent=$s.$count;Target=$q $qs $qe $q_strand"));
  672. $last_s = $s;
  673. }
  674. }
  675. // Now can print a parent gff line and all the children.
  676. // Note: the evalues seem to be sorted properly without actually sorting them.
  677. // @todo: need to make sure this is always true.
  678. foreach ($IDs as $sq => $value ) {
  679. list ($s,$q) = preg_split('/,/' , $sq);
  680. $evalue = $IDs["$s,$q"]['E'];
  681. $count = $IDs["$s,$q"]['count'];
  682. $parent = join ("\t", array($s, "BLASTRESULT" , "match" , $IDs["$s,$q"]['SS'] , $IDs["$s,$q"]['SE'] , $IDs["$s,$q"]['E'] , $IDs["$s,$q"]['strand'] , '.' , "ID=$s.$count;Name=$q($evalue)")) . "\n";
  683. $child = join ("\n",$IDs[$sq]['HSPs']) . "\n";
  684. fwrite($gff,$parent);
  685. fwrite($gff,$child);
  686. }
  687. // Close the files.
  688. fclose($tsv);
  689. fclose($gff);
  690. }