blast_ui.api.inc 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970
  1. <?php
  2. /**
  3. * @file
  4. * Contains more generally applicable functions as well as some meant to help developers
  5. * Plug-in to the BLAST UI functionality
  6. */
  7. /**
  8. * Get a specific BlastDB.
  9. *
  10. * @param $identifiers
  11. * An array of identifiers used to determine which BLAST DB to retrieve.
  12. *
  13. * @return
  14. * A fully-loaded BLAST DB Node
  15. */
  16. function get_blast_database($identifiers) {
  17. $node = FALSE;
  18. if (isset($identifiers['nid'])) {
  19. $node = node_load($identifiers['nid']);
  20. }
  21. elseif (isset($identifiers['name'])) {
  22. $nid = db_query('SELECT nid FROM {blastdb} WHERE name=:name', array(':name' => $identifiers['name']))->fetchField();
  23. $node = node_load($nid);
  24. } elseif (isset($identifiers['path'])) {
  25. $nid = db_query('SELECT nid FROM {blastdb} WHERE path LIKE :path', array(':path' => db_like($identifiers['path']) . '%'))->fetchField();
  26. $node = node_load($nid);
  27. }
  28. return $node;
  29. }
  30. /**
  31. * Returns a list BLAST DATABASE options
  32. *
  33. * @param $type
  34. * The type of BLAST dabases to restrict the list to (ie: n: nucleotide or p: protein)
  35. *
  36. * @return
  37. * An array where the nid is the key and the value is the human-readable name of the option
  38. */
  39. function get_blast_database_options($type) {
  40. global $user;
  41. // Get all BlastDB nodes
  42. $nodes = get_blast_database_nodes();
  43. // Support obsolete database type n/p
  44. $obs_type = '';
  45. if ($type == 'protein') {
  46. $obs_type = 'p';
  47. }
  48. else {
  49. $obs_type = 'n';
  50. }
  51. $options = array();
  52. foreach ($nodes as $node) {
  53. if ( isset($node) && isset($node->db_dbtype) ) {
  54. if ( ($node->db_dbtype == $type) OR ($node->db_dbtype == $obs_type) ) {
  55. $options[$node->nid] = $node->db_name;
  56. }
  57. }
  58. }
  59. // Sort alphabetically
  60. asort($options);
  61. return $options;
  62. }
  63. /**
  64. * Returns all blast database nodes.
  65. *
  66. * @return
  67. * An array of nodes.
  68. */
  69. function get_blast_database_nodes() {
  70. // Use the Entity API to get a list of BLAST Nodes to load
  71. // We use this function in order respect node access control so that
  72. // administrators can use this module in combination with a node access module
  73. // of their choice to limit access to specific BLAST databases.
  74. $query = new EntityFieldQuery();
  75. $query->entityCondition('entity_type', 'node')
  76. // Restrict to BLASTDB nodes.
  77. ->entityCondition('bundle', 'blastdb')
  78. // Restrict to Published nodes.
  79. ->propertyCondition('status', 1)
  80. // Restrict to nodes the current user has permission to view.
  81. ->addTag('node_access');
  82. $entities = $query->execute();
  83. // Get all BlastDB nodes
  84. return node_load_multiple(array_keys($entities['node']));
  85. }
  86. /**
  87. * Retrieve all the information for a blast job in a standardized node-like format.
  88. *
  89. * @param $job_id
  90. * The non-encoded tripal job_id.
  91. * @retun
  92. * An object describing the blast job.
  93. */
  94. function get_BLAST_job($job_id) {
  95. $blastjob = db_query('SELECT * FROM blastjob WHERE job_id=:id', array(':id' => $job_id))->fetchObject();
  96. if (!$blastjob) {
  97. return false;
  98. }
  99. $tripal_job = tripal_get_job($job_id);
  100. $job = new stdClass();
  101. $job->job_id = $job_id;
  102. $job->program = $blastjob->blast_program;
  103. $job->options = unserialize($blastjob->options);
  104. $job->date_submitted = $tripal_job->submit_date;
  105. $job->date_started = $tripal_job->start_time;
  106. $job->date_completed = $tripal_job->end_time;
  107. // TARGET BLAST DATABASE.
  108. // If a provided blast database was used then load details.
  109. if ($blastjob->target_blastdb ) {
  110. $job->blastdb = get_blast_database(array('nid' => $blastjob->target_blastdb));
  111. }
  112. // Otherwise the user uploaded their own database so provide what information we can.
  113. else {
  114. $job->blastdb = new stdClass();
  115. $job->blastdb->db_name = 'User Uploaded';
  116. $job->blastdb->db_path = $blastjob->target_file;
  117. $job->blastdb->linkout = new stdClass();
  118. $job->blastdb->linkout->none = TRUE;
  119. if ($job->program == 'blastp' OR $job->program == 'tblastn') {
  120. $job->blastdb->db_dbtype = 'protein';
  121. }
  122. else {
  123. $job->blastdb->db_dbtype = 'nucleotide';
  124. }
  125. }
  126. // FILES.
  127. $job->files = new stdClass();
  128. $job->files->query = $blastjob->query_file;
  129. $job->files->target = $blastjob->target_file;
  130. $job->files->result = new stdClass();
  131. $job->files->result->archive = $blastjob->result_filestub . '.asn';
  132. $job->files->result->xml = $blastjob->result_filestub . '.xml';
  133. $job->files->result->tsv = $blastjob->result_filestub . '.tsv';
  134. $job->files->result->html = $blastjob->result_filestub . '.html';
  135. $job->files->result->gff = $blastjob->result_filestub . '.gff';
  136. return $job;
  137. }
  138. /**
  139. * Run BLAST (should be called from the command-line)
  140. *
  141. * @param $program
  142. * Which BLAST program to run (ie: 'blastn', 'tblastn', tblastx', 'blastp','blastx')
  143. * @param $query
  144. * The full path and filename of the query FASTA file
  145. * @param $database
  146. * The full path and filename prefix (excluding .nhr, .nin, .nsq, etc.)
  147. * @param $output_filestub
  148. * The filename (not including path) to give the results. Should not include file type suffix
  149. * @param $options
  150. * An array of additional option where the key is the name of the option used by
  151. * BLAST (ie: 'num_alignments') and the value is relates to this particular
  152. * BLAST job (ie: 250)
  153. */
  154. function run_BLAST_tripal_job($program, $query, $database, $output_filestub, $options, $job_id = NULL) {
  155. $output_file = $output_filestub . '.asn';
  156. $output_file_xml = $output_filestub . '.xml';
  157. $output_file_tsv = $output_filestub . '.tsv';
  158. $output_file_html = $output_filestub . '.html';
  159. $output_file_gff = $output_filestub . '.gff';
  160. print "\nExecuting $program\n\n";
  161. print "Query: $query\n";
  162. print "Database: $database\n";
  163. print "Results File: $output_file\n";
  164. print "Options:\n";
  165. // Allow administrators to use an absolute path for these commands.
  166. // Defaults to using $PATH.
  167. $blast_path = variable_get('blast_path', '');
  168. $blast_threads = variable_get('blast_threads', 1);
  169. // Strip the extension off the BLAST target
  170. $suffix = ['.ndb', '.nhr', '.nin', '.not', '.nsq', '.ntf', '.nto',
  171. '.pdb', '.phr', '.pin', '.pot', '.psq', '.ptf', '.pto'];
  172. $database = str_replace($suffix,'',$database);
  173. // Check that the database exists before trying to execute the job.
  174. if (!(file_exists($database . '.nsq') OR file_exists($database . '.psq'))) {
  175. tripal_report_error(
  176. 'blast_ui',
  177. TRIPAL_ERROR,
  178. "Unable to find the BLAST database (ie: @db). Please ensure you have supplied the absolute path not including the file format endings.",
  179. array('@db' => $database),
  180. array('print' => TRUE)
  181. );
  182. return FALSE;
  183. }
  184. // The BLAST executeable.
  185. $program = $blast_path . $program;
  186. if (!file_exists($program)) {
  187. tripal_report_error(
  188. 'blast_ui',
  189. TRIPAL_ERROR,
  190. "Unable to find the BLAST executable (ie: /usr/bin/blastn). This can be changed in the admin settings; you supplied: @command",
  191. array('@command' => $program),
  192. array('print' => TRUE)
  193. );
  194. return FALSE;
  195. }
  196. // The blast db formatter executable.
  197. $blast_formatter_command = $blast_path . 'blast_formatter';
  198. if (!file_exists($blast_formatter_command)) {
  199. tripal_report_error(
  200. 'blast_ui',
  201. TRIPAL_ERROR,
  202. "Unable to find the BLAST Formatter executable (ie: /usr/bin/blast_formatter). This can be changed in the admin settings; you supplied: @command",
  203. array('@command' => $blast_formatter_command),
  204. array('print' => TRUE)
  205. );
  206. return FALSE;
  207. }
  208. // Note: all variables are escaped (adds single quotes around their values) for security reasons.
  209. $blast_cmd = escapeshellarg($program) . ' -query ' . escapeshellarg($query) . ' -db ' . escapeshellarg($database) . ' -out ' . escapeshellarg($output_file) . ' -outfmt=11';
  210. if (!empty($options)) {
  211. foreach ($options as $opt => $val) {
  212. $val = trim($val);
  213. if (!empty($val)) {
  214. print "\t$opt: $val\n";
  215. // We want to escape all the option values since they were supplied via
  216. // user input. These values should also have been checked in the
  217. // advanced form _validate functions but this adds an extra layer of
  218. // protection.
  219. $blast_cmd .= ' -' . escapeshellarg($opt) . ' ' . escapeshellarg($val);
  220. }
  221. }
  222. }
  223. // Setting the value of threads by admin page
  224. $blast_cmd .= ' -num_threads ' . escapeshellarg($blast_threads);
  225. print "\nExecuting the following BLAST command:\n" . $blast_cmd . "\n";
  226. system($blast_cmd);
  227. if (!file_exists($output_file)) {
  228. tripal_report_error(
  229. 'blast_ui',
  230. TRIPAL_ERROR,
  231. "BLAST did not complete successfully as is implied by the lack of output file (%file). The command run was @command",
  232. array('%file' => $output_file, '@command' => $blast_cmd),
  233. array('print' => TRUE)
  234. );
  235. return FALSE;
  236. }
  237. print "\nGenerating additional download formats...\n";
  238. print "\tXML\n";
  239. $format_cmd = escapeshellarg($blast_formatter_command) . ' -archive ' . escapeshellarg($output_file) . ' -outfmt 5 -out ' . escapeshellarg($output_file_xml);
  240. print "\t\tExecuting $format_cmd\n\n";
  241. system($format_cmd);
  242. if (!file_exists($output_file_xml)) {
  243. tripal_report_error(
  244. 'blast_ui',
  245. TRIPAL_ERROR,
  246. "Unable to convert BLAST ASN.1 archive to XML (%archive => %file).",
  247. array('%archive' => $output_file, '%file' => $output_file_xml),
  248. array('print' => TRUE)
  249. );
  250. }
  251. print "\tTab-delimited\n";
  252. $format_cmd = escapeshellarg($blast_formatter_command) . ' -archive ' . escapeshellarg($output_file) . ' -outfmt 7 -out ' . escapeshellarg($output_file_tsv);
  253. print "\t\tExecuting $format_cmd\n\n";
  254. system($format_cmd);
  255. if (!file_exists($output_file_tsv)) {
  256. tripal_report_error(
  257. 'blast_ui',
  258. TRIPAL_WARNING,
  259. "Unable to convert BLAST ASN.1 archive to Tabular Output (%archive => %file).",
  260. array('%archive' => $output_file, '%file' => $output_file_tsv),
  261. array('print' => TRUE)
  262. );
  263. }
  264. print "\tGFF\n";
  265. convert_tsv2gff3($output_file_tsv,$output_file_gff);
  266. if (!file_exists($output_file_gff)) {
  267. tripal_report_error(
  268. 'blast_ui',
  269. TRIPAL_WARNING,
  270. "Unable to convert BLAST Tabular Output to GFF Output (%archive => %file).",
  271. array('%archive' => $output_file, '%file' => $output_file_gff),
  272. array('print' => TRUE)
  273. );
  274. }
  275. print "\tHTML (includes alignments)\n";
  276. $format_cmd = escapeshellarg($blast_formatter_command) . ' -archive ' . escapeshellarg($output_file) . ' -outfmt 0 -out ' . escapeshellarg($output_file_html) . ' -html';
  277. print "\t\tExecuting $format_cmd\n\n";
  278. system($format_cmd);
  279. if (!file_exists($output_file_tsv)) {
  280. tripal_report_error(
  281. 'blast_ui',
  282. TRIPAL_WARNING,
  283. "Unable to convert BLAST ASN.1 archive to HTML Output (%archive => %file).",
  284. array('%archive' => $output_file, '%file' => $output_file_html),
  285. array('print' => TRUE)
  286. );
  287. }
  288. print "\nDone!\n";
  289. }
  290. /**
  291. * FASTA validating parser
  292. *
  293. * A sequence in FASTA format begins with a single-line description, followed
  294. * by lines of sequence data.The description line is distinguished from the
  295. * sequence data by a greater-than (">") symbol in the first column. The word
  296. * following the ">" symbol is the identifier of the sequence, and the rest of
  297. * the line is the description (both are optional). There should be no space
  298. * between the ">" and the first letter of the identifier. The sequence ends
  299. * if another line starting with a ">" appears which indicates the start of
  300. * another sequence.
  301. *
  302. * @param $type
  303. * The type of sequence to be validated (ie: either nucleotide or protein).
  304. * @param $sequence
  305. * A string of characters to be validated.
  306. *
  307. * @return
  308. * Return a boolean. 1 if the sequence does not pass the format valifation stage and 0 otherwise.
  309. *
  310. */
  311. function validate_fasta_sequence($type, $sequence) {
  312. //Includes IUPAC codes.
  313. $fastaSeqRegEx = ($type == 'nucleotide')
  314. ? '/^[ATCGNUKMBVSWDYRHatcgnukmbvswdyrh\[\/\]\s\n\r]*$/'
  315. : '/^[acdefghiklmnpqrstvwyACDEFGHIKLMNPQRSTVWY\*\-\s\n\r]*$/';
  316. $defRegEx = '/^>\S.*/';
  317. // For each line of the sequence.
  318. foreach (explode("\n", $sequence) as $line) {
  319. // Is this a definition line?
  320. if ($line[0] == '>') {
  321. if (!preg_match($defRegEx, $line)) {
  322. return FALSE;
  323. }
  324. }
  325. // Otherwise it's a sequence line
  326. else {
  327. if (!preg_match($fastaSeqRegEx, $line)) {
  328. return FALSE;
  329. }
  330. }
  331. }
  332. return TRUE;
  333. }
  334. /**
  335. * Retrieve the regex to capture the Link-out Accession from the Hit Def.
  336. *
  337. * @param $nid
  338. * The node ID of the BLAST database the hit is from.
  339. * @param $options
  340. * An array of options that can be passed to this function. Supported
  341. * options include:
  342. * -
  343. *
  344. * @return
  345. * A PHP regex for use with preg_match to cature the Link-out Accession.
  346. */
  347. function get_blastdb_linkout_regex($node, $options = array()) {
  348. if (empty($node->linkout->regex)) {
  349. switch ($node->linkout->regex_type) {
  350. case 'default':
  351. $regex = '/^(\S+).*/';
  352. break;
  353. case 'genbank':
  354. $regex = '/^gb\|([^\|])*\|.*/';
  355. break;
  356. case 'embl':
  357. $regex = '/^embl\|([^\|])*\|.*/';
  358. break;
  359. case 'swissprot':
  360. $regex = '/^sp\|([^\|])*\|.*/';
  361. break;
  362. }
  363. }
  364. else {
  365. $regex = $node->linkout->regex;
  366. }
  367. return $regex;
  368. }
  369. /**
  370. * Return a list of recent blast jobs to be displayed to the user.
  371. *
  372. * @param $programs
  373. * An array of blast programs you want jobs to be displayed for (ie: blastn, blastx, tblastn, blastp)
  374. *
  375. * @return
  376. * An array of recent jobs.
  377. */
  378. function get_recent_blast_jobs($programs = array()) {
  379. $filter_jobs = !empty($programs);
  380. // Retrieve any recent jobs from the session variable.
  381. if (isset($_SESSION['blast_jobs'])) {
  382. $jobs = array();
  383. foreach ($_SESSION['blast_jobs'] as $job_secret) {
  384. $add = TRUE;
  385. $job_id = blast_ui_reveal_secret($job_secret);
  386. if ($job = get_BLAST_job($job_id)) {
  387. // @TODO: Check that the results are still available.
  388. // This is meant to replace the arbitrary only show jobs executed less than 48 hrs ago.
  389. // Remove jobs from the list that are not of the correct program.
  390. if ($filter_jobs AND !in_array($job->program, $programs)) {
  391. $add = FALSE;
  392. }
  393. if ($add) {
  394. $job->query_summary = format_query_headers($job->files->query);
  395. $jobs[] = $job;
  396. }
  397. }
  398. }
  399. return $jobs;
  400. }
  401. else {
  402. return array();
  403. }
  404. }
  405. /**
  406. * Retrieve the number of recent jobs.
  407. */
  408. function get_number_of_recent_jobs() {
  409. if (isset($_SESSION['blast_jobs'])) {
  410. return sizeof($_SESSION['blast_jobs']);
  411. }
  412. return 0;
  413. }
  414. /**
  415. * Summarize a fasta file based on it's headers.
  416. *
  417. * @param $file
  418. * The full path to the FASTA file.
  419. *
  420. * @return
  421. * A string describing the number of sequences and often including the first query header.
  422. */
  423. function format_query_headers($file) {
  424. $headers = array();
  425. exec('grep ">" ' . escapeshellarg($file), $headers);
  426. // Easiest case: if there is only one query header then show it.
  427. if (sizeof($headers) == 1 AND isset($headers[0])) {
  428. return ltrim($headers[0], '>');
  429. }
  430. // If we have at least one header then show that along with the count of queries.
  431. elseif (isset($headers[0])) {
  432. return sizeof($headers) . ' queries including "' . ltrim($headers[0], '>') . '"';
  433. }
  434. // If they provided a sequence with no header.
  435. elseif (empty($headers)) {
  436. return 'Unnamed Query';
  437. }
  438. // At the very least show the count of queries.
  439. else {
  440. return sizeof($headers) . ' queries';
  441. }
  442. }
  443. /**
  444. * Sort recent blast jobs by the date they were submitted.
  445. * Ascending is in order of submission.
  446. *
  447. * THIS FUNCTION SHOULD BY USED BY USORT().
  448. */
  449. function sort_blast_jobs_by_date_submitted_asc($a, $b) {
  450. return ($a->date_submitted - $b->date_submitted);
  451. }
  452. /**
  453. * Sort recent blast jobs by the date they were submitted.
  454. * Descending is most recent first.
  455. *
  456. * THIS FUNCTION SHOULD BY USED BY USORT().
  457. */
  458. function sort_blast_jobs_by_date_submitted_desc($a, $b) {
  459. return ($b->date_submitted - $a->date_submitted);
  460. }
  461. /**
  462. * Generate an image of HSPs for a given hit.
  463. *
  464. * history:
  465. * 09/23/10 Carson created
  466. * 04/16/12 eksc adapted into POPcorn code
  467. * 03/12/15 deepak Adapted code into Tripal BLAST
  468. * 10/23/15 lacey Fixed deepak's code to be suitable for Drupal.
  469. *
  470. * @param $acc
  471. * target name
  472. * @param $name
  473. * query name, false if none
  474. * @param $tsize
  475. * target size
  476. * @param $qsize
  477. * query size
  478. * @param $hits
  479. * each hit represented in URL as: targetstart_targetend_hspstart_hspend;
  480. * @param $score
  481. * score for each hit
  482. *
  483. * @returm
  484. * A base64 encoded image representing the hit information.
  485. */
  486. function generate_blast_hit_image($acc = '', $scores, $hits, $tsize, $qsize, $name, $hit_name) {
  487. $tok = strtok($hits, ";");
  488. $b_hits = Array();
  489. while ($tok !== false) {
  490. $b_hits[] = $tok;
  491. $tok = strtok(";");
  492. }
  493. // extract score information from score param
  494. $tokscr = strtok($scores, ";");
  495. $b_scores = Array();
  496. while ($tokscr !== false) {
  497. $b_scores[] = $tokscr;
  498. $tokscr = strtok(";");
  499. }
  500. // image measurements
  501. $height = 200 + (count($b_hits) * 16);
  502. $width = 520;
  503. $img = imagecreatetruecolor($width, $height);
  504. $white = imagecolorallocate($img, 255, 255, 255);
  505. $black = imagecolorallocate($img, 0, 0, 0);
  506. $darkgray = imagecolorallocate($img, 100, 100, 100);
  507. $strong = imagecolorallocatealpha($img, 202, 0, 0, 15);
  508. $moderate = imagecolorallocatealpha($img, 204, 102, 0, 20);
  509. $present = imagecolorallocatealpha($img, 204, 204, 0, 35);
  510. $weak = imagecolorallocatealpha($img, 102, 204, 0, 50);
  511. $gray = imagecolorallocate($img, 190, 190, 190);
  512. $lightgray = $white; //imagecolorallocate($img, 230, 230, 230);
  513. imagefill($img, 0, 0, $lightgray);
  514. // Target coordinates
  515. $maxlength = 300;
  516. $t_length = ($tsize > $qsize)
  517. ? $maxlength : $maxlength - 50;
  518. $q_length = ($qsize > $tsize)
  519. ? $maxlength : $maxlength - 50;
  520. $tnormal = $t_length / $tsize;
  521. $qnormal = $q_length / $qsize;
  522. $t_ystart = 30;
  523. $t_yend = $t_ystart + 20;
  524. $t_xstart = 50;
  525. $t_xend = $t_xstart + $t_length;
  526. $t_center = $t_xstart + ($t_length / 2);
  527. // Target labels
  528. $warn = '"'. $hit_name . '"';
  529. imagestring($img, 5, $t_xstart, $t_ystart-20, $acc.$warn, $black);
  530. imagestring($img, 3, 5, $t_ystart+2, "Target", $black);
  531. // Draw bar representing target
  532. imagefilledrectangle($img, $t_xstart, $t_ystart, $t_xend, $t_yend, $gray);
  533. imagerectangle($img, $t_xstart, $t_ystart, $t_xend, $t_yend, $darkgray);
  534. // query coordinates
  535. $q_maxheight = 250;
  536. $q_ystart = $t_yend + 100;
  537. $q_yend = $q_ystart + 20;
  538. $q_xstart = $t_center - $q_length / 2;
  539. $q_xend = $q_xstart + $q_length;
  540. $q_center = ($q_xend + $q_xstart) / 2;
  541. $q_xwidth = $q_xend - $q_xstart;
  542. // Query labels
  543. imagestring($img, 5, $q_xstart, $q_yend+2, $name, $black);
  544. imagestring($img, 3, $q_xstart, $q_ystart+2, 'Query', $black);
  545. // Draw bar representing query
  546. imagefilledrectangle($img, $q_xstart, $q_ystart, $q_xend, $q_yend, $gray);
  547. imagerectangle($img ,$q_xstart, $q_ystart, $q_xend, $q_yend, $darkgray);
  548. // HSP bars will start here
  549. $hsp_bary = $q_yend + 20;
  550. // Draw solids for HSP alignments
  551. for ($ii=count($b_hits)-1; $ii>=0; $ii--) {
  552. // alignment
  553. $cur_hit = $b_hits[$ii];
  554. $cur_score = intval($b_scores[$ii]);
  555. // set color according to score
  556. $cur_color = $darkgray;
  557. if ($cur_score > 200) {
  558. $cur_color = $strong;
  559. }
  560. else if ($cur_score > 80 && $cur_score <= 200) {
  561. $cur_color = $moderate;
  562. }
  563. else if ($cur_score > 50 && $cur_score <= 80) {
  564. $cur_color = $present;
  565. }
  566. else if ($cur_score > 40 && $cur_score <= 50) {
  567. $cur_color = $weak;
  568. }
  569. $t_start = $tnormal * intval(strtok($cur_hit, "_")) + $t_xstart;
  570. $t_end = $tnormal * intval(strtok("_")) + $t_xstart;
  571. $q_start = $qnormal * intval(strtok("_")) + $q_xstart;
  572. $q_end = $qnormal * intval(strtok("_")) + $q_xstart;
  573. $hit1_array = array($t_start, $t_yend, $t_end, $t_yend, $q_end,
  574. $q_ystart, $q_start, $q_ystart);
  575. // HSP coords
  576. imagefilledpolygon($img, $hit1_array, 4, $cur_color);
  577. }//each hit
  578. // Draw lines over fills for HSP alignments
  579. for ($ii=0; $ii<count($b_hits); $ii++) {
  580. // alignment
  581. $cur_hit = $b_hits[$ii];
  582. $t_start = $tnormal * intval(strtok($cur_hit, "_")) + $t_xstart;
  583. $t_end = $tnormal * intval(strtok("_")) + $t_xstart;
  584. $q_start = $qnormal * intval(strtok("_")) + $q_xstart;
  585. $q_end = $qnormal * intval(strtok("_")) + $q_xstart;
  586. $hit1_array = array($t_start, $t_yend, $t_end, $t_yend, $q_end, $q_ystart,
  587. $q_start, $q_ystart,);
  588. imagerectangle($img, $t_start, $t_ystart, $t_end, $t_yend, $black);
  589. imagerectangle($img, $q_start, $q_ystart, $q_end, $q_yend, $black);
  590. imagepolygon ($img, $hit1_array, 4, $black);
  591. // show HSP
  592. imagestring($img, 3, 2, $hsp_bary, ($acc ."HSP" . ($ii + 1)), $black);
  593. $cur_score = intval($b_scores[$ii]);
  594. // set color according to score
  595. $cur_color = $darkgray;
  596. if ($cur_score > 200) {
  597. $cur_color = $strong;
  598. }
  599. else if ($cur_score > 80 && $cur_score <= 200) {
  600. $cur_color = $moderate;
  601. }
  602. else if ($cur_score > 50 && $cur_score <= 80) {
  603. $cur_color = $present;
  604. }
  605. else if ($cur_score > 40 && $cur_score <= 50) {
  606. $cur_color = $weak;
  607. }
  608. imagefilledrectangle($img, $q_start, $hsp_bary, $q_end, $hsp_bary+10, $cur_color);
  609. $hsp_bary += 15;
  610. }//each hit
  611. // Draw the key
  612. $xchart = 390;
  613. $ychart = 10;
  614. $fontsize = 4;
  615. $yinc = 20;
  616. $ywidth = 7;
  617. $xinc = 10;
  618. imagestring($img, 5, $xchart, $ychart - 5, "Bit Scores", $black);
  619. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 1) + $ywidth, ">= 200" , $black);
  620. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 2) + $ywidth, "80 - 200" , $black);
  621. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 3) + $ywidth, "50 - 80" , $black);
  622. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 4) + $ywidth, "40 - 50" , $black);
  623. imagestring($img, $fontsize, $xchart + $yinc + $xinc,$ychart + ($yinc * 5) + $ywidth, "< 40" , $black);
  624. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 1) + $xinc, $xchart + $yinc, $ychart + ($yinc * 2), $strong);
  625. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 2) + $xinc, $xchart + $yinc, $ychart + ($yinc * 3), $moderate);
  626. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 3) + $xinc, $xchart + $yinc, $ychart + ($yinc * 4), $present);
  627. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 4) + $xinc, $xchart + $yinc, $ychart + ($yinc * 5), $weak);
  628. imagefilledRectangle($img, $xchart, $ychart + ($yinc * 5) + $xinc, $xchart + $yinc, $ychart + ($yinc * 6), $darkgray);
  629. // Now, we have a completed image resource and need to change it to an actual image
  630. // that can be displayed. This is done using imagepng() but unfortuatly that function
  631. // either saves the image to a file or outputs it directly to the screen. Thus, we use
  632. // the following code to capture it and base64 encode it.
  633. ob_start(); // Start buffering the output
  634. imagepng($img, null, 0, PNG_NO_FILTER);
  635. $b64_img = base64_encode(ob_get_contents()); // Get what we've just outputted and base64 it
  636. imagedestroy($img);
  637. ob_end_clean();
  638. return $b64_img;
  639. }
  640. /**
  641. * Convert tsv blast output to gff output file.
  642. *
  643. * Created by Sofia Robb
  644. * 09/15/2016
  645. * counter bugfix 10/27/2016
  646. *
  647. * The subject (hit) will be the source feature.
  648. * The query will be the target.
  649. *
  650. * @todo: find a more efficient way since currently the first loop stores all the blast
  651. * results into an array and then the second loop prints them.
  652. *
  653. * @param $blast_tsv
  654. * The name of the blast tsv output file.
  655. * @param $blast_gff
  656. * The name of the blast gff output file.
  657. */
  658. function convert_tsv2gff3($blast_tsv,$blast_gff){
  659. // Open a new file for writting the gff.
  660. $gff = fopen($blast_gff,"w");
  661. fwrite($gff,"##gff-version 3\n");
  662. // Open the TSV file to read from.
  663. $tsv = fopen($blast_tsv, "r") or die("Unable to open tsv file!");
  664. // For each line in the TSV file...
  665. // Need to go thru each line of tsv to find the first and last hsp of a hit.
  666. $last_s = NULL;
  667. $hsp = NULL;
  668. $HitResult=array();
  669. while(!feof($tsv)) {
  670. $line = fgets($tsv);
  671. $line = rtrim($line);
  672. // Skip the line if it's empty.
  673. if (preg_match('/^#/',$line) or preg_match('/^\s*$/',$line)){
  674. continue;
  675. }
  676. ## for keeping track of new queries and hits
  677. // Each line has the following parts:
  678. // 0: query id,
  679. // 1: subject id,
  680. // 2: % identity,
  681. // 3: alignment length,
  682. // 4: mismatches,
  683. // 5: gap opens,
  684. // 6: q. start,
  685. // 7: q. end,
  686. // 8: s. start,
  687. // 9: s. end,
  688. // 10: evalue,
  689. // 11: bit score
  690. $parts = preg_split('/\t/', $line);
  691. // Assign the important parts of the line to readable variables.
  692. $s = $parts[1];
  693. $q = $parts[0];
  694. $ss = $parts[8];
  695. $se = $parts[9];
  696. $qs = $parts[6];
  697. $qe = $parts[7];
  698. $e = $parts[10];
  699. // if this is a new hit print the last and
  700. // empty the $HitResult array and
  701. // reset hsp counter
  702. if ($last_s != NULL and $s != $last_s ) {
  703. printGFF_parent_children($gff,$HitResult);
  704. $HitResult = array();
  705. $hsp=0;
  706. }
  707. // every line is a new hsp
  708. $hsp++;
  709. // determine query strand to use in match_part line, no need to store, just print
  710. $q_strand = '+';
  711. if ($qs > $qe) {
  712. list($qs,$qe) = array($qe,$qs);
  713. $q_strand = '-';
  714. }
  715. // determine subject (hit) strand to use in match line, needs to be stored
  716. $HitResult["$s,$q"]['strand']='+';
  717. list($start,$end) = array($ss,$se);
  718. if($ss > $se) {
  719. list($start,$end) = array($se,$ss);
  720. $HitResult["$s,$q"]['strand']='-';
  721. }
  722. // store smallest start
  723. if (!array_key_exists('SS',$HitResult["$s,$q"]) or $ss < $HitResult["$s,$q"]['SS']) {
  724. $HitResult["$s,$q"]['SS'] = $ss;
  725. }
  726. // store largest end
  727. if (!array_key_exists('SE',$HitResult["$s,$q"]) or $se > $HitResult["$s,$q"]['SE']) {
  728. $HitResult["$s,$q"]['SE'] = $se;
  729. }
  730. // store best evalue
  731. if (!array_key_exists('E',$HitResult["$s,$q"]) or $e < $HitResult["$s,$q"]['E']) {
  732. $HitResult["$s,$q"]['E'] = $e;
  733. }
  734. // generate the match_part line for each hsp
  735. $HitResult["$s,$q"]['HSPs'][] = join("\t", array($s, "BLASTRESULT" , "match_part" , $start , $end , $e , $HitResult["$s,$q"]['strand'] , '.' , "ID=$s.$q.$hsp;Parent=$s.$q;Target=$q $qs $qe $q_strand"));
  736. $last_s = $s;
  737. } // end tsv file while
  738. // print hit and hsp for the last hit
  739. printGFF_parent_children($gff,$HitResult);
  740. // Close the files.
  741. fclose($tsv);
  742. fclose($gff);
  743. }
  744. /**
  745. * printGFF_parent_children
  746. * prints the GFF parent feature and all of its children features
  747. *
  748. *
  749. * @param $blast_feature_array
  750. * an array of the all the child features which is used to generate the smallest and largest coordinates for the parent
  751. *
  752. *
  753. */
  754. function printGFF_parent_children ($gff,$blast_feature_array){
  755. foreach ($blast_feature_array as $sq => $value ) {
  756. list ($s,$q) = preg_split('/,/' , $sq);
  757. $evalue = $blast_feature_array["$s,$q"]['E'];
  758. $parent = join ("\t", array($s, "BLASTRESULT" , "match" , $blast_feature_array["$s,$q"]['SS'] , $blast_feature_array["$s,$q"]['SE'] , $blast_feature_array["$s,$q"]['E'] , $blast_feature_array["$s,$q"]['strand'] , '.' , "ID=$s.$q;Name=$q($evalue)")) . "\n";
  759. $child = join ("\n",$blast_feature_array["$s,$q"]['HSPs']) . "\n";
  760. fwrite($gff,$parent);
  761. fwrite($gff,$child);
  762. }
  763. }
  764. /**
  765. * Get text from cvitjs conf file, if possible.
  766. *
  767. * @param $genome_target
  768. * The section of the config to return. Should consist of "data."+[blastdb name].
  769. *
  770. * @return
  771. * A string containing the entire contents of the cvitjs configuration file. FALSE otherwise.
  772. */
  773. function blast_ui_get_cvit_conf_text($genome_target = FALSE) {
  774. // Retrieve the full path and filename of the conf.
  775. $cvit_conf = blast_ui_get_cvit_conf();
  776. if ($cvit_conf) {
  777. // Retrieve the contents of the file.
  778. $contents = '';
  779. if (file_exists($cvit_conf)) {
  780. $contents = file_get_contents($cvit_conf);
  781. }
  782. // If no genome target was provided then return the full file.
  783. if ($contents && $genome_target == FALSE) {
  784. return $contents;
  785. }
  786. // If a genome target was provided, then only return that section.
  787. if ($genome_target) {
  788. $section = array();
  789. $in_section = FALSE;
  790. // For each line of the configuration file...
  791. $section_header = '['.$genome_target.']';
  792. $lines = preg_split('/\r\n|\n|\r/', trim($contents));
  793. foreach($lines as $l) {
  794. // Are we in the section for this genome target?
  795. if (trim($l) == $section_header) {
  796. $in_section = TRUE; }
  797. // Id so and we haven't fallen out of it through an empty line,
  798. // then add it to saved section for returning.
  799. if ($in_section) {
  800. if (trim($l) == '') { break; }
  801. $section[] = trim($l);
  802. }
  803. }
  804. // If we found the section, then return it ;-).
  805. if (!empty($section)) {
  806. return implode("\n", $section);
  807. }
  808. }
  809. }
  810. return false;
  811. }
  812. /**
  813. * Get path to cvitjs conf file.
  814. *
  815. * @return
  816. * The path to the CViTjs codebase.
  817. */
  818. function blast_ui_get_cvit_conf($cvitjs_location = NULL) {
  819. if (!$cvitjs_location) {
  820. $cvitjs_location = libraries_get_path('cvitjs') . DIRECTORY_SEPARATOR;
  821. }
  822. $cvit_conf_path = $cvitjs_location . 'cvit.conf';
  823. return $cvit_conf_path;
  824. }