12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860 |
- <?php
/**
 * Loads the BLAST database node matching the supplied identifier.
 *
 * Only one lookup is attempted: the keys are checked in the order 'nid',
 * 'name', 'path' and the first one that is set is used.
 *
 * @param $identifiers
 *   An array containing one of the following keys:
 *   - nid: a node ID, handed to node_load() as-is.
 *   - name: matched exactly against blastdb.name.
 *   - path: matched as a prefix (LIKE 'path%') against blastdb.path.
 *
 * @return
 *   The result of node_load() for the resolved node ID, or FALSE when none of
 *   the recognised keys is set.
 */
function get_blast_database($identifiers) {

  // A node ID needs no lookup at all.
  if (isset($identifiers['nid'])) {
    return node_load($identifiers['nid']);
  }

  // Resolve the node ID through the exact database name.
  if (isset($identifiers['name'])) {
    $found_nid = db_query(
      'SELECT nid FROM {blastdb} WHERE name=:name',
      array(':name' => $identifiers['name'])
    )->fetchField();
    return node_load($found_nid);
  }

  // Resolve the node ID through a path prefix.
  if (isset($identifiers['path'])) {
    $found_nid = db_query(
      'SELECT nid FROM {blastdb} WHERE path LIKE :path',
      array(':path' => db_like($identifiers['path']) . '%')
    )->fetchField();
    return node_load($found_nid);
  }

  return FALSE;
}
/**
 * Builds a select-list options array of published BLAST database nodes.
 *
 * @param $type
 *   The database type to list. 'protein' also accepts nodes whose db_dbtype
 *   is the single letter 'p'; any other value also accepts 'n'.
 *
 * @return
 *   An array of db_name values keyed by node ID, sorted by name. Empty when
 *   no published blastdb node is found.
 */
function get_blast_database_options($type) {

  // Published blastdb nodes, subject to node access checks.
  $query = new EntityFieldQuery();
  $query->entityCondition('entity_type', 'node')
    ->entityCondition('bundle', 'blastdb')
    ->propertyCondition('status', 1)
    ->addTag('node_access');
  $entities = $query->execute();

  // The result has no 'node' key when nothing matched, so bail out before
  // array_keys() is handed a non-array.
  if (empty($entities['node'])) {
    return array();
  }
  $nodes = node_load_multiple(array_keys($entities['node']));

  // Single-letter spelling of the requested type that is accepted as well.
  $obs_type = ($type == 'protein') ? 'p' : 'n';

  $options = array();
  foreach ($nodes as $node) {
    if (!isset($node->db_dbtype)) {
      continue;
    }
    if ($node->db_dbtype == $type || $node->db_dbtype == $obs_type) {
      $options[$node->nid] = $node->db_name;
    }
  }

  // Sort by database name while keeping the node ID keys.
  asort($options);
  return $options;
}
/**
 * Assembles a BLAST job object from the blastjob record and the Tripal job.
 *
 * @param $job_id
 *   The job ID, used both for the blastjob table and for tripal_get_job().
 *
 * @return
 *   A stdClass with: job_id, program, options (unserialized), date_submitted,
 *   date_started, date_completed, blastdb (the database node, or a stand-in
 *   object for user-uploaded targets) and files (query, target and the
 *   result->archive/xml/tsv/html/gff paths derived from result_filestub).
 */
function get_BLAST_job($job_id) {

  // Table names must be wrapped in {} so Drupal can apply the configured
  // table prefix, as the blastdb queries in this file already do.
  $blastjob = db_query(
    'SELECT * FROM {blastjob} WHERE job_id=:id',
    array(':id' => $job_id)
  )->fetchObject();
  $tripal_job = tripal_get_job($job_id);

  $job = new stdClass();
  $job->job_id = $job_id;
  $job->program = $blastjob->blast_program;
  // NOTE(review): unserialize() is only safe if the options column can never
  // hold attacker-controlled data; confirm what writes this column.
  $job->options = unserialize($blastjob->options);
  $job->date_submitted = $tripal_job->submit_date;
  $job->date_started = $tripal_job->start_time;
  $job->date_completed = $tripal_job->end_time;

  if ($blastjob->target_blastdb) {
    // The target is a stored BLAST database node.
    $job->blastdb = get_blast_database(array('nid' => $blastjob->target_blastdb));
  }
  else {
    // The target was uploaded by the user: build an object exposing the same
    // properties that are read from a database node.
    $job->blastdb = new stdClass();
    $job->blastdb->db_name = 'User Uploaded';
    $job->blastdb->db_path = $blastjob->target_file;
    $job->blastdb->linkout = new stdClass();
    $job->blastdb->linkout->none = TRUE;
    if ($job->program == 'blastp' || $job->program == 'tblastn') {
      $job->blastdb->db_dbtype = 'protein';
    }
    else {
      $job->blastdb->db_dbtype = 'nucleotide';
    }
  }

  // Every result format shares the same file stub.
  $job->files = new stdClass();
  $job->files->query = $blastjob->query_file;
  $job->files->target = $blastjob->target_file;
  $job->files->result = new stdClass();
  $job->files->result->archive = $blastjob->result_filestub . '.asn';
  $job->files->result->xml = $blastjob->result_filestub . '.xml';
  $job->files->result->tsv = $blastjob->result_filestub . '.tsv';
  $job->files->result->html = $blastjob->result_filestub . '.html';
  $job->files->result->gff = $blastjob->result_filestub . '.gff';

  return $job;
}
/**
 * Runs a BLAST search and converts the result archive to download formats.
 *
 * The search writes a BLAST archive (<stub>.asn); blast_formatter then
 * produces XML, tab-delimited and HTML versions, and the tab-delimited file
 * is converted to GFF3. Progress is printed as the job runs.
 *
 * @param $program
 *   Name of the BLAST executable (appended to the 'blast_path' variable).
 * @param $query
 *   Path of the query file.
 * @param $database
 *   Path of the BLAST database; a trailing ".p??" / ".n??" extension is
 *   stripped.
 * @param $output_filestub
 *   Path stub to which .asn, .xml, .tsv, .html and .gff are appended.
 * @param $options
 *   Array of command-line options (name => value). Values that are empty
 *   after trimming are skipped.
 * @param $job_id
 *   Not used by this function.
 *
 * @return
 *   FALSE when an executable is missing or the archive was not produced;
 *   nothing otherwise. Failed conversions are reported but do not abort.
 */
function run_BLAST_tripal_job($program, $query, $database, $output_filestub, $options, $job_id = NULL) {

  $output_file = $output_filestub . '.asn';
  $output_file_xml = $output_filestub . '.xml';
  $output_file_tsv = $output_filestub . '.tsv';
  $output_file_html = $output_filestub . '.html';
  $output_file_gff = $output_filestub . '.gff';

  print "\nExecuting $program\n\n";
  print "Query: $query\n";
  print "Database: $database\n";
  print "Results File: $output_file\n";
  print "Options:\n";

  $blast_path = variable_get('blast_path', '');
  $blast_threads = variable_get('blast_threads', 1);

  // Strip a database file extension (e.g. ".nhr", ".pin") if one was given.
  $database = preg_replace("/(.*)\.[pn]\w\w$/", '$1', $database);

  // Both executables must exist before anything is run.
  $program = $blast_path . $program;
  if (!file_exists($program)) {
    tripal_report_error(
      'blast_ui',
      TRIPAL_ERROR,
      "Unable to find the BLAST executable (ie: /usr/bin/blastn). This can be changed in the admin settings; you supplied: @command",
      array('@command' => $program),
      array('print' => TRUE)
    );
    return FALSE;
  }

  $blast_formatter_command = $blast_path . 'blast_formatter';
  if (!file_exists($blast_formatter_command)) {
    tripal_report_error(
      'blast_ui',
      TRIPAL_ERROR,
      "Unable to find the BLAST Formatter executable (ie: /usr/bin/blast_formatter). This can be changed in the admin settings; you supplied: @command",
      array('@command' => $blast_formatter_command),
      array('print' => TRUE)
    );
    return FALSE;
  }

  // Every argument is shell-escaped individually.
  $blast_cmd = escapeshellarg($program) . ' -query ' . escapeshellarg($query) . ' -db ' . escapeshellarg($database) . ' -out ' . escapeshellarg($output_file) . ' -outfmt=11';
  if (!empty($options)) {
    foreach ($options as $opt => $val) {
      $val = trim($val);
      if (!empty($val)) {
        print "\t$opt: $val\n";
        $blast_cmd .= ' -' . escapeshellarg($opt) . ' ' . escapeshellarg($val);
      }
    }
  }
  $blast_cmd .= ' -num_threads ' . escapeshellarg($blast_threads);

  print "\nExecuting the following BLAST command:\n" . $blast_cmd . "\n";
  system($blast_cmd);

  // Without the archive there is nothing to convert.
  if (!file_exists($output_file)) {
    tripal_report_error(
      'blast_ui',
      TRIPAL_ERROR,
      "BLAST did not complete successfully as is implied by the lack of output file (%file). The command run was @command",
      array('%file' => $output_file, '@command' => $blast_cmd),
      array('print' => TRUE)
    );
    return FALSE;
  }

  print "\nGenerating additional download formats...\n";

  print "\tXML\n";
  $format_cmd = escapeshellarg($blast_formatter_command) . ' -archive ' . escapeshellarg($output_file) . ' -outfmt 5 -out ' . escapeshellarg($output_file_xml);
  print "\t\tExecuting $format_cmd\n\n";
  system($format_cmd);
  if (!file_exists($output_file_xml)) {
    tripal_report_error(
      'blast_ui',
      TRIPAL_ERROR,
      "Unable to convert BLAST ASN.1 archive to XML (%archive => %file).",
      array('%archive' => $output_file, '%file' => $output_file_xml),
      array('print' => TRUE)
    );
  }

  print "\tTab-delimited\n";
  $format_cmd = escapeshellarg($blast_formatter_command) . ' -archive ' . escapeshellarg($output_file) . ' -outfmt 7 -out ' . escapeshellarg($output_file_tsv);
  print "\t\tExecuting $format_cmd\n\n";
  system($format_cmd);
  if (!file_exists($output_file_tsv)) {
    tripal_report_error(
      'blast_ui',
      TRIPAL_WARNING,
      "Unable to convert BLAST ASN.1 archive to Tabular Output (%archive => %file).",
      array('%archive' => $output_file, '%file' => $output_file_tsv),
      array('print' => TRUE)
    );
  }

  print "\tGFF\n";
  // The GFF converter reads the tab-delimited file, so only call it when that
  // file exists; a missing GFF file is reported just below either way.
  if (file_exists($output_file_tsv)) {
    convert_tsv2gff3($output_file_tsv, $output_file_gff);
  }
  if (!file_exists($output_file_gff)) {
    tripal_report_error(
      'blast_ui',
      TRIPAL_WARNING,
      "Unable to convert BLAST Tabular Output to GFF Output (%archive => %file).",
      array('%archive' => $output_file, '%file' => $output_file_gff),
      array('print' => TRUE)
    );
  }

  print "\tHTML (includes alignments)\n";
  $format_cmd = escapeshellarg($blast_formatter_command) . ' -archive ' . escapeshellarg($output_file) . ' -outfmt 0 -out ' . escapeshellarg($output_file_html) . ' -html';
  print "\t\tExecuting $format_cmd\n\n";
  system($format_cmd);
  // Check the HTML file itself (not the tab-delimited one) so that a failed
  // HTML conversion is actually reported.
  if (!file_exists($output_file_html)) {
    tripal_report_error(
      'blast_ui',
      TRIPAL_WARNING,
      "Unable to convert BLAST ASN.1 archive to HTML Output (%archive => %file).",
      array('%archive' => $output_file, '%file' => $output_file_html),
      array('print' => TRUE)
    );
  }

  print "\nDone!\n";
}
/**
 * Checks that a string is FASTA: definition line(s) followed by residues.
 *
 * The text is validated line by line. The first non-blank line must be a
 * definition line (starting with '>'), every other non-blank line must be
 * either another definition line or consist solely of characters allowed for
 * the molecule type, and at least one residue line must be present. Any of
 * "\n", "\r\n" or "\r" is accepted as a line break, and blank lines
 * (including a trailing line break) are ignored.
 *
 * @param $type
 *   'nucleotide' to validate against the nucleotide alphabet; any other value
 *   validates against the protein alphabet (which also allows '*' and '-').
 * @param $sequence
 *   The text to validate.
 *
 * @return
 *   TRUE when the text is valid FASTA of the given type, FALSE otherwise
 *   (including empty input, input without a definition line and input with a
 *   definition line but no residues).
 */
function validate_fasta_sequence($type, $sequence) {

  // Allowed residue characters, including IUPAC ambiguity codes.
  $fastaSeqRegEx = ($type == 'nucleotide')
    ? '/^[ATCGNUKMBVSWDYRHatcgnukmbvswdyrh\s\n\r]*$/'
    : '/^[ABCDEFGHIKLMNPQRSTUVWYZXabcdefghiklmnpqrstuvwyzx\*\-\s\n\r]*$/';

  $seen_defline = FALSE;
  $seen_residues = FALSE;

  foreach (preg_split('/\r\n|\r|\n/', (string) $sequence) as $line) {
    // Blank lines carry no information.
    if (trim($line) === '') {
      continue;
    }

    if ($line[0] === '>') {
      $seen_defline = TRUE;
      continue;
    }

    // Residues before any definition line, or characters outside the
    // alphabet, make the whole input invalid.
    if (!$seen_defline || !preg_match($fastaSeqRegEx, $line)) {
      return FALSE;
    }
    $seen_residues = TRUE;
  }

  return $seen_defline && $seen_residues;
}
/**
 * Returns the regular expression used to pull an accession out of a hit name.
 *
 * A custom expression stored in $node->linkout->regex takes precedence;
 * otherwise the expression is chosen from $node->linkout->regex_type. In all
 * built-in expressions the first capture group holds the accession.
 *
 * @param $node
 *   An object with a linkout property providing regex and/or regex_type.
 * @param $options
 *   Not used by this function.
 *
 * @return
 *   The regular expression as a string, or NULL when no custom expression is
 *   set and regex_type is missing or not one of 'default', 'genbank', 'embl',
 *   'swissprot'.
 */
function get_blastdb_linkout_regex($node, $options = array()) {

  // A custom expression always wins.
  if (!empty($node->linkout->regex)) {
    return $node->linkout->regex;
  }

  $regex_type = isset($node->linkout->regex_type) ? $node->linkout->regex_type : NULL;

  // The quantifier sits inside the capture group so that the whole accession
  // is captured rather than only its last character.
  switch ($regex_type) {
    case 'default':
      return '/^(\S+).*/';

    case 'genbank':
      return '/^gb\|([^\|]*)\|.*/';

    case 'embl':
      return '/^embl\|([^\|]*)\|.*/';

    case 'swissprot':
      return '/^sp\|([^\|]*)\|.*/';
  }

  return NULL;
}
/**
 * Returns the BLAST jobs recorded in the current session.
 *
 * Each entry of $_SESSION['blast_jobs'] is passed through
 * blast_ui_reveal_secret() to obtain the job ID, the job is loaded with
 * get_BLAST_job() and a query_summary is attached.
 *
 * @param $programs
 *   Optional list of BLAST program names. When non-empty, only jobs whose
 *   program is in this list are returned.
 *
 * @return
 *   An array of job objects; empty when the session holds no jobs.
 */
function get_recent_blast_jobs($programs = array()) {

  // Nothing recorded in this session.
  if (!isset($_SESSION['blast_jobs'])) {
    return array();
  }

  $restrict_programs = !empty($programs);
  $recent = array();

  foreach ($_SESSION['blast_jobs'] as $secret) {
    $job = get_BLAST_job(blast_ui_reveal_secret($secret));

    // Drop jobs for programs the caller did not ask for.
    if ($restrict_programs && !in_array($job->program, $programs)) {
      continue;
    }

    $job->query_summary = format_query_headers($job->files->query);
    $recent[] = $job;
  }

  return $recent;
}
/**
 * Counts the BLAST jobs recorded in the current session.
 *
 * @return
 *   The number of entries in $_SESSION['blast_jobs'], or 0 when it is not set.
 */
function get_number_of_recent_jobs() {
  return isset($_SESSION['blast_jobs']) ? sizeof($_SESSION['blast_jobs']) : 0;
}
/**
 * Summarizes the queries in a FASTA file from its '>' lines.
 *
 * The lines are collected by running grep on the file.
 *
 * @param $file
 *   Path of the query file.
 *
 * @return
 *   - The first matching line without its leading '>' when there is exactly
 *     one match.
 *   - 'N queries including "<first line>"' when there are several.
 *   - 'Unnamed Query' when nothing matched.
 */
function format_query_headers($file) {

  $deflines = array();
  exec('grep ">" ' . escapeshellarg($file), $deflines);
  $total = sizeof($deflines);

  if (isset($deflines[0])) {
    $first = ltrim($deflines[0], '>');
    if ($total == 1) {
      return $first;
    }
    return $total . ' queries including "' . $first . '"';
  }

  if (empty($deflines)) {
    return 'Unnamed Query';
  }

  // Fallback for a non-empty list that has no element at index 0.
  return $total . ' queries';
}
/**
 * Comparison callback ordering jobs by date_submitted, smallest first.
 *
 * @return
 *   The difference $a->date_submitted minus $b->date_submitted.
 */
function sort_blast_jobs_by_date_submitted_asc($a, $b) {
  $first = $a->date_submitted;
  $second = $b->date_submitted;
  return $first - $second;
}
/**
 * Comparison callback ordering jobs by date_submitted, largest first.
 *
 * @return
 *   The difference $b->date_submitted minus $a->date_submitted.
 */
function sort_blast_jobs_by_date_submitted_desc($a, $b) {
  $first = $a->date_submitted;
  $second = $b->date_submitted;
  return $second - $first;
}
/**
 * Picks the palette entry for a bit score.
 *
 * @param $score
 *   The bit score as an integer.
 * @param $palette
 *   Array with the keys 'strong', 'moderate', 'present', 'weak' and 'none'.
 *
 * @return
 *   'strong' above 200, 'moderate' above 80, 'present' above 50, 'weak' above
 *   40 and 'none' otherwise.
 */
function _blast_ui_hit_image_score_color($score, $palette) {
  if ($score > 200) {
    return $palette['strong'];
  }
  if ($score > 80) {
    return $palette['moderate'];
  }
  if ($score > 50) {
    return $palette['present'];
  }
  if ($score > 40) {
    return $palette['weak'];
  }
  return $palette['none'];
}

/**
 * Draws the target/query HSP diagram for one BLAST hit.
 *
 * The image shows a target bar and a query bar joined by one coloured
 * polygon per HSP, a coloured bar per HSP below the query, and a bit-score
 * legend.
 *
 * @param $acc
 *   Prefix for the title and the HSP labels.
 * @param $scores
 *   Bit scores separated by ';', in the same order as $hits. A missing score
 *   is treated as 0.
 * @param $hits
 *   HSPs separated by ';', each written as
 *   "targetStart_targetEnd_queryStart_queryEnd". Missing numbers count as 0.
 * @param $tsize
 *   Length of the target sequence.
 * @param $qsize
 *   Length of the query sequence.
 * @param $name
 *   Label drawn under the query bar.
 * @param $hit_name
 *   Hit name, drawn in double quotes in the title.
 *
 * @return
 *   The PNG image, base64 encoded.
 */
function generate_blast_hit_image($acc = '', $scores, $hits, $tsize, $qsize, $name, $hit_name) {

  // Empty entries are skipped, exactly as a strtok() walk would do.
  $b_hits = preg_split('/;/', (string) $hits, -1, PREG_SPLIT_NO_EMPTY);
  $b_scores = preg_split('/;/', (string) $scores, -1, PREG_SPLIT_NO_EMPTY);

  // Canvas: the height grows with the number of HSPs.
  $height = 200 + (count($b_hits) * 16);
  $width = 520;
  $img = imagecreatetruecolor($width, $height);

  $white = imagecolorallocate($img, 255, 255, 255);
  $black = imagecolorallocate($img, 0, 0, 0);
  $darkgray = imagecolorallocate($img, 100, 100, 100);
  $gray = imagecolorallocate($img, 190, 190, 190);
  $palette = array(
    'strong' => imagecolorallocatealpha($img, 202, 0, 0, 15),
    'moderate' => imagecolorallocatealpha($img, 204, 102, 0, 20),
    'present' => imagecolorallocatealpha($img, 204, 204, 0, 35),
    'weak' => imagecolorallocatealpha($img, 102, 204, 0, 50),
    'none' => $darkgray,
  );
  imagefill($img, 0, 0, $white);

  // The longer sequence gets the full bar length, the other a shorter bar.
  $maxlength = 300;
  $t_length = ($tsize > $qsize) ? $maxlength : $maxlength - 50;
  $q_length = ($qsize > $tsize) ? $maxlength : $maxlength - 50;

  // Pixels per residue. A zero (or negative) size would otherwise divide by
  // zero; such hits collapse onto the left edge of their bar.
  $tnormal = ($tsize > 0) ? $t_length / $tsize : 0;
  $qnormal = ($qsize > 0) ? $q_length / $qsize : 0;

  // Target bar.
  $t_ystart = 30;
  $t_yend = $t_ystart + 20;
  $t_xstart = 50;
  $t_xend = $t_xstart + $t_length;
  $t_center = $t_xstart + ($t_length / 2);

  imagestring($img, 5, $t_xstart, $t_ystart - 20, $acc . '"' . $hit_name . '"', $black);
  imagestring($img, 3, 5, $t_ystart + 2, "Target", $black);
  imagefilledrectangle($img, $t_xstart, $t_ystart, $t_xend, $t_yend, $gray);
  imagerectangle($img, $t_xstart, $t_ystart, $t_xend, $t_yend, $darkgray);

  // Query bar, centred under the target bar. Coordinates are cast to int
  // (truncation) because the GD drawing functions take integers.
  $q_ystart = $t_yend + 100;
  $q_yend = $q_ystart + 20;
  $q_xstart = (int) ($t_center - $q_length / 2);
  $q_xend = $q_xstart + $q_length;

  imagestring($img, 5, $q_xstart, $q_yend + 2, $name, $black);
  imagestring($img, 3, $q_xstart, $q_ystart + 2, 'Query', $black);
  imagefilledrectangle($img, $q_xstart, $q_ystart, $q_xend, $q_yend, $gray);
  imagerectangle($img, $q_xstart, $q_ystart, $q_xend, $q_yend, $darkgray);

  // Work out the pixel geometry and colour of every HSP once.
  $hsps = array();
  foreach ($b_hits as $index => $hit) {
    $numbers = preg_split('/_/', $hit, -1, PREG_SPLIT_NO_EMPTY);
    $coords = array();
    for ($i = 0; $i < 4; $i++) {
      $coords[] = isset($numbers[$i]) ? intval($numbers[$i]) : 0;
    }
    $score = isset($b_scores[$index]) ? intval($b_scores[$index]) : 0;

    $t_start = (int) ($tnormal * $coords[0] + $t_xstart);
    $t_end = (int) ($tnormal * $coords[1] + $t_xstart);
    $q_start = (int) ($qnormal * $coords[2] + $q_xstart);
    $q_end = (int) ($qnormal * $coords[3] + $q_xstart);

    $hsps[] = array(
      't_start' => $t_start,
      't_end' => $t_end,
      'q_start' => $q_start,
      'q_end' => $q_end,
      // Quadrilateral joining the target span to the query span.
      'polygon' => array($t_start, $t_yend, $t_end, $t_yend, $q_end, $q_ystart, $q_start, $q_ystart),
      'color' => _blast_ui_hit_image_score_color($score, $palette),
    );
  }

  // Filled polygons are drawn last-to-first so the first HSP ends up on top.
  for ($ii = count($hsps) - 1; $ii >= 0; $ii--) {
    imagefilledpolygon($img, $hsps[$ii]['polygon'], 4, $hsps[$ii]['color']);
  }

  // Outlines, labels and the per-HSP bars below the query.
  $hsp_bary = $q_yend + 20;
  foreach ($hsps as $ii => $hsp) {
    imagerectangle($img, $hsp['t_start'], $t_ystart, $hsp['t_end'], $t_yend, $black);
    imagerectangle($img, $hsp['q_start'], $q_ystart, $hsp['q_end'], $q_yend, $black);
    imagepolygon($img, $hsp['polygon'], 4, $black);

    imagestring($img, 3, 2, $hsp_bary, ($acc . "HSP" . ($ii + 1)), $black);
    imagefilledrectangle($img, $hsp['q_start'], $hsp_bary, $hsp['q_end'], $hsp_bary + 10, $hsp['color']);
    $hsp_bary += 15;
  }

  // Bit-score legend.
  // NOTE(review): the first label reads ">= 200" while the colour ladder uses
  // a strict "> 200"; confirm which boundary is intended.
  $xchart = 390;
  $ychart = 10;
  $fontsize = 4;
  $yinc = 20;
  $ywidth = 7;
  $xinc = 10;
  $legend = array(
    1 => array(">= 200", $palette['strong']),
    2 => array("80 - 200", $palette['moderate']),
    3 => array("50 - 80", $palette['present']),
    4 => array("40 - 50", $palette['weak']),
    5 => array("< 40", $palette['none']),
  );
  imagestring($img, 5, $xchart, $ychart - 5, "Bit Scores", $black);
  foreach ($legend as $row => $entry) {
    imagestring($img, $fontsize, $xchart + $yinc + $xinc, $ychart + ($yinc * $row) + $ywidth, $entry[0], $black);
  }
  foreach ($legend as $row => $entry) {
    imagefilledrectangle($img, $xchart, $ychart + ($yinc * $row) + $xinc, $xchart + $yinc, $ychart + ($yinc * ($row + 1)), $entry[1]);
  }

  // Capture the PNG bytes from the output buffer.
  ob_start();
  imagepng($img, NULL, 0, PNG_NO_FILTER);
  $b64_img = base64_encode(ob_get_contents());
  imagedestroy($img);
  ob_end_clean();

  return $b64_img;
}
/**
 * Converts tab-delimited BLAST output into a GFF3 file.
 *
 * Each subject/query pair becomes a "match" feature spanning all of its HSPs
 * on the subject, followed by one "match_part" feature per HSP. Comment
 * lines (starting with '#'), blank lines and rows with fewer than 11 columns
 * are skipped. The columns read are: 0 query, 1 subject, 6-7 query
 * start/end, 8-9 subject start/end, 10 e-value.
 *
 * @param $blast_tsv
 *   Path of the tab-delimited BLAST output to read.
 * @param $blast_gff
 *   Path of the GFF3 file to write.
 *
 * @return
 *   TRUE when the GFF3 file was written, FALSE when the input could not be
 *   read or the output could not be opened. In the first case the output
 *   file is not created.
 */
function convert_tsv2gff3($blast_tsv, $blast_gff) {

  // Open the input first so that a missing input does not leave an empty
  // GFF file behind.
  if (!is_readable($blast_tsv)) {
    return FALSE;
  }
  $tsv = fopen($blast_tsv, "r");
  if ($tsv === FALSE) {
    return FALSE;
  }

  $gff = fopen($blast_gff, "w");
  if ($gff === FALSE) {
    fclose($tsv);
    return FALSE;
  }
  fwrite($gff, "##gff-version 3\n");

  $last_s = NULL;
  $hsp = 0;
  $HitResult = array();

  while (($line = fgets($tsv)) !== FALSE) {
    $line = rtrim($line);

    // Skip blank lines and comments.
    if ($line === '' || $line[0] === '#') {
      continue;
    }

    $parts = explode("\t", $line);
    if (count($parts) < 11) {
      continue;
    }

    $q = $parts[0];
    $s = $parts[1];
    $qs = (int) $parts[6];
    $qe = (int) $parts[7];
    $ss = (int) $parts[8];
    $se = (int) $parts[9];
    $e = $parts[10];

    // A new subject: write out what was collected and start over.
    if ($last_s !== NULL && $s !== $last_s) {
      printGFF_parent_children($gff, $HitResult);
      $HitResult = array();
      $hsp = 0;
    }
    $hsp++;

    // GFF3 requires start <= end; the orientation goes into the strand.
    $q_strand = '+';
    if ($qs > $qe) {
      list($qs, $qe) = array($qe, $qs);
      $q_strand = '-';
    }
    $s_strand = '+';
    $start = $ss;
    $end = $se;
    if ($ss > $se) {
      $start = $se;
      $end = $ss;
      $s_strand = '-';
    }

    $key = "$s,$q";
    if (!isset($HitResult[$key])) {
      $HitResult[$key] = array(
        'subject' => $s,
        'query' => $q,
        'SS' => $start,
        'SE' => $end,
        'E' => $e,
        'HSPs' => array(),
      );
    }
    else {
      // Extend the parent with the strand-normalised coordinates so that
      // minus-strand hits also end up with start <= end.
      if ($start < $HitResult[$key]['SS']) {
        $HitResult[$key]['SS'] = $start;
      }
      if ($end > $HitResult[$key]['SE']) {
        $HitResult[$key]['SE'] = $end;
      }
      // Keep the smallest e-value, preserving its original spelling.
      if ((float) $e < (float) $HitResult[$key]['E']) {
        $HitResult[$key]['E'] = $e;
      }
    }
    // The parent takes the strand of the most recently read HSP.
    $HitResult[$key]['strand'] = $s_strand;

    $HitResult[$key]['HSPs'][] = join("\t", array(
      $s,
      "BLASTRESULT",
      "match_part",
      $start,
      $end,
      $e,
      $s_strand,
      '.',
      "ID=$s.$q.$hsp;Parent=$s.$q;Target=$q $qs $qe $q_strand",
    ));
    $last_s = $s;
  }

  // Write out the final subject.
  printGFF_parent_children($gff, $HitResult);

  fclose($tsv);
  fclose($gff);
  return TRUE;
}

/**
 * Writes the "match" feature and its "match_part" children for each hit.
 *
 * @param $gff
 *   An open, writable file handle.
 * @param $blast_feature_array
 *   Array keyed by "subject,query". Each value provides 'SS', 'SE', 'E',
 *   'strand' and 'HSPs' (a list of ready-made GFF lines), and optionally
 *   'subject' and 'query'. When those two are absent the names are taken
 *   from the array key, split at its first comma.
 */
function printGFF_parent_children($gff, $blast_feature_array) {
  foreach ($blast_feature_array as $sq => $feature) {
    if (isset($feature['subject']) && isset($feature['query'])) {
      // Explicit names are safe even when an identifier contains a comma.
      $s = $feature['subject'];
      $q = $feature['query'];
    }
    else {
      list($s, $q) = array_pad(explode(',', $sq, 2), 2, '');
    }

    $evalue = $feature['E'];
    $parent = join("\t", array(
      $s,
      "BLASTRESULT",
      "match",
      $feature['SS'],
      $feature['SE'],
      $evalue,
      $feature['strand'],
      '.',
      "ID=$s.$q;Name=$q($evalue)",
    )) . "\n";
    $child = join("\n", $feature['HSPs']) . "\n";

    fwrite($gff, $parent);
    fwrite($gff, $child);
  }
}
|