123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237 |
- <?php
/**
 * @file
 * Contains generally applicable helper functions, as well as functions meant
 * to help developers plug in to the BLAST UI functionality.
 */
/**
 * Returns a list of BLAST database options.
 *
 * @param $type
 *   The type of BLAST databases to restrict the list to. A node is included
 *   when its db_dbtype equals $type or the obsolete single-letter code
 *   ('p' when $type is 'protein', 'n' for any other value).
 *
 * @return
 *   An array where the nid is the key and the value is the human-readable
 *   name of the option. Entries are sorted by name and followed by a
 *   'Select a Dataset' entry with key 0.
 */
function get_blast_database_options($type) {

  // Use the Entity API to get a list of BLAST nodes to load.
  // We use this function in order to respect node access control so that
  // administrators can use this module in combination with a node access
  // module of their choice to limit access to specific BLAST databases.
  $query = new EntityFieldQuery();
  $query->entityCondition('entity_type', 'node')
    // Restrict to BLASTDB nodes.
    ->entityCondition('bundle', 'blastdb')
    // Restrict to published nodes.
    ->propertyCondition('status', 1)
    // Restrict to nodes the current user has permission to view.
    ->addTag('node_access');
  $entities = $query->execute();

  // The result has no 'node' key when nothing matched (for example when no
  // BLAST database has been created yet, or none is visible to this user),
  // so only load nodes when there is something to load.
  $nodes = array();
  if (!empty($entities['node'])) {
    $nodes = node_load_multiple(array_keys($entities['node']));
  }

  // Support the obsolete single-letter database types (n/p).
  $obs_type = ($type == 'protein') ? 'p' : 'n';

  $options = array();
  foreach ($nodes as $node) {
    if (!isset($node->db_dbtype)) {
      continue;
    }
    if ($node->db_dbtype == $type || $node->db_dbtype == $obs_type) {
      $options[$node->nid] = $node->db_name;
    }
  }

  // Sort by name; the placeholder is added afterwards so it is not sorted in
  // with the database names.
  asort($options);
  $options[0] = 'Select a Dataset';

  return $options;
}
/**
 * Run BLAST (should be called from the command-line).
 *
 * Runs the requested BLAST program writing an ASN.1 archive (-outfmt=11) and
 * then uses blast_formatter to convert that archive to XML, tab-delimited and
 * HTML files. All files are written under sites/default/files/.
 *
 * @param $program
 *   Which BLAST program to run (ie: 'blastn', 'tblastn', 'tblastx', 'blastp',
 *   'blastx').
 * @param $query
 *   The full path and filename of the query FASTA file.
 * @param $database
 *   The full path and filename prefix (excluding .nhr, .nin, .nsq, etc.).
 * @param $output_filestub
 *   The filename (not including path) to give the results. Should not include
 *   the file type suffix.
 * @param $options
 *   An array of additional options where the key is the name of the option
 *   used by BLAST (ie: 'num_alignments') and the value relates to this
 *   particular BLAST job (ie: 250).
 * @param $job_id
 *   (optional) Not used by this function.
 *
 * @return
 *   FALSE if the BLAST archive file was not created. No value is returned
 *   otherwise.
 */
function run_BLAST_tripal_job($program, $query, $database, $output_filestub, $options, $job_id = NULL) {

  $output_file = 'sites/default/files/' . $output_filestub . '.blast.asn';
  $output_file_xml = 'sites/default/files/' . $output_filestub . '.blast.xml';
  $output_file_tsv = 'sites/default/files/' . $output_filestub . '.blast.tsv';
  $output_file_html = 'sites/default/files/' . $output_filestub . '.blast.html';

  print "\nExecuting $program\n\n";
  print "Query: $query\n";
  print "Database: $database\n";
  print "Results File: $output_file\n";

  print "Options:\n";

  // Every piece of the command is escaped individually so that paths
  // containing spaces work and nothing supplied by the caller can be
  // interpreted by the shell.
  $blast_cmd = escapeshellarg($program)
    . ' -query ' . escapeshellarg($query)
    . ' -db ' . escapeshellarg($database)
    . ' -out ' . escapeshellarg($output_file)
    . ' -outfmt=11';
  if (!empty($options)) {
    foreach ($options as $opt => $val) {
      print "\t$opt: $val\n";
      $blast_cmd .= ' -' . escapeshellarg($opt);
      // An empty value means the option is a bare flag; do not pass an empty
      // argument to BLAST in that case.
      if ((string) $val !== '') {
        $blast_cmd .= ' ' . escapeshellarg($val);
      }
    }
  }

  print "\nExecuting the following BLAST command:\n" . $blast_cmd . "\n";

  system($blast_cmd);

  // Without the archive none of the conversions below can work.
  if (!file_exists($output_file)) {
    tripal_report_error(
      'blast_ui',
      TRIPAL_ERROR,
      "BLAST did not complete successfully as is implied by the lack of output file (%file). The command run was @command",
      array('%file' => $output_file, '@command' => $blast_cmd),
      array('print' => TRUE)
    );
    return FALSE;
  }

  $archive_arg = escapeshellarg($output_file);

  print "\nGenerating additional download formats...\n";

  print "\tXML\n";
  system("blast_formatter -archive $archive_arg -outfmt 5 -out " . escapeshellarg($output_file_xml));
  if (!file_exists($output_file_xml)) {
    tripal_report_error(
      'blast_ui',
      TRIPAL_ERROR,
      "Unable to convert BLAST ASN.1 archive (%archive) to XML (%file).",
      array('%archive' => $output_file, '%file' => $output_file_xml),
      array('print' => TRUE)
    );
  }

  print "\tTab-delimited\n";
  system("blast_formatter -archive $archive_arg -outfmt 7 -out " . escapeshellarg($output_file_tsv));
  if (!file_exists($output_file_tsv)) {
    tripal_report_error(
      'blast_ui',
      TRIPAL_WARNING,
      "Unable to convert BLAST ASN.1 archive (%archive) to Tabular Output (%file).",
      array('%archive' => $output_file, '%file' => $output_file_tsv),
      array('print' => TRUE)
    );
  }

  print "\tHTML (includes alignments)\n";
  system("blast_formatter -archive $archive_arg -outfmt 0 -out " . escapeshellarg($output_file_html) . " -html");
  // Check the HTML file itself (not the tab-delimited one) so that a failed
  // HTML conversion is actually reported.
  if (!file_exists($output_file_html)) {
    tripal_report_error(
      'blast_ui',
      TRIPAL_WARNING,
      "Unable to convert BLAST ASN.1 archive (%archive) to HTML Output (%file).",
      array('%archive' => $output_file, '%file' => $output_file_html),
      array('print' => TRUE)
    );
  }

  print "\nDone!\n";
}
/**
 * FASTA validating parser.
 *
 * A sequence in FASTA format begins with a single-line description, followed
 * by lines of sequence data. The description line is distinguished from the
 * sequence data by a greater-than (">") symbol in the first column. The word
 * following the ">" symbol is the identifier of the sequence, and the rest of
 * the line is the description (both are optional). There should be no space
 * between the ">" and the first letter of the identifier. The sequence ends
 * if another line starting with a ">" appears which indicates the start of
 * another sequence.
 *
 * Note that the character check is only enforced when the input does NOT
 * start with a description line; input that begins with ">..." followed by a
 * line break is always reported as valid.
 *
 * @param $type
 *   The type of sequence to be validated (ie: either nucleotide or protein).
 * @param $sequence
 *   A string of characters to be validated.
 *
 * @return
 *   A boolean: TRUE if the sequence does not pass the format validation stage
 *   and FALSE otherwise (including when $type is not recognised).
 */
function validate_fasta_sequence($type, $sequence) {

  // Pattern matching any character that is NOT allowed in the sequence data.
  if ($type == 'nucleotide') {
    $fastaSeqRegEx = '/[^acgntuACGNTU\n\r]/';
  }
  elseif ($type == 'protein') {
    // Amino acid codes (including the ambiguity codes B, Z and X and
    // selenocysteine U) plus '*' (translation stop) and '-' (gap).
    $fastaSeqRegEx = '/[^abcdefghiklmnpqrstuvwyzxABCDEFGHIKLMNPQRSTUVWYZX\*\-\n\r]/';
  }
  else {
    // Unknown sequence types are not validated.
    return FALSE;
  }

  // A description line: ">" in the first column terminated by a line break.
  $fastaIdRegEx = '/^>.*(\\n|\\r)/';

  $has_invalid_character = (bool) preg_match($fastaSeqRegEx, $sequence);
  $has_description_line = (bool) preg_match($fastaIdRegEx, $sequence);

  return $has_invalid_character && !$has_description_line;
}
/**
 * Retrieve the regex to capture the Link-out Accession from the Hit Def.
 *
 * @param $node
 *   The node object of the BLAST database the hit is from. Its
 *   linkout->regex is returned when non-empty; otherwise
 *   linkout->regex_type selects one of the built-in patterns.
 * @param $options
 *   An array of options that can be passed to this function. No options are
 *   currently read by this function.
 *
 * @return
 *   A PHP regex for use with preg_match to capture the Link-out Accession in
 *   the first capture group, or NULL when no custom regex is set and the
 *   regex type is missing or not recognised.
 */
function get_blastdb_linkout_regex($node, $options = array()) {

  // A custom regex always takes precedence over the built-in types.
  if (!empty($node->linkout->regex)) {
    return $node->linkout->regex;
  }

  $regex_type = isset($node->linkout->regex_type) ? $node->linkout->regex_type : NULL;

  // Defined up-front so that an unknown type yields NULL rather than an
  // undefined variable.
  $regex = NULL;
  switch ($regex_type) {
    case 'default':
      // Everything up to the first whitespace character.
      $regex = '/^(\S+).*/';
      break;

    case 'genbank':
      // The quantifier sits inside the group so that the whole accession is
      // captured rather than only its last character.
      $regex = '/^gb\|([^\|]*)\|.*/';
      break;

    case 'embl':
      $regex = '/^embl\|([^\|]*)\|.*/';
      break;

    case 'swissprot':
      $regex = '/^sp\|([^\|]*)\|.*/';
      break;
  }

  return $regex;
}
|