blast_ui.api.inc 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. <?php
  2. /**
  3. * @file
  4. * Contains more generally applicable functions as well as some meant to help developers
  5. * Plug-in to the BLAST UI functionality
  6. */
  7. /**
  8. * Returns a list BLAST DATABASE options
  9. *
  10. * @param $type
  11. * The type of BLAST dabases to restrict the list to (ie: n: nucleotide or p: protein)
  12. *
  13. * @return
  14. * An array where the nid is the key and the value is the human-readable name of the option
  15. */
  16. function get_blast_database_options($type) {
  17. global $user;
  18. // Use the Entity API to get a list of BLAST Nodes to load
  19. // We use this function in order respect node access control so that
  20. // administrators can use this module in combination with a node access module
  21. // of their choice to limit access to specific BLAST databases.
  22. $query = new EntityFieldQuery();
  23. $query->entityCondition('entity_type', 'node')
  24. // Restrict to BLASTDB nodes.
  25. ->entityCondition('bundle', 'blastdb')
  26. // Restrict to Published nodes.
  27. ->propertyCondition('status', 1)
  28. // Restrict to nodes the current user has permission to view.
  29. ->addTag('node_access');
  30. $entities = $query->execute();
  31. // Get all BlastDB nodes
  32. $nodes = node_load_multiple(array_keys($entities['node']));
  33. // Support obsolete database type n/p
  34. $obs_type = '';
  35. if ($type == 'protein') {
  36. $obs_type = 'p';
  37. }
  38. else {
  39. $obs_type = 'n';
  40. }
  41. $options = array();
  42. foreach ($nodes as $node) {
  43. if ( isset($node) && isset($node->db_dbtype) ) {
  44. if ( ($node->db_dbtype == $type) OR ($node->db_dbtype == $obs_type) ) {
  45. $options[$node->nid] = $node->db_name;
  46. }
  47. }
  48. }
  49. asort($options);
  50. $options[0] = 'Select a Dataset';
  51. return $options;
  52. }
  53. /**
  54. * Run BLAST (should be called from the command-line)
  55. *
  56. * @param $program
  57. * Which BLAST program to run (ie: 'blastn', 'tblastn', tblastx', 'blastp','blastx')
  58. * @param $query
  59. * The full path and filename of the query FASTA file
  60. * @param $database
  61. * The full path and filename prefix (excluding .nhr, .nin, .nsq, etc.)
  62. * @param $output_filestub
  63. * The filename (not including path) to give the results. Should not include file type suffix
  64. * @param $options
  65. * An array of additional option where the key is the name of the option used by
  66. * BLAST (ie: 'num_alignments') and the value is relates to this particular
  67. * BLAST job (ie: 250)
  68. */
  69. function run_BLAST_tripal_job($program, $query, $database, $output_filestub, $options, $job_id = NULL) {
  70. $output_file = 'sites/default/files/' . $output_filestub . '.blast.asn';
  71. $output_file_xml = 'sites/default/files/' . $output_filestub . '.blast.xml';
  72. $output_file_tsv = 'sites/default/files/' . $output_filestub . '.blast.tsv';
  73. $output_file_html = 'sites/default/files/' . $output_filestub . '.blast.html';
  74. print "\nExecuting $program\n\n";
  75. print "Query: $query\n";
  76. print "Database: $database\n";
  77. print "Results File: $output_file\n";
  78. print "Options:\n";
  79. $blast_cmd = "$program -query $query -db $database -out $output_file -outfmt=11";
  80. if (!empty($options)) {
  81. foreach ($options as $opt => $val) {
  82. print "\t$opt: $val\n";
  83. $blast_cmd .= " -$opt $val";
  84. }
  85. }
  86. print "\nExecuting the following BLAST command:\n" . $blast_cmd . "\n";
  87. system($blast_cmd);
  88. if(!file_exists($output_file)) {
  89. tripal_report_error(
  90. 'blast_ui',
  91. TRIPAL_ERROR,
  92. "BLAST did not complete successfully as is implied by the lack of output file (%file). The command run was @command",
  93. array('%file' => $output_file, '@command' => $blast_cmd),
  94. array('print' => TRUE)
  95. );
  96. return FALSE;
  97. }
  98. print "\nGenerating additional download formats...\n";
  99. print "\tXML\n";
  100. system("blast_formatter -archive $output_file -outfmt 5 -out $output_file_xml");
  101. if(!file_exists($output_file_xml)) {
  102. tripal_report_error(
  103. 'blast_ui',
  104. TRIPAL_ERROR,
  105. "Unable to convert BLAST ASN.1 archive (%archive) to XML (%file).",
  106. array('%archive' => $output_file, '%file' => $output_file_xml),
  107. array('print' => TRUE)
  108. );
  109. }
  110. print "\tTab-delimited\n";
  111. system("blast_formatter -archive $output_file -outfmt 7 -out $output_file_tsv");
  112. if(!file_exists($output_file_tsv)) {
  113. tripal_report_error(
  114. 'blast_ui',
  115. TRIPAL_WARNING,
  116. "Unable to convert BLAST ASN.1 archive (%archive) to Tabular Output (%file).",
  117. array('%archive' => $output_file, '%file' => $output_file_tsv),
  118. array('print' => TRUE)
  119. );
  120. }
  121. print "\tHTML (includes alignments)\n";
  122. system("blast_formatter -archive $output_file -outfmt 0 -out $output_file_html -html");
  123. if(!file_exists($output_file_tsv)) {
  124. tripal_report_error(
  125. 'blast_ui',
  126. TRIPAL_WARNING,
  127. "Unable to convert BLAST ASN.1 archive (%archive) to HTML Output (%file).",
  128. array('%archive' => $output_file, '%file' => $output_file_html),
  129. array('print' => TRUE)
  130. );
  131. }
  132. print "\nDone!\n";
  133. }
  134. /**
  135. * FASTA validating parser
  136. *
  137. * A sequence in FASTA format begins with a single-line description, followed
  138. * by lines of sequence data.The description line is distinguished from the
  139. * sequence data by a greater-than (">") symbol in the first column. The word
  140. * following the ">" symbol is the identifier of the sequence, and the rest of
  141. * the line is the description (both are optional). There should be no space
  142. * between the ">" and the first letter of the identifier. The sequence ends
  143. * if another line starting with a ">" appears which indicates the start of
  144. * another sequence.
  145. *
  146. * @param $type
  147. * The type of sequence to be validated (ie: either nucleotide or protein).
  148. * @param $sequence
  149. * A string of characters to be validated.
  150. *
  151. * @return
  152. * Return a boolean. 1 if the sequence does not pass the format valifation stage and 0 otherwise.
  153. *
  154. */
  155. function validate_fasta_sequence($type, $sequence) {
  156. if ($type == 'nucleotide') {
  157. $fastaIdRegEx = '/^>.*(\\n|\\r)/';
  158. $fastaSeqRegEx = '/[^acgntuACGNTU\n\r]/';
  159. if ( preg_match($fastaSeqRegEx,$sequence) && !(preg_match($fastaIdRegEx,$sequence)) ) {
  160. return TRUE;
  161. } else {
  162. return FALSE;
  163. }
  164. } elseif ($type == 'protein') {
  165. $fastaIdRegEx = '/^>.*(\\n|\\r)/';
  166. $fastaSeqRegEx = '/[^acgturykmswbdhvnxACGTURYKMSWBDHVNX\*\-\n\r]/';
  167. if ( preg_match($fastaSeqRegEx,$sequence) && !(preg_match($fastaIdRegEx,$sequence)) ) {
  168. return TRUE;
  169. } else {
  170. return FALSE;
  171. }
  172. }
  173. return FALSE;
  174. }
  175. /**
  176. * Retrieve the regex to capture the Link-out Accession from the Hit Def.
  177. *
  178. * @param $nid
  179. * The node ID of the BLAST database the hit is from.
  180. * @param $options
  181. * An array of options that can be passed to this function. Supported
  182. * options include:
  183. * -
  184. *
  185. * @return
  186. * A PHP regex for use with preg_match to cature the Link-out Accession.
  187. */
  188. function get_blastdb_linkout_regex($node, $options = array()) {
  189. if (empty($node->linkout->regex)) {
  190. switch ($node->linkout->regex_type) {
  191. case 'default':
  192. $regex = '/^(\s+) .*/';
  193. break;
  194. case 'genbank':
  195. $regex = '/^gb\|([^\|])*\|.*/';
  196. break;
  197. case 'embl':
  198. $regex = '/^embl\|([^\|])*\|.*/';
  199. break;
  200. case 'swissprot':
  201. $regex = '/^sp\|([^\|])*\|.*/';
  202. break;
  203. }
  204. }
  205. else {
  206. $regex = $node->linkout->regex;
  207. }
  208. return $regex;
  209. }