blast_ui.api.inc 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. <?php
  /**
   * @file
   * Contains generally applicable API functions, as well as some meant to help
   * developers plug in to the BLAST UI functionality.
   */
  /**
   * Builds the list of BLAST database options for a given database type.
   *
   * Every node of type 'blastdb' is loaded and kept when its db_dbtype equals
   * either the requested type or the matching obsolete one-letter code.
   *
   * @param $type
   *   The type of BLAST databases to restrict the list to. 'protein' is paired
   *   with the obsolete code 'p'; any other value is paired with 'n'.
   *
   * @return
   *   An array where the nid is the key and the value is the human-readable
   *   name of the database, sorted by name, followed by a final entry with
   *   key 0 and the value 'Select a Dataset'.
   */
  function get_blast_database_options($type) {
    // Older BlastDB nodes store the type as a single letter (n/p).
    $legacy_type = ($type == 'protein') ? 'p' : 'n';

    $choices = array();
    foreach (node_load_multiple(array(), array('type'=> 'blastdb')) as $blastdb) {
      // Skip anything that does not carry a database type at all.
      if (!isset($blastdb) || !isset($blastdb->db_dbtype)) {
        continue;
      }
      if ($blastdb->db_dbtype == $type || $blastdb->db_dbtype == $legacy_type) {
        $choices[$blastdb->nid] = $blastdb->db_name;
      }
    }

    // Sort by name while keeping the nid keys; the placeholder entry is added
    // after sorting so it is always the last element.
    asort($choices);
    $choices[0] = 'Select a Dataset';

    return $choices;
  }
  /**
   * Run BLAST (should be called from the command-line).
   *
   * Runs the requested BLAST program writing an ASN.1 archive (-outfmt=11) and
   * then uses blast_formatter to convert that archive to XML, tab-delimited
   * and HTML files. Progress is printed to standard output.
   *
   * @param $program
   *   Which BLAST program to run (ie: 'blastn', 'tblastn', 'tblastx', 'blastp',
   *   'blastx').
   * @param $query
   *   The full path and filename of the query FASTA file.
   * @param $database
   *   The full path and filename prefix (excluding .nhr, .nin, .nsq, etc.).
   * @param $output_filestub
   *   The filename (not including path) to give the results. Should not include
   *   file type suffix.
   * @param $options
   *   An array of additional options where the key is the name of the option
   *   used by BLAST (ie: 'num_alignments') and the value relates to this
   *   particular BLAST job (ie: 250).
   * @param $job_id
   *   (optional) Not used by this function. Presumably the ID of the Tripal job
   *   executing it -- confirm against the caller.
   *
   * @return
   *   FALSE if BLAST did not produce the archive file; nothing otherwise.
   */
  function run_BLAST_tripal_job($program, $query, $database, $output_filestub, $options, $job_id = NULL) {

    // All results are written relative to the current working directory.
    $output_file = 'sites/default/files/' . $output_filestub . '.blast.asn';
    $output_file_xml = 'sites/default/files/' . $output_filestub . '.blast.xml';
    $output_file_tsv = 'sites/default/files/' . $output_filestub . '.blast.tsv';
    $output_file_html = 'sites/default/files/' . $output_filestub . '.blast.html';

    print "\nExecuting $program\n\n";
    print "Query: $query\n";
    print "Database: $database\n";
    print "Results File: $output_file\n";

    print "Options:\n";

    // Paths and option values may contain spaces or shell metacharacters, so
    // each one is quoted as a single shell argument.
    // NOTE(review): $program is left unquoted so that a command with extra
    // arguments keeps working; it must never come from untrusted input.
    $archive_arg = escapeshellarg($output_file);
    $blast_cmd = "$program -query " . escapeshellarg($query)
      . " -db " . escapeshellarg($database)
      . " -out $archive_arg -outfmt=11";
    if (!empty($options)) {
      foreach ($options as $opt => $val) {
        print "\t$opt: $val\n";
        $blast_cmd .= ' ' . escapeshellarg("-$opt");
        // An empty value is treated as a bare flag rather than passing an
        // empty argument to BLAST.
        if ((string) $val !== '') {
          $blast_cmd .= ' ' . escapeshellarg((string) $val);
        }
      }
    }

    print "\nExecuting the following BLAST command:\n" . $blast_cmd . "\n";

    system($blast_cmd);

    // Without the archive none of the download formats can be generated.
    if (!file_exists($output_file)) {
      tripal_report_error(
        'blast_ui',
        TRIPAL_ERROR,
        "BLAST did not complete successfully as is implied by the lack of output file (%file). The command run was @command",
        array('%file' => $output_file, '@command' => $blast_cmd),
        array('print' => TRUE)
      );
      return FALSE;
    }

    print "\nGenerating additional download formats...\n";

    print "\tXML\n";
    system("blast_formatter -archive $archive_arg -outfmt 5 -out " . escapeshellarg($output_file_xml));
    if (!file_exists($output_file_xml)) {
      tripal_report_error(
        'blast_ui',
        TRIPAL_ERROR,
        "Unable to convert BLAST ASN.1 archive (%archive) to XML (%file).",
        array('%archive' => $output_file, '%file' => $output_file_xml),
        array('print' => TRUE)
      );
    }

    print "\tTab-delimited\n";
    system("blast_formatter -archive $archive_arg -outfmt 7 -out " . escapeshellarg($output_file_tsv));
    if (!file_exists($output_file_tsv)) {
      tripal_report_error(
        'blast_ui',
        TRIPAL_WARNING,
        "Unable to convert BLAST ASN.1 archive (%archive) to Tabular Output (%file).",
        array('%archive' => $output_file, '%file' => $output_file_tsv),
        array('print' => TRUE)
      );
    }

    print "\tHTML (includes alignments)\n";
    system("blast_formatter -archive $archive_arg -outfmt 0 -out " . escapeshellarg($output_file_html) . " -html");
    // Check the HTML file itself, not the tab-delimited one.
    if (!file_exists($output_file_html)) {
      tripal_report_error(
        'blast_ui',
        TRIPAL_WARNING,
        "Unable to convert BLAST ASN.1 archive (%archive) to HTML Output (%file).",
        array('%archive' => $output_file, '%file' => $output_file_html),
        array('print' => TRUE)
      );
    }

    print "\nDone!\n";
  }
  /**
   * FASTA validating parser
   *
   * A sequence in FASTA format begins with a single-line description, followed
   * by lines of sequence data. The description line is distinguished from the
   * sequence data by a greater-than (">") symbol in the first column. The word
   * following the ">" symbol is the identifier of the sequence, and the rest of
   * the line is the description (both are optional). There should be no space
   * between the ">" and the first letter of the identifier. The sequence ends
   * if another line starting with a ">" appears which indicates the start of
   * another sequence.
   *
   * Description lines are optional and are not checked for content. Every
   * other line may only contain the residue codes allowed for the given type
   * (case-insensitive), the gap character "-" and whitespace. A description
   * line must be followed by a line break.
   *
   * @param $type
   *   The type of sequence to be validated (ie: either nucleotide or protein).
   * @param $sequence
   *   A string of characters to be validated.
   *
   * @return
   *   A boolean: TRUE if the sequence does NOT pass the format validation
   *   stage and FALSE otherwise. Any other $type is not validated and always
   *   yields FALSE.
   */
  function validate_fasta_sequence($type, $sequence) {

    // Matches any character that is not allowed on a sequence line.
    if ($type == 'nucleotide') {
      // IUPAC nucleotide codes, gap and whitespace.
      $invalidResidueRegEx = '/[^ACGTUNRYKMSWBDHV\-\s]/i';
    }
    elseif ($type == 'protein') {
      // Amino acid codes, translation stop (*), gap and whitespace.
      $invalidResidueRegEx = '/[^ABCDEFGHIKLMNPQRSTUVWXYZ*\-\s]/i';
    }
    else {
      return FALSE;
    }

    // Split on any line-ending style so each record line is checked separately.
    $lines = preg_split('/\r\n|\r|\n/', (string) $sequence);
    $lastIndex = count($lines) - 1;

    foreach ($lines as $index => $line) {
      if (strncmp($line, '>', 1) === 0) {
        // A description line that ends the input has no line break after it.
        if ($index == $lastIndex) {
          return TRUE;
        }
        continue;
      }
      if (preg_match($invalidResidueRegEx, $line)) {
        return TRUE;
      }
    }

    return FALSE;
  }