tripal_analysis_blast.module 74 KB


  1. <?php
  2. /*******************************************************************************
  3. * Tripal Blast Result lets users show/hide blast results associated
  4. * with a tripal feature
  5. ******************************************************************************/
  /**
   * Implements hook_init().
   *
   * Attaches the blast result stylesheet and JavaScript file, both of which
   * live inside the 'tripal' theme directory, to every page.
   */
  function tripal_analysis_blast_init() {
    // Resolve the theme directory once and reuse it for both assets.
    $tripal_theme_dir = drupal_get_path('theme', 'tripal');

    drupal_add_css($tripal_theme_dir . '/css/tripal_analysis_blast.css');
    drupal_add_js($tripal_theme_dir . '/js/tripal_analysis_blast.js');
  }
  /**
   * Implements hook_menu().
   *
   * Registers the callback paths of this module:
   *  - tripal_top_blast: AJAX callback returning blast hits for a feature.
   *  - admin/tripal/tripal_analysis/tripal_blast_regex/%: AJAX callback
   *    returning the parser settings of one database for the admin page.
   *  - tripal_blast_report: the homology report page.
   *
   * Titles are intentionally NOT wrapped in t(): the Drupal 6 menu system
   * translates titles itself when rendering, so translating them here would
   * store an already translated string in the router table.
   *
   * @return
   *   An array of menu items keyed by path.
   */
  function tripal_analysis_blast_menu() {
    $items = array();

    // Show top 10/25/all blast results for ajax calls.
    // Path arguments: feature_id / db_id / max.
    $items['tripal_top_blast'] = array(
      'title' => 'Blast Hits',
      'page callback' => 'tripal_get_feature_blast_results_ajax',
      'page arguments' => array(1, 2, 3),
      'access arguments' => array('access content'),
      'type' => MENU_CALLBACK,
    );

    // Show regular expressions for selected database in Blast admin page
    $items['admin/tripal/tripal_analysis/tripal_blast_regex/%'] = array(
      'title' => 'Blast Regex',
      'page callback' => 'tripal_get_blast_regex',
      'page arguments' => array(4),
      'access arguments' => array('administer site configuration'),
      'type' => MENU_CALLBACK,
    );

    // Homology report; the callback lives in a separate include file.
    $items['tripal_blast_report'] = array(
      'title' => 'Homology Report',
      'page callback' => 'tripal_get_blast_report',
      'page arguments' => array(1, 2, 3, 4, 5),
      'access arguments' => array('access chado_analysis_blast content'),
      'type' => MENU_CALLBACK,
      'file' => 'tripal_analysis_blast_htmlreport.inc',
    );

    return $items;
  }
  /**
   * Implements hook_nodeapi().
   *
   * On full (non-teaser) node views, adds blast output to nodes whose type is
   * enabled in the 'tripal_analysis_blast_setting' variable:
   *  - while the search index is being built, a plain text-oriented version
   *    of the blast hits is added so features can be found by their hits;
   *  - otherwise, for chado_feature nodes, the themed blast result is added.
   *
   * @param $node
   *   The node being acted upon (modified in place).
   * @param $op
   *   The operation; only 'view' is handled.
   * @param $teaser
   *   TRUE when the teaser is being rendered.
   * @param $page
   *   TRUE when the node is rendered as its own page (unused).
   */
  function tripal_analysis_blast_nodeapi(&$node, $op, $teaser, $page) {
    if ($op != 'view') {
      return;
    }

    // Find out which node types for showing the blast
    $types_to_show = variable_get('tripal_analysis_blast_setting', array('chado_feature'));

    // Abort if this node is not one of the types we should show.
    if (!in_array($node->type, $types_to_show, TRUE)) {
      return;
    }

    // Blast results are never shown on teasers, and there is nothing to show
    // for nodes that carry no chado feature. empty() also covers nodes where
    // ->feature is not set at all, which previously raised a PHP notice.
    if ($teaser || empty($node->feature->feature_id)) {
      return;
    }

    $indexing = isset($node->build_mode) && $node->build_mode == NODE_BUILD_SEARCH_INDEX;
    if ($indexing) {
      $node->content['tripal_analysis_blast_index_version'] = array(
        '#value' => theme('tripal_analysis_blast_results_index_version', $node),
        '#weight' => 8,
      );
    }
    elseif (strcmp($node->type, 'chado_feature') == 0) {
      // Show blast result if not at teaser view
      $node->content['tripal_feature_blast_results'] = array(
        '#value' => theme('tripal_feature_blast_results', $node),
        '#weight' => 8,
      );
    }
  }
  /**
   * Implements hook_theme().
   *
   * We need to let drupal know about our theme functions and their arguments.
   * We create theme functions to allow users of the module to customize the
   * look and feel of the output generated in this module.
   *
   * The 'arguments' entries map an argument name to its default value; a bare
   * array('node') would instead declare an argument named "0" whose default is
   * the string 'node'.
   *
   * @return
   *   An array of theme hook definitions keyed by hook name.
   */
  function tripal_analysis_blast_theme () {
    return array(
      // Rendered by theme_tripal_analysis_blast_results_index_version().
      'tripal_analysis_blast_results_index_version' => array(
        'arguments' => array('node' => NULL),
      ),
      // Rendered by the tripal_feature_blast_results template.
      'tripal_feature_blast_results' => array(
        'arguments' => array('node' => NULL),
        'template' => 'tripal_feature_blast_results',
      ),
    );
  }
  /**
   * AJAX page callback: re-renders the blast results of one feature.
   *
   * Looks up the node that belongs to the given chado feature, attaches the
   * requested database id and hit limit to it and returns the themed blast
   * results as JSON under the key 'update'.
   *
   * @param $feature_id
   *   The chado feature_id whose blast results are requested.
   * @param $db_id
   *   The chado db_id to restrict the results to.
   * @param $max
   *   The maximum number of hits to show; passed through to the theme layer
   *   unchanged.
   */
  function tripal_get_feature_blast_results_ajax($feature_id, $db_id, $max){
    $sql = "SELECT nid FROM {chado_feature} WHERE feature_id = %d";
    $record = db_fetch_object(db_query($sql, $feature_id));

    // An unknown feature_id (or a node that can no longer be loaded) must not
    // lead to property assignments on FALSE; answer with an empty update.
    $node = $record ? node_load($record->nid) : FALSE;
    if (!$node) {
      drupal_json(array('update' => ''));
      return;
    }

    // add the additional variables that the theme needs to generate the output
    $node->db_id = $db_id;
    $node->max = $max;

    // call the theme to rebuild the blast results
    drupal_json(array('update' => theme('tripal_feature_blast_results', $node)));
  }
  /**
   * Preprocess function for the tripal_feature_blast_results template.
   *
   * Fetches the blast results of the node's feature and exposes them to the
   * template as $tripal_analysis_blast['blast_results_list'].
   *
   * The node may carry two optional properties (set by the AJAX callback):
   *  - db_id: restrict the results to this database; all databases otherwise.
   *  - max: maximum number of hits; defaults to 10.
   *
   * @param $variables
   *   The template variables (modified in place); must contain 'node'.
   */
  function tripal_analysis_blast_preprocess_tripal_feature_blast_results(&$variables){
    $node = $variables['node'];
    $feature = $node->feature;

    // On a regular page view no db_id is set on the node; NULL means
    // "do not filter by database" and avoids an undefined property notice.
    $db_id = isset($node->db_id) ? $node->db_id : NULL;
    $max = isset($node->max) ? $node->max : 10;

    $blast_results = tripal_get_feature_blast_results($feature->feature_id, $db_id, $max);
    $variables['tripal_analysis_blast']['blast_results_list'] = $blast_results;
  }
  /**
   * Theme function: blast results of a node's feature for the search index.
   *
   * @param $node
   *   A node object carrying a chado feature in $node->feature.
   *
   * @return
   *   Whatever tripal_get_blast_results_index_version() produces for the
   *   feature's feature_id.
   */
  function theme_tripal_analysis_blast_results_index_version ($node) {
    return tripal_get_blast_results_index_version($node->feature->feature_id);
  }
  /**
   * Collects the blast result objects stored for a feature.
   *
   * The blast results are stored as XML in the analysisfeatureprop table. The
   * type of the property is named 'analysis_blast_output_iteration_hits' and
   * is found in the 'tripal' controlled vocabulary (added by this module).
   * The matching analysisprop value holds the blast settings as three fields
   * separated by a bar; the first field is the db_id of the blast database.
   *
   * @param $feature_id
   *   The chado feature_id.
   * @param $db_id
   *   When non-empty, only results against this chado db_id are returned.
   * @param $max
   *   Maximum number of hits, passed to
   *   tripal_analysis_blast_get_result_object().
   *
   * @return
   *   A zero-indexed array of blast result objects, each with an additional
   *   ->analysis property (aid, name, time). Empty when nothing matches.
   */
  function tripal_get_feature_blast_results($feature_id, $db_id, $max){
    // Literal percent signs are written as %% because db_query() treats % as
    // the start of a placeholder when arguments are passed.
    $sql = "SELECT AP.value AS apvalue, AFP.value AS afpvalue, AF.analysis_id AS aid
            FROM {analysisfeatureprop} AFP
              INNER JOIN {analysisfeature} AF ON AF.analysisfeature_id = AFP.analysisfeature_id
              INNER JOIN {analysisprop} AP ON AP.analysis_id = AF.analysis_id
              INNER JOIN {cvterm} CVT on AFP.type_id = CVT.cvterm_id
              INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
            WHERE AF.feature_id = %d AND CV.name = '%s' AND
              CVT.name = '%s' AND AP.value like '%%|%%' ";
    $previous_db = tripal_db_set_active('chado');
    $result = db_query($sql, $feature_id, 'tripal', 'analysis_blast_output_iteration_hits');
    tripal_db_set_active($previous_db);

    $analysis_sql = "SELECT analysis_id AS aid, name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
                     FROM {analysis}
                     WHERE analysis_id = %d";
    $db_sql = "SELECT * FROM {db} WHERE db_id=%d";

    $blast_obj_array = array();
    while ($analysisfeatureprop = db_fetch_object($result)) {
      // The blast settings are bar-separated; skip entries that do not have
      // exactly three fields or that have no database id in the first field.
      $blastsettings = explode("|", $analysisfeatureprop->apvalue);
      if (count($blastsettings) != 3 || !$blastsettings[0]) {
        continue;
      }
      $att_db_id = $blastsettings[0];

      // When a database id was requested, skip results for other databases
      // before spending any further queries on them.
      if ($db_id and $att_db_id != $db_id) {
        continue;
      }

      // Get the analysis name/date and the db object from chado.
      $previous_db = tripal_db_set_active('chado');
      $analysis = db_fetch_object(db_query($analysis_sql, $analysisfeatureprop->aid));
      $db = db_fetch_object(db_query($db_sql, $att_db_id));
      tripal_db_set_active($previous_db);

      $blast_obj = tripal_analysis_blast_get_result_object($analysisfeatureprop->afpvalue, $db, $max, $feature_id, $analysis);
      $blast_obj->analysis = $analysis;
      $blast_obj_array[] = $blast_obj;
    }
    return $blast_obj_array;
  }
  /**
   * Builds the search-index version of a feature's blast results.
   *
   * Reads the stored blast XML of the feature from analysisfeatureprop and
   * converts each entry with parse_NCBI_Blast_XML_index_version().
   *
   * @param $feature_id
   *   The chado feature_id.
   *
   * @return
   *   The concatenated markup for all blast results of the feature; an empty
   *   string when the feature has none.
   */
  function tripal_get_blast_results_index_version ($feature_id){
    $content = '';

    $previous_db = tripal_db_set_active('chado');

    // Get cvterm_id for 'analysis_blast_output_iteration_hits', the property
    // type under which the blast XML is stored.
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
           "INNER JOIN {cv} CV ON CV.cv_id = CVT.cv_id ".
           "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
           "AND CV.name = 'tripal'";
    $type_id = db_result(db_query($sql));

    // Get xml string from analysisfeatureprop value column and the blast
    // settings (containing the db_id) from analysisprop value column.
    $sql = "SELECT AP.value AS apvalue, AFP.value AS afpvalue, AF.analysis_id AS aid
            FROM {analysisfeatureprop} AFP
              INNER JOIN {analysisfeature} AF ON AF.analysisfeature_id = AFP.analysisfeature_id
              INNER JOIN {analysisprop} AP ON AP.analysis_id = AF.analysis_id
            WHERE AF.feature_id = %d
              AND AFP.type_id = %d ";
    $result = db_query($sql, $feature_id, $type_id);
    tripal_db_set_active($previous_db);

    $db_sql = "SELECT * FROM {db} WHERE db_id=%d";
    while ($analysisfeatureprop = db_fetch_object($result)) {
      // Only analysis properties holding the three bar-separated blast
      // settings identify a blast database; other properties of the same
      // analysis are skipped, as tripal_get_feature_blast_results() does.
      $blastsettings = explode("|", $analysisfeatureprop->apvalue);
      if (count($blastsettings) != 3 || !$blastsettings[0]) {
        continue;
      }
      $att_db_id = $blastsettings[0];

      // Get db object using the db_id
      $previous_db = tripal_db_set_active('chado');
      $db = db_fetch_object(db_query($db_sql, $att_db_id));
      tripal_db_set_active($previous_db);

      // The parser takes exactly (xml, db, feature_id).
      $content .= parse_NCBI_Blast_XML_index_version($analysisfeatureprop->afpvalue, $db, $feature_id);
    }
    return $content;
  }
  /**
   * Parses stored NCBI Blast XML into simple markup for the search index, so
   * that users can find features through their blast hits.
   *
   * For every hit the name, accession and description are emitted. How these
   * are obtained depends on the parser settings stored for the database in
   * the {tripal_analysis_blast} table:
   *  - genbank style: taken from <Hit_id>, <Hit_accession> and <Hit_def>;
   *  - otherwise: all three are extracted from <Hit_def> with the configured
   *    regular expressions (or built-in defaults when none are configured).
   *
   * @param $xml_string
   *   XML of a single iteration: either an <Iteration> element containing
   *   <Iteration_query-def> and <Iteration_hits>, or (old parser format) the
   *   hits container itself.
   * @param $db
   *   The chado db object of the blast database, or FALSE if unknown.
   * @param $feature_id
   *   The chado feature_id (currently unused).
   *
   * @return
   *   The generated markup. When the XML cannot be parsed or contains no
   *   hits container, only the database heading (and query line, if any) is
   *   returned.
   */
  function parse_NCBI_Blast_XML_index_version($xml_string,$db,$feature_id) {
    // Get the parser using db_id
    $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
    $parser = db_fetch_object(db_query($sql, is_object($db) ? $db->db_id : 0));

    // No parser row means: no display name, not genbank style, default regexes.
    $db_name             = $parser ? $parser->displayname : '';
    $is_genbank          = $parser ? $parser->genbank_style : FALSE;
    $regex_hit_id        = $parser ? $parser->regex_hit_id : '';
    $regex_hit_def       = $parser ? $parser->regex_hit_def : '';
    $regex_hit_accession = $parser ? $parser->regex_hit_accession : '';

    // set default if regular expressions have not been specified
    $regex_hit_id        = $regex_hit_id ? '/'.$regex_hit_id.'/' : '/^(.*?)\s.*$/';
    $regex_hit_def       = $regex_hit_def ? '/'.$regex_hit_def.'/' : '/^.*?\s(.*)$/';
    $regex_hit_accession = $regex_hit_accession ? '/'.$regex_hit_accession.'/' : '/^(.*?)\s.*$/';

    $html_out = "<h3>$db_name</h3>";

    // Load the XML. It should be an extract of the original XML file with
    // only a single iteration, i.e. all the hits for a single query sequence.
    $xml_output = simplexml_load_string($xml_string);
    if ($xml_output === FALSE) {
      // Unparsable XML: nothing more to index for this entry.
      return $html_out;
    }

    $hit_list = NULL;
    if ($xml_output->getName() == 'Iteration') {
      // The new XML file parser has added the feature name within
      // <Iteration_query-def> tags.
      foreach ($xml_output->children() as $xml_tag) {
        $tag_name = $xml_tag->getName();
        if ($tag_name == 'Iteration_query-def') {
          // Show the feature name again to check that we pull the correct data
          $html_out .= "Query: $xml_tag<br>";
        }
        elseif ($tag_name == 'Iteration_hits') {
          $hit_list = $xml_tag;
        }
      }
    }
    else {
      // This is for the file parsed by the old parser
      $hit_list = $xml_output;
    }

    if (!$hit_list) {
      // An <Iteration> without <Iteration_hits>.
      return $html_out;
    }

    // now run through the blast hits of this iteration
    foreach ($hit_list->children() as $hit) {
      // Start every hit with empty fields so that a hit lacking one of the
      // tags does not inherit the value of the previous hit.
      $hit_name = '';
      $accession = '';
      $description = '';

      foreach ($hit->children() as $field) {
        $element_name = $field->getName();
        $text = (string) $field;

        if ($element_name == 'Hit_def') {
          if ($is_genbank) {
            $description = $text;
          }
          else {
            // All three values are encoded in the definition line.
            $accession   = preg_replace($regex_hit_accession, "$1", $text);
            $hit_name    = preg_replace($regex_hit_id, "$1", $text);
            $description = preg_replace($regex_hit_def, "$1", $text);
          }
        }
        elseif ($is_genbank && $element_name == 'Hit_id') {
          $hit_name = $text;
        }
        elseif ($is_genbank && $element_name == 'Hit_accession') {
          $accession = $text;
        }
      }

      $html_out .= "<p>$hit_name<br>";
      $html_out .= "$accession<br>";
      $html_out .= "<b>$description</b><br>";
    }
    return $html_out;
  }
  /**
   * Tripal Blast administrative setting form. This function is called by the
   * tripal_analysis module, which asks for an admin form to show on the page.
   *
   * The form lets the administrator enable blast results on feature pages
   * and configure, per chado database, how blast hits are parsed.
   *
   * @return
   *   An object with two properties:
   *   - form: the form array.
   *   - title: the title of the settings section ("Tripal Blast").
   */
  function tripal_analysis_blast_get_settings() {
    // Get the list of databases from chado for the user to choose from. The
    // chado connection is only active for this query so that everything below
    // (including string translation) runs against the drupal database.
    $previous_db = tripal_db_set_active('chado');
    $results = db_query('SELECT db_id, name FROM {db} ORDER BY lower(name)');
    $blastdbs = array();
    while ($db = db_fetch_object($results)) {
      $blastdbs[$db->db_id] = $db->name;
    }
    tripal_db_set_active($previous_db);

    // 'chado_feature' is the only content type blast results can be shown on.
    $allowedoptions = array(
      'chado_feature' => "Show blast results on feature pages",
    );

    $form = array();
    $form['description'] = array(
      '#type' => 'item',
      '#value' => t("Most chado features were analyzed by blast against major sequence databases. This option allows user to display the blast analysis results. Please read user manual for storage and display of blast files. Check the box to enable the analysis results. Uncheck to disable it."),
      '#weight' => 0,
    );
    $form['tripal_analysis_blast_setting'] = array(
      '#type' => 'checkboxes',
      '#options' => $allowedoptions,
      '#default_value' => variable_get('tripal_analysis_blast_setting', array('chado_feature')),
    );

    $form['blast_parser'] = array(
      '#title' => t('Blast Parser Settings'),
      '#type' => 'fieldset',
      '#description' => t('Configure parsers for showing blast results. Each database is allowed to have one xml parser.'),
      '#weight' => 10,
    );
    // Keep the database list available as a form value.
    $form['db_options'] = array(
      '#type' => 'value',
      '#value' => $blastdbs,
    );
    $form['blast_parser']['blastdb'] = array(
      '#title' => t('Database'),
      '#type' => 'select',
      '#description' => t('The database used for the blast analysis.'),
      '#options' => $blastdbs,
      // JavaScript hook that loads the stored settings of the selected db.
      '#attributes' => array(
        'onChange' => "return tripal_update_regex(this)",
      ),
    );
    $form['blast_parser']['displayname'] = array(
      '#title' => t('Title for the blast analysis'),
      '#type' => 'textfield',
    );
    $form['blast_parser']['gb_style_parser'] = array(
      '#title' => t('Use Genebank style parser. This will clear all regular expression settings for the selected database.'),
      '#type' => 'checkbox',
      '#attributes' => array(
        'onClick' => "return tripal_set_genbank_style(this)",
      ),
    );
    $form['blast_parser']['hit_id'] = array(
      '#title' => t('Regular expression for Hit Name'),
      '#type' => 'textfield',
    );
    $form['blast_parser']['hit_def'] = array(
      '#title' => t('Regular expression for Hit Description'),
      '#type' => 'textfield',
    );
    $form['blast_parser']['hit_accession'] = array(
      '#title' => t('Regular expression for Hit Accession'),
      '#type' => 'textfield',
    );
    $form['blast_parser']['button'] = array(
      '#type' => 'submit',
      '#value' => t('Save settings'),
    );

    // Create the container explicitly instead of relying on PHP to conjure
    // an object out of an undefined variable.
    $settings = new stdClass();
    $settings->form = $form;
    $settings->title = "Tripal Blast";
    return $settings;
  }
  /**
   * Parse Blast XML output into the chado analysisfeatureprop table.
   *
   * $blastfile is either a single blast XML file or a directory; for a
   * directory every file whose name ends in ".xml" (case-insensitive) is
   * parsed. Each <Iteration> is matched to a chado feature through its
   * <Iteration_query-def> and its hits are stored (inserted or updated) as an
   * analysisfeatureprop of type 'analysis_blast_output_iteration_hits'.
   * Matching results are appended to a log file in the Drupal files
   * directory and job progress is reported through tripal_job_set_progress().
   *
   * @param $analysis_id
   *   The chado analysis_id the results belong to.
   * @param $blastdb
   *   The blast database (unused by this function).
   * @param $blastfile
   *   Path of a blast XML file or of a directory containing such files.
   * @param $no_parsed
   *   'all' to store every hit, otherwise the number of top hits to store.
   * @param $job_id
   *   The tripal job id used for progress reporting.
   */
  function tripal_analysis_blast_parseXMLFile ($analysis_id, $blastdb, $blastfile, $no_parsed, $job_id) {
    // Prepare log; parsing results are appended to the log file.
    $filename = preg_replace("/.*\/(.*)/", "$1", $blastfile);
    $logfile = file_directory_path() . "/tripal/tripal_analysis_blast/load_$filename.log";
    $log = fopen($logfile, 'a');

    $hit_limit_message = ($no_parsed == 'all')
      ? "Parsing all hits...\n"
      : "Parsing top $no_parsed hits...\n";

    // If user input a file (e.g. blast.xml)
    if (is_file($blastfile)) {
      print "Parsing File:".$blastfile." ...\n";
      _tripal_analysis_blast_log($log, date("D M j G:i:s Y").". Loading $blastfile\n");
      print $hit_limit_message;

      $previous_db = tripal_db_set_active('chado'); // use chado database
      $type_id = _tripal_analysis_blast_hits_type_id();
      // Progress is reported per iteration when a single file is parsed.
      _tripal_analysis_blast_parse_file($blastfile, $analysis_id, $type_id, $no_parsed, $log, TRUE, $job_id, $previous_db);
      tripal_db_set_active($previous_db); // Use drupal database
    }
    // Otherwise, $blastfile is a directory. Iterate through all xml files in it
    else {
      $entries = is_dir($blastfile) ? scandir($blastfile) : FALSE;
      if ($entries === FALSE) {
        if ($log) {
          fclose($log);
        }
        die("Unable to open $blastfile");
      }

      // One list serves both the progress total and the parsing loop, so the
      // two can never disagree.
      $xml_files = array();
      foreach ($entries as $entry) {
        if (preg_match('/\.xml$/i', $entry) && is_file($blastfile."/".$entry)) {
          $xml_files[] = $entry;
        }
      }
      $total_files = count($xml_files);
      print "$total_files file(s) to be parsed.\n";
      print $hit_limit_message;

      // Report progress roughly every 1% of the files, but at least for
      // every file so the modulo below never divides by zero.
      $interval = max(1, intval($total_files * 0.01));

      $previous_db = tripal_db_set_active('chado'); // use chado database
      $type_id = _tripal_analysis_blast_hits_type_id();

      foreach ($xml_files as $no_file => $file) {
        // Set job status
        if ($no_file % $interval == 0) {
          $percentage = (int) ($no_file / $total_files * 100);
          _tripal_analysis_blast_report_progress($job_id, $percentage, $previous_db);
        }
        print "Parsing File:".$file.". ";
        _tripal_analysis_blast_log($log, date("D M j G:i:s Y").". Loading $file\n");
        _tripal_analysis_blast_parse_file($blastfile."/".$file, $analysis_id, $type_id, $no_parsed, $log, FALSE, $job_id, $previous_db);
      }
      tripal_db_set_active($previous_db); // Use drupal database
    }

    print "Done.\nSuccessful and failed entries have been saved in the log file:\n $logfile\n";
    _tripal_analysis_blast_log($log, "\n");
    if ($log) {
      fclose($log);
    }
    return;
  }

  /**
   * Helper: writes a message to the parse log if the log could be opened.
   */
  function _tripal_analysis_blast_log($log, $message) {
    if ($log) {
      fwrite($log, $message);
    }
  }

  /**
   * Helper: returns the cvterm_id of 'analysis_blast_output_iteration_hits'
   * in the 'tripal' vocabulary. Expects the chado database to be active.
   */
  function _tripal_analysis_blast_hits_type_id() {
    $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
           "INNER JOIN {cv} CV ON CV.cv_id = CVT.cv_id ".
           "WHERE CVT.name = 'analysis_blast_output_iteration_hits' ".
           "AND CV.name = 'tripal'";
    return db_result(db_query($sql));
  }

  /**
   * Helper: stores the job progress in the drupal database, prints it and
   * switches back to the chado database.
   */
  function _tripal_analysis_blast_report_progress($job_id, $percentage, $drupal_db) {
    tripal_db_set_active($drupal_db);
    tripal_job_set_progress($job_id, $percentage);
    tripal_db_set_active('chado');
    print $percentage."% ";
  }

  /**
   * Helper: resolves an <Iteration_query-def> value to a chado feature_id.
   *
   * Logs 'Succeeded', 'Ambiguous' or 'Failed'. Expects the chado database to
   * be active. Returns the feature_id, or 0 when no single feature matches.
   */
  function _tripal_analysis_blast_match_feature($query_def, $log) {
    $label = $query_def;
    $feature_id = 0;

    // If the Iteration_query-def is in the format "feature_id|uniquename",
    // get the feature_id from it directly.
    if (preg_match("/^(\d+)\|.+/", $query_def, $matches)) {
      $feature_id = $matches[1];
    }
    else {
      // Treat the first word of <Iteration_query-def> as the uniquename.
      if (preg_match('/^(.*?)\s.*$/', $query_def, $matches)) {
        $label = $matches[1];
      }
      // Find out how many features match this uniquename
      $sql = "SELECT count(feature_id) FROM {feature} WHERE uniquename = '%s' ";
      $no_features = db_result(db_query($sql, $label));

      if ($no_features > 1) {
        _tripal_analysis_blast_log($log, "Ambiguous: ".$label." matches more than one feature and is not processed.\n");
        return 0;
      }
      if ($no_features != 1) {
        _tripal_analysis_blast_log($log, "Failed: ".$label."\n");
        return 0;
      }
      $sql = "SELECT feature_id FROM {feature} WHERE uniquename = '%s' ";
      $feature_id = db_result(db_query($sql, $label));
    }

    if ($feature_id) {
      // No newline: the line is completed by " (Update)" or " (Insert)".
      _tripal_analysis_blast_log($log, "Succeeded: ".$label." => feature id:".$feature_id);
      return $feature_id;
    }
    return 0;
  }

  /**
   * Helper: builds the <Iteration> XML extract that is stored for a feature,
   * containing either all hits or only the first $no_parsed hits.
   */
  function _tripal_analysis_blast_build_iteration_xml($featurename_xml, $iteration_hits, $no_parsed) {
    $xml_content = "<Iteration>\n".$featurename_xml."\n";
    if ($no_parsed == 'all') {
      $xml_content .= $iteration_hits->asXML();
    }
    else {
      $counter = 0;
      $xml_content .= "<Iteration_hits>\n";
      foreach ($iteration_hits->children() as $hit) {
        if ($counter >= $no_parsed) {
          break;
        }
        $xml_content .= $hit->asXML();
        $counter ++;
      }
      $xml_content .= "</Iteration_hits>";
    }
    return $xml_content."\n</Iteration>";
  }

  /**
   * Helper: inserts or updates the stored hits of one feature/analysis pair.
   * Expects the chado database to be active.
   */
  function _tripal_analysis_blast_save_iteration($feature_id, $analysis_id, $type_id, $xml_content, $log) {
    // If this iteration already exists in analysisfeatureprop, update it.
    $sql = "SELECT AFP.analysisfeatureprop_id FROM {analysisfeatureprop} AFP ".
           "INNER JOIN {analysisfeature} AF ON AF.analysisfeature_id = AFP.analysisfeature_id ".
           "WHERE AF.feature_id=%d ".
           "AND AF.analysis_id=%d ".
           "AND AFP.type_id=%d ";
    $existing = db_fetch_object(db_query($sql, $feature_id, $analysis_id, $type_id));
    if ($existing) {
      $sql = "UPDATE {analysisfeatureprop} ".
             "SET value = '%s' ".
             "WHERE analysisfeatureprop_id = %d ";
      db_query($sql, $xml_content, $existing->analysisfeatureprop_id);
      _tripal_analysis_blast_log($log, " (Update)\n");
      return;
    }

    // Reuse the analysisfeature row if the feature is already linked to the
    // analysis (e.g. by another property type); insert it otherwise.
    $lookup = "SELECT analysisfeature_id FROM {analysisfeature} WHERE feature_id = %d AND analysis_id = %d";
    $analysisfeature_id = db_result(db_query($lookup, $feature_id, $analysis_id));
    if (!$analysisfeature_id) {
      $sql = "INSERT INTO {analysisfeature} (feature_id, analysis_id) ".
             "VALUES (%d, %d)";
      db_query($sql, $feature_id, $analysis_id);
      $analysisfeature_id = db_result(db_query($lookup, $feature_id, $analysis_id));
    }

    $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank)".
           "VALUES (%d, %d, '%s', %d)";
    db_query($sql, $analysisfeature_id, $type_id, $xml_content, '0');
    _tripal_analysis_blast_log($log, " (Insert)\n");
  }

  /**
   * Helper: parses one blast XML file and stores the hits of every iteration.
   *
   * Expects the chado database to be active. When $report_progress is TRUE
   * the job progress is updated roughly every 1% of the iterations.
   */
  function _tripal_analysis_blast_parse_file($path, $analysis_id, $type_id, $no_parsed, $log, $report_progress, $job_id, $drupal_db) {
    $blastoutput = simplexml_load_file($path);
    if ($blastoutput === FALSE) {
      print "Unable to parse $path as XML. Skipped.\n";
      _tripal_analysis_blast_log($log, "Failed: unable to parse $path as XML.\n");
      return;
    }

    // Collect the <Iteration> elements once; the list gives the total for
    // the progress calculation and is then processed.
    $iterations = array();
    foreach ($blastoutput->children() as $section) {
      if ($section->getName() != 'BlastOutput_iterations') {
        continue;
      }
      foreach ($section->children() as $candidate) {
        if ($candidate->getName() == 'Iteration') {
          $iterations[] = $candidate;
        }
      }
    }
    $no_iterations = count($iterations);
    print "$no_iterations iterations to be processed.\n";

    // At least 1, so that files with fewer than 100 iterations do not cause
    // a modulo by zero.
    $interval = max(1, intval($no_iterations * 0.01));

    $idx_iterations = 0;
    foreach ($iterations as $iteration) {
      $idx_iterations ++;
      if ($report_progress && $idx_iterations % $interval == 0) {
        $percentage = (int) ($idx_iterations / $no_iterations * 100);
        _tripal_analysis_blast_report_progress($job_id, $percentage, $drupal_db);
      }

      $feature_id = 0;
      $featurename_xml = '';
      foreach ($iteration->children() as $iteration_tag) {
        $tag_name = $iteration_tag->getName();
        if ($tag_name == 'Iteration_query-def') {
          // Match <Iteration_query-def> to a chado feature.
          $feature_id = _tripal_analysis_blast_match_feature((string) $iteration_tag, $log);
          if ($feature_id) {
            $featurename_xml = $iteration_tag->asXML();
          }
        }
        elseif ($tag_name == 'Iteration_hits' && $feature_id) {
          // Store the hits for the feature matched above.
          $xml_content = _tripal_analysis_blast_build_iteration_xml($featurename_xml, $iteration_tag, $no_parsed);
          _tripal_analysis_blast_save_iteration($feature_id, $analysis_id, $type_id, $xml_content, $log);
        }
      }
    }
  }
  724. /*******************************************************************************
  725. * This function is only called by ajax to get regular expressions for blast
  726. * admin page
  727. */
  728. function tripal_get_blast_regex ($db_id) {
  729. $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
  730. $blast_regexs = db_fetch_object(db_query($sql, $db_id));
  731. drupal_json(array(
  732. 'name' => $blast_regexs->displayname,
  733. 'genbank_style' => $blast_regexs->genbank_style,
  734. 'reg1' => $blast_regexs->regex_hit_id,
  735. 'reg2' => $blast_regexs->regex_hit_def,
  736. 'reg3' => $blast_regexs->regex_hit_accession)
  737. );
  738. }
  739. /*******************************************************************************
  740. * Provide information to drupal about the node types that we're creating
  741. * in this module
  742. */
  743. function tripal_analysis_blast_node_info() {
  744. $nodes = array();
  745. $nodes['chado_analysis_blast'] = array(
  746. 'name' => t('Analysis: Blast'),
  747. 'module' => 'chado_analysis_blast',
  748. 'description' => t('A blast analysis from the chado database'),
  749. 'has_title' => FALSE,
  750. 'title_label' => t('Analysis: Blast'),
  751. 'has_body' => FALSE,
  752. 'body_label' => t('Blast Analysis Description'),
  753. 'locked' => TRUE
  754. );
  755. return $nodes;
  756. }
  757. /*******************************************************************************
  758. * Provide a Blast Analysis form
  759. */
  760. function chado_analysis_blast_form ($node){
  761. //dprint_r($node);
  762. $type = node_get_types('type', $node);
  763. $form = array();
  764. $form['title']= array(
  765. '#type' => 'hidden',
  766. '#default_value' => $node->title,
  767. );
  768. $form['analysisname']= array(
  769. '#type' => 'textfield',
  770. '#title' => t('Analysis Name'),
  771. '#required' => FALSE,
  772. '#default_value' => $node->analysisname,
  773. '#weight' => 1
  774. );
  775. $form['program']= array(
  776. '#type' => 'textfield',
  777. '#title' => t('Program'),
  778. '#required' => TRUE,
  779. '#default_value' => $node->program,
  780. '#weight' => 2
  781. );
  782. $form['programversion']= array(
  783. '#type' => 'textfield',
  784. '#title' => t('Program Version'),
  785. '#required' => TRUE,
  786. '#default_value' => $node->programversion,
  787. '#weight' => 3
  788. );
  789. $form['algorithm']= array(
  790. '#type' => 'textfield',
  791. '#title' => t('Algorithm'),
  792. '#required' => FALSE,
  793. '#default_value' => $node->algorithm,
  794. '#weight' => 4
  795. );
  796. $form['sourcename']= array(
  797. '#type' => 'textfield',
  798. '#title' => t('Source Name'),
  799. '#required' => FALSE,
  800. '#default_value' => $node->sourcename,
  801. '#weight' => 5
  802. );
  803. $form['sourceversion']= array(
  804. '#type' => 'textfield',
  805. '#title' => t('Source Version'),
  806. '#required' => FALSE,
  807. '#default_value' => $node->sourceversion,
  808. '#weight' => 6
  809. );
  810. $form['sourceuri']= array(
  811. '#type' => 'textfield',
  812. '#title' => t('Source URI'),
  813. '#required' => FALSE,
  814. '#default_value' => $node->sourceuri,
  815. '#weight' => 7
  816. );
  817. // Get time saved in chado
  818. $default_time = $node->timeexecuted;
  819. $year = preg_replace("/^(\d+)-\d+-\d+ .*/", "$1", $default_time);
  820. $month = preg_replace("/^\d+-0?(\d+)-\d+ .*/", "$1", $default_time);
  821. $day = preg_replace("/^\d+-\d+-0?(\d+) .*/", "$1", $default_time);
  822. // If the time is not set, use current time
  823. if (!$default_time) {
  824. $default_time = time();
  825. $year = format_date($default_time, 'custom', 'Y');
  826. $month = format_date($default_time, 'custom', 'n');
  827. $day = format_date($default_time, 'custom', 'j');
  828. }
  829. $form['timeexecuted']= array(
  830. '#type' => 'date',
  831. '#title' => t('Time Executed'),
  832. '#required' => TRUE,
  833. '#default_value' => array(
  834. 'year' => $year,
  835. 'month' => $month,
  836. 'day' => $day,
  837. ),
  838. '#weight' => 8
  839. );
  840. $form['description']= array(
  841. '#type' => 'textarea',
  842. '#rows' => 15,
  843. '#title' => t('Description and/or Program Settings'),
  844. '#required' => FALSE,
  845. '#default_value' => check_plain($node->description),
  846. '#weight' => 9
  847. );
  848. // Blast specific settings
  849. if (preg_match("/.*\|.*\|.*/",$node->blastdb)) {
  850. $prop_values = explode("|", $node->blastdb);
  851. $node->blastdb = $prop_values[0];
  852. $node->blastfile = $prop_values[1];
  853. $node->blastparameters = $prop_values[2];
  854. }
  855. $form['blast'] = array(
  856. '#title' => t('Blast Settings'),
  857. '#type' => 'fieldset',
  858. '#description' => t('Specific Settings for Blast Analysis.'),
  859. '#collapsible' => TRUE,
  860. '#attributes' => array('id' => 'blast-extra-settings'),
  861. '#weight' => 11
  862. );
  863. $previous_db = tripal_db_set_active('chado'); // use chado database
  864. // get a list of db from chado for user to choose
  865. $sql = 'SELECT db_id, name FROM {db} ORDER BY lower(name)';
  866. $results = db_query ($sql);
  867. tripal_db_set_active($previous_db);
  868. $blastdbs = array();
  869. while ($db = db_fetch_object($results)){
  870. $blastdbs[$db->db_id] = $db->name;
  871. }
  872. $form['db_options'] = array(
  873. '#type' => 'value',
  874. '#value' => $blastdbs
  875. );
  876. $form['blast']['blastdb'] = array(
  877. '#title' => t('Database'),
  878. '#type' => 'select',
  879. '#description' => t('The database used for the blast analysis.'),
  880. '#options' => $form['db_options']['#value'],
  881. '#default_value' => $node->blastdb,
  882. );
  883. $form['blast']['blastfile'] = array(
  884. '#title' => t('Blast xml File: (if you input a directory without the tailing slash, all xml files in the directory will be loaded)'),
  885. '#type' => 'textfield',
  886. '#description' => t('The xml output file generated by blast in full path.'),
  887. '#default_value' => $node->blastfile,
  888. );
  889. $form['blast']['no_parsed'] = array(
  890. '#title' => t('Number of hits to be parsed'),
  891. '#type' => 'textfield',
  892. '#description' => t("The number of hits to be parsed. Tripal will parse only top 10 hits if you input '10'' in this field."),
  893. '#default_value' => 'all',
  894. );
  895. $form['blast']['blastjob'] = array(
  896. '#type' => 'checkbox',
  897. '#title' => t('Submit a job to parse the xml output into analysisfeatureprop table'),
  898. '#description' => t('Note: features associated with the blast results must '.
  899. 'exist in chado before parsing the file. Otherwise, blast '.
  900. 'results that cannot be linked to a feature will be '.
  901. 'discarded. '),
  902. '#default_value' => $node->blastjob
  903. );
  904. $form['blast']['blastbesthit'] = array(
  905. '#type' => 'checkbox',
  906. '#title' => t('Submit a job to parse the best hit from the xml output so a best hit report can be created'),
  907. '#description' => t('Note: the xml parser for this blast database needs to be set first. '.
  908. 'Otherwise, the hit description may contain undesirable content.'),
  909. '#default_value' => $node->blastbesthit
  910. );
  911. $form['blast']['blastparameters'] = array(
  912. '#title' => t('Parameters'),
  913. '#type' => 'textfield',
  914. '#description' => t('The parameters for running the blast analysis.'),
  915. '#default_value' => $node->blastparameters,
  916. );
  917. return $form;
  918. }
  919. function chado_analysis_blast_validate($node, &$form){
  920. ##dprint_r($node);
  921. // This validation is being used for three activities:
  922. // CASE A: Update a node that exists in both drupal and chado
  923. // CASE B: Synchronizing a node from chado to drupal
  924. // CASE C: Inserting a new node that exists in niether drupal nor chado
  925. // Only nodes being updated will have an nid already
  926. if($node->nid){
  927. //---------------------------------------------------
  928. // CASE A: We are validating a form for updating an existing node
  929. //---------------------------------------------------
  930. // TO DO: check that the new fields don't yield a non-unique primary key in chado
  931. }
  932. else{
  933. // To differentiate if we are syncing or creating a new analysis altogther, see if an
  934. // analysis_id already exists
  935. if($node->analysis_id){
  936. //---------------------------------------------------
  937. // CASE B: Synchronizing a node from chado to drupal
  938. //---------------------------------------------------
  939. }
  940. else{
  941. //---------------------------------------------------
  942. // CASE C: We are validating a form for inserting a new node
  943. //---------------------------------------------------
  944. // The primary key for the chado analysis table is
  945. // program, programversion, sourcename
  946. // Check to see if this analysis really is new -ie, it doesn't have the same
  947. // primary key as any other analysis
  948. $sql = "SELECT analysis_id ".
  949. "FROM {analysis} ".
  950. "WHERE program='%s'".
  951. "AND programversion='%s'".
  952. "AND sourcename='%s'";
  953. $previous_db = tripal_db_set_active('chado');
  954. $analysis_id = db_result(db_query($sql, $node->program, $node->programversion, $node->sourcename));
  955. tripal_db_set_active($previous_db);
  956. if($analysis_id){
  957. //---------------------------------------------------
  958. // this primary key already exists in chado analysis table!
  959. //---------------------------------------------------
  960. // check to see if it has also been synced with drupal
  961. $sql = "SELECT nid FROM {chado_analysis} ".
  962. "WHERE analysis_id = %d";
  963. $node_id = db_result(db_query($sql, $analysis_id));
  964. if($node_id){
  965. //---------------------------------------------------
  966. // the analysis has already been synced with drupal, redirect the user
  967. // to modify that node or start over
  968. //---------------------------------------------------
  969. $error = 'This analysis already exists in the chado database (analysis id ';
  970. $error .= $analysis_id.') and has been synchronized ';
  971. $error .= 'with drupal. See node '.$node_id.' if you wish to update that analysis. ';
  972. $error .= ' For a new analysis, please select a unique primary key ';
  973. $error .= '(primary key consists of sourcename, program and programversion).';
  974. form_set_error('sourcename', t($error));
  975. }
  976. else{
  977. //---------------------------------------------------
  978. // the analysis does not exist in drupal - tell the user
  979. // to sync from chado or create a new unique primary key
  980. //---------------------------------------------------
  981. $error = 'This analysis already exists in the chado database (analysis id ';
  982. $error .= $analysis_id.') but has not been synchronized ';
  983. $error .= 'with drupal. See the tripal admin pages to synchronize. ';
  984. $error .= ' For a new analysis, please select a unique primary key ';
  985. $error .= '(primary key consists of sourcename, program and programversion).';
  986. form_set_error('sourcename', t($error));
  987. }
  988. }
  989. }
  990. }
  991. }
  992. function chado_analysis_blast_insert($node){
  993. global $user;
  994. // Create a timestamp so we can insert it into the chado database
  995. $time = $node->timeexecuted;
  996. $month = $time['month'];
  997. $day = $time['day'];
  998. $year = $time['year'];
  999. $timestamp = $month.'/'.$day.'/'.$year;
  1000. //---------------------------------------------------
  1001. // First add the item to the chado analysis table
  1002. //---------------------------------------------------
  1003. $sql = "INSERT INTO {analysis} ".
  1004. " (name, description, program, programversion, algorithm, ".
  1005. " sourcename, sourceversion, sourceuri, timeexecuted) ".
  1006. "VALUES ('%s','%s','%s','%s','%s','%s','%s','%s','%s')";
  1007. $previous_db = tripal_db_set_active('chado'); // use chado database
  1008. db_query($sql,$node->analysisname, $node->description,
  1009. $node->program,$node->programversion,$node->algorithm,
  1010. $node->sourcename, $node->sourceversion, $node->sourceuri,
  1011. $timestamp);
  1012. // find the newly entered analysis_id
  1013. $sql = "SELECT analysis_id ".
  1014. "FROM {analysis} ".
  1015. "WHERE program='%s'".
  1016. "AND programversion='%s'".
  1017. "AND sourcename='%s'";
  1018. $analysis_id = db_result(db_query($sql, $node->program,
  1019. $node->programversion, $node->sourcename));
  1020. // Get cvterm_id for 'analysis_blast_settings'
  1021. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  1022. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  1023. "WHERE CVT.name = 'analysis_blast_settings' ".
  1024. "AND CV.name = 'tripal'";
  1025. $type_id = db_result(db_query($sql));
  1026. //---------------------------------------------------
  1027. // Insert into chado {analysisprop} table
  1028. //---------------------------------------------------
  1029. $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value) ".
  1030. "VALUES (%d, %d, '%s')";
  1031. $blastsettings = $node->blastdb."|".$node->blastfile."|".$node->blastparameters;
  1032. db_query($sql, $analysis_id, $type_id, $blastsettings);
  1033. // Get cvterm_id for 'analysis_type'
  1034. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  1035. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  1036. "WHERE CVT.name = 'analysis_type' ".
  1037. "AND CV.name = 'tripal'";
  1038. $type_id = db_result(db_query($sql));
  1039. //---------------------------------------------------
  1040. // Insert into chado {analysisprop} table
  1041. //---------------------------------------------------
  1042. $sql = "INSERT INTO {analysisprop} (analysis_id, type_id, value) ".
  1043. "VALUES (%d, %d, '%s')";
  1044. $analysis_type = "tripal_analysis_blast";
  1045. db_query($sql, $analysis_id, $type_id, $analysis_type);
  1046. tripal_db_set_active($previous_db); // switch back to drupal database
  1047. //---------------------------------------------------
  1048. // Add a job if the user wants to parse the xml output
  1049. //---------------------------------------------------
  1050. if($node->blastjob) {
  1051. $job_args[0] = $analysis_id;
  1052. $job_args[1] = $node->blastdb;
  1053. $job_args[2] = $node->blastfile;
  1054. $job_args[3] = $node->no_parsed;
  1055. if (is_readable($node->blastfile)) {
  1056. $fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
  1057. tripal_add_job("Parse blast: $fname",'tripal_analysis_blast',
  1058. 'tripal_analysis_blast_parseXMLFile', $job_args, $user->uid);
  1059. } else {
  1060. drupal_set_message("Can not open blast output file. Job not scheduled.");
  1061. }
  1062. }
  1063. if($node->blastbesthit) {
  1064. $j_args[0] = $analysis_id;
  1065. tripal_add_job("Parse best hit: $node->analysisname",'tripal_analysis_blast',
  1066. 'tripal_analysis_blast_parse_best_hit', $j_args, $user->uid);
  1067. }
  1068. // next add the item to the drupal table
  1069. $sql = "INSERT INTO {chado_analysis} (nid, vid, analysis_id) ".
  1070. "VALUES (%d, %d, %d)";
  1071. db_query($sql,$node->nid,$node->vid,$analysis_id);
  1072. // Create a title for the analysis node using the unique keys so when the
  1073. // node is saved, it will have a title
  1074. $record = new stdClass();
  1075. // If the analysis has a name, use it as the node title. If not, construct
  1076. // the title using program, programversion, and sourcename
  1077. if ($node->analysisname) {
  1078. $record->title = $node->analysisname;
  1079. } else {
  1080. //Construct node title as "program (version)
  1081. $record->title = "$node->program ($node->programversion)";
  1082. }
  1083. $record->nid = $node->nid;
  1084. drupal_write_record('node',$record,'nid');
  1085. drupal_write_record('node_revisions',$record,'nid');
  1086. }
  1087. /*******************************************************************************
  1088. * Delete blast anlysis
  1089. */
  1090. function chado_analysis_blast_delete($node){
  1091. // Before removing, get analysis_id so we can remove it from chado database
  1092. // later
  1093. $sql_drupal = "SELECT analysis_id ".
  1094. "FROM {chado_analysis} ".
  1095. "WHERE nid = %d ".
  1096. "AND vid = %d";
  1097. $analysis_id = db_result(db_query($sql_drupal, $node->nid, $node->vid));
  1098. // Remove data from the {chado_analysis}, {node}, and {node_revisions} tables
  1099. $sql_del = "DELETE FROM {chado_analysis} ".
  1100. "WHERE nid = %d ".
  1101. "AND vid = %d";
  1102. db_query($sql_del, $node->nid, $node->vid);
  1103. $sql_del = "DELETE FROM {node} ".
  1104. "WHERE nid = %d ".
  1105. "AND vid = %d";
  1106. db_query($sql_del, $node->nid, $node->vid);
  1107. $sql_del = "DELETE FROM {node_revisions} ".
  1108. "WHERE nid = %d ".
  1109. "AND vid = %d";
  1110. db_query($sql_del, $node->nid, $node->vid);
  1111. //Remove from analysisfeatureprop, analysisfeature, analysis, and analysisprop tables
  1112. $previous_db = tripal_db_set_active('chado');
  1113. $sql = "SELECT analysisfeature_id FROM {analysisfeature} WHERE analysis_id=%d";
  1114. $results = db_query($sql, $analysis_id);
  1115. while ($af = db_fetch_object($results)) {
  1116. db_query("DELETE FROM {analysisfeatureprop} WHERE analysisfeature_id = %d", $af->analysisfeature_id);
  1117. }
  1118. db_query("DELETE FROM {analysisfeature} WHERE analysis_id = %d", $analysis_id);
  1119. db_query("DELETE FROM {analysisprop} WHERE analysis_id = %d", $analysis_id);
  1120. db_query("DELETE FROM {analysis} WHERE analysis_id = %d", $analysis_id);
  1121. tripal_db_set_active($previous_db);
  1122. }
  1123. /*******************************************************************************
  1124. * Update blast analysis
  1125. */
  1126. function chado_analysis_blast_update($node){
  1127. global $user;
  1128. if($node->revision){
  1129. // TODO -- decide what to do about revisions
  1130. } else {
  1131. // Create a timestamp so we can insert it into the chado database
  1132. $time = $node->timeexecuted;
  1133. $month = $time['month'];
  1134. $day = $time['day'];
  1135. $year = $time['year'];
  1136. $timestamp = $month.'/'.$day.'/'.$year;
  1137. // get the analysis_id for this node:
  1138. $sql = "SELECT analysis_id ".
  1139. "FROM {chado_analysis} ".
  1140. "WHERE vid = %d";
  1141. $analysis_id = db_fetch_object(db_query($sql, $node->vid))->analysis_id;
  1142. $sql = "UPDATE {analysis} ".
  1143. "SET name = '%s', ".
  1144. " description = '%s', ".
  1145. " program = '%s', ".
  1146. " programversion = '%s', ".
  1147. " algorithm = '%s', ".
  1148. " sourcename = '%s', ".
  1149. " sourceversion = '%s', ".
  1150. " sourceuri = '%s', ".
  1151. " timeexecuted = '%s' ".
  1152. "WHERE analysis_id = %d ";
  1153. $previous_db = tripal_db_set_active('chado'); // use chado database
  1154. db_query($sql, $node->analysisname, $node->description, $node->program,
  1155. $node->programversion,$node->algorithm,$node->sourcename,
  1156. $node->sourceversion, $node->sourceuri, $timestamp, $analysis_id);
  1157. // Get cvterm_id for 'analysis_blast_settings'
  1158. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  1159. "INNER JOIN cv CV ON CV.cv_id = CVT.cv_id ".
  1160. "WHERE CVT.name = 'analysis_blast_settings' ".
  1161. "AND CV.name = 'tripal'";
  1162. $type_id = db_result(db_query($sql));
  1163. $sql = "UPDATE {analysisprop} ".
  1164. "SET value = '%s' ".
  1165. "WHERE analysis_id = %d AND type_id = %d";
  1166. $blastsettings = $node->blastdb."|".$node->blastfile."|".$node->blastparameters;
  1167. db_query($sql, $blastsettings, $analysis_id, $type_id);
  1168. tripal_db_set_active($previous_db); // switch back to drupal database
  1169. // Add a job if the user wants to parse the xml output
  1170. if($node->blastjob) {
  1171. $job_args[0] = $analysis_id;
  1172. $job_args[1] = $node->blastdb;
  1173. $job_args[2] = $node->blastfile;
  1174. $job_args[3] = $node->no_parsed;
  1175. if (is_readable($node->blastfile)) {
  1176. $fname = preg_replace("/.*\/(.*)/", "$1", $node->blastfile);
  1177. tripal_add_job("Parse blast: $fname",'tripal_analysis_blast',
  1178. 'tripal_analysis_blast_parseXMLFile', $job_args, $user->uid);
  1179. } else {
  1180. drupal_set_message("Can not open blast output file. Job not scheduled.");
  1181. }
  1182. }
  1183. if($node->blastbesthit) {
  1184. $j_args[0] = $analysis_id;
  1185. tripal_add_job("Parse best hit: $node->analysisname",'tripal_analysis_blast',
  1186. 'tripal_analysis_blast_parse_best_hit', $j_args, $user->uid);
  1187. }
  1188. // Create a title for the analysis node using the unique keys so when the
  1189. // node is saved, it will have a title
  1190. $record = new stdClass();
  1191. // If the analysis has a name, use it as the node title. If not, construct
  1192. // the title using program, programversion, and sourcename
  1193. if ($node->analysisname) {
  1194. $record->title = $node->analysisname;
  1195. } else {
  1196. //Construct node title as "program (version)
  1197. $record->title = "$node->program ($node->programversion)";
  1198. }
  1199. $record->nid = $node->nid;
  1200. drupal_write_record('node',$record,'nid');
  1201. drupal_write_record('node_revisions',$record,'nid');
  1202. }
  1203. }
  1204. /*******************************************************************************
  1205. * When a node is requested by the user this function is called to allow us
  1206. * to add auxiliary data to the node object.
  1207. */
  1208. function chado_analysis_blast_load($node){
  1209. // get the analysis_id for this node:
  1210. $sql = "SELECT analysis_id FROM {chado_analysis} WHERE nid = %d";
  1211. $ana_node = db_fetch_object(db_query($sql, $node->nid));
  1212. $additions = new stdClass();
  1213. if ($ana_node) {
  1214. // get analysis information
  1215. $sql = "SELECT Analysis_id, name AS analysisname, description, program, ".
  1216. " programversion, algorithm, sourcename, sourceversion, ".
  1217. " sourceuri, timeexecuted ".
  1218. "FROM {Analysis} ".
  1219. "WHERE Analysis_id = $ana_node->analysis_id";
  1220. $previous_db = tripal_db_set_active('chado'); // use chado database
  1221. $additions = db_fetch_object(db_query($sql));
  1222. // get number of features assc with this analysis
  1223. $sql = "SELECT count(feature_id) as featurecount ".
  1224. "FROM {Analysisfeature} ".
  1225. "WHERE Analysis_id = %d";
  1226. $additions->featurecount = db_result(db_query($sql, $ana_node->analysis_id));
  1227. // get cvterm_id for 'analysis_blast_settings'
  1228. $sql = "SELECT CVT.cvterm_id FROM {cvterm} CVT ".
  1229. "INNER JOIN cv ON cv.cv_id = CVT.cv_id ".
  1230. "WHERE CVT.name = 'analysis_blast_settings' ".
  1231. "AND CV.name = 'tripal'";
  1232. $type_id = db_result(db_query($sql));
  1233. // get analysisprop information
  1234. $sql = "SELECT value FROM {analysisprop} ".
  1235. "WHERE analysis_id = %d ".
  1236. "AND type_id = %d";
  1237. $analysisprop = db_result(db_query($sql, $ana_node->analysis_id, $type_id));
  1238. $prop_values = explode ("|", $analysisprop, 1);
  1239. $additions->blastdb = $prop_values[0];
  1240. $additions->blastfile = $prop_values[1];
  1241. $additions->blastparameters = $prop_values[2];
  1242. tripal_db_set_active($previous_db); // now use drupal database
  1243. }
  1244. // If the analysis has a name, use it as the node title. If not, construct
  1245. // the title using program programversion, and sourcename
  1246. if ($additions->analysisname) {
  1247. $additions->title = $additions->analysisname;
  1248. } else {
  1249. // Construct node title as "program version (source)
  1250. $additions->title = "$additions->program ($additions->programversion)";
  1251. }
  1252. return $additions;
  1253. }
  1254. /*******************************************************************************
  1255. * This function customizes the view of the chado_analysis node. It allows
  1256. * us to generate the markup.
  1257. */
  1258. function chado_analysis_blast_view ($node, $teaser = FALSE, $page = FALSE) {
  1259. // use drupal's default node view:
  1260. //dprint_r($node);
  1261. if (!$teaser) {
  1262. $node = node_prepare($node, $teaser);
  1263. // When previewing a node submitting form, it shows 'Array' instead of
  1264. // correct date format. We need to format the date here
  1265. $time = $node->timeexecuted;
  1266. if(is_array($time)){
  1267. $month = $time['month'];
  1268. $day = $time['day'];
  1269. $year = $time['year'];
  1270. $timestamp = $year.'-'.$month.'-'.$day;
  1271. $node->timeexecuted = $timestamp;
  1272. }
  1273. // When viewing a node, we need to reformat the analysisprop since we
  1274. // separate each value with a bar |
  1275. if (preg_match("/.*\|.*\|.*/",$node->blastdb)) {
  1276. $prop_values = explode("|", $node->blastdb);
  1277. $node->blastdb = $prop_values[0];
  1278. $node->blastfile = $prop_values[1];
  1279. $node->blastparameters = $prop_values[2];
  1280. }
  1281. }
  1282. return $node;
  1283. }
  1284. /*******************************************************************************
  1285. * Set the permission types that the chado module uses. Essentially we
  1286. * want permissionis that protect creation, editing and deleting of chado
  1287. * data objects
  1288. */
  1289. function tripal_analysis_blast_perm(){
  1290. return array(
  1291. 'access chado_analysis_blast content',
  1292. 'create chado_analysis_blast content',
  1293. 'delete chado_analysis_blast content',
  1294. 'edit chado_analysis_blast content',
  1295. );
  1296. }
  1297. /*******************************************************************************
  1298. * The following function proves access control for users trying to
  1299. * perform actions on data managed by this module
  1300. */
  1301. function chado_analysis_blast_access($op, $node, $account){
  1302. if ($op == 'create') {
  1303. return user_access('create chado_analysis_blast content', $account);
  1304. }
  1305. if ($op == 'update') {
  1306. if (user_access('edit chado_analysis_blast content', $account)) {
  1307. return TRUE;
  1308. }
  1309. }
  1310. if ($op == 'delete') {
  1311. if (user_access('delete chado_analysis_blast content', $account)) {
  1312. return TRUE;
  1313. }
  1314. }
  1315. if ($op == 'view') {
  1316. if (user_access('access chado_analysis_blast content', $account)) {
  1317. return TRUE;
  1318. }
  1319. }
  1320. return FALSE;
  1321. }
  1322. /********************************************************************************
  1323. *
  1324. */
  1325. function tripal_analysis_blast_get_result_object($xml_string,$db,$max,$feature_id, $analysis) {
  1326. $blast_object = new stdClass();
  1327. // Get the parser using db_id
  1328. $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
  1329. $parser = db_fetch_object(db_query($sql, $db->db_id));
  1330. $db_name = $parser->displayname;
  1331. $is_genbank = $parser->genbank_style;
  1332. $regex_hit_id = $parser->regex_hit_id;
  1333. $regex_hit_def = $parser->regex_hit_def;
  1334. $regex_hit_accession = $parser->regex_hit_accession;
  1335. // set default if regular expressions have not been specified
  1336. if(!$regex_hit_id){
  1337. $regex_hit_id = '/^(.*?)\s.*$/';
  1338. } else {
  1339. $regex_hit_id = '/'.$regex_hit_id.'/';
  1340. }
  1341. if(!$regex_hit_def){
  1342. $regex_hit_def = '/^.*?\s(.*)$/';
  1343. } else {
  1344. $regex_hit_def = '/'.$regex_hit_def.'/';
  1345. }
  1346. if(!$regex_hit_accession){
  1347. $regex_hit_accession = '/^(.*?)\s.*$/';
  1348. } else {
  1349. $regex_hit_accession = '/'.$regex_hit_accession.'/';
  1350. }
  1351. // Get analysis information
  1352. $blast_object->analysis = $analysis;
  1353. $blast_object->db = $db;
  1354. if (!$db_name) {
  1355. $blast_object->title = $analysis->name;
  1356. } else {
  1357. $blast_object->title = $db_name;
  1358. }
  1359. // Find node id for the analysis
  1360. $ana_nid = db_result(db_query("SELECT nid FROM {chado_analysis} WHERE analysis_id = %d", $analysis->aid));
  1361. $blast_object->ana_nid = $ana_nid;
  1362. $blast_object->ana_time = $analysis->time;
  1363. $blast_object->ana_name = $analysis->name;
  1364. // Load the file. This XML file should be an extract
  1365. // of the original XML file with only a single iteration.
  1366. // An iteration is essentially all the hits for a single
  1367. // query sequence.
  1368. $xml_output = simplexml_load_string($xml_string);
  1369. $iteration = '';
  1370. // new XML file parser has added the feature name within <Iteration_query-def> tags.
  1371. if ($xml_output->getName() == 'Iteration') {
  1372. foreach ($xml_output->children() as $xml_tag) {
  1373. if ($xml_tag->getName() == 'Iteration_query-def') {
  1374. // Here we show the feature name again to check if we pull the correct data
  1375. $blast_object->xml_tag = $xml_tag;
  1376. } else if ($xml_tag->getName() == 'Iteration_hits') {
  1377. $iteration = $xml_tag;
  1378. }
  1379. }
  1380. // This is for the file parsed by the old parser
  1381. } else {
  1382. $iteration = $xml_output;
  1383. }
  1384. $number_hits = 0;
  1385. foreach($iteration->children() as $hits){
  1386. $number_hits ++;
  1387. }
  1388. // add the links for updating blast info using Ajax
  1389. $blast_object->max = $max;
  1390. $blast_object->number_hits = $number_hits;
  1391. $blast_object->feature_id = $feature_id;
  1392. $hits_array = array();
  1393. $hit_count = 0;
  1394. foreach($iteration->children() as $hits){
  1395. $hsp_array = array();
  1396. $counter = 0;
  1397. foreach($hits->children() as $hit){
  1398. $best_evalue = 0;
  1399. $best_identity = 0;
  1400. $best_len = 0;
  1401. $element_name = $hit->getName();
  1402. if($element_name == 'Hit_id'){
  1403. // if parsing "name, acc, desc" from three tags (1/3)
  1404. if ($is_genbank) {
  1405. $hit_name = $hit;
  1406. }
  1407. } else if($element_name == 'Hit_def'){
  1408. if($is_genbank){
  1409. $description = $hit;
  1410. } else {
  1411. $accession = preg_replace($regex_hit_accession,"$1",$hit);
  1412. $hit_name = preg_replace($regex_hit_id,"$1",$hit);
  1413. $description = preg_replace($regex_hit_def,"$1",$hit);
  1414. }
  1415. } else if($element_name == 'Hit_accession'){
  1416. // if parsing "name, acc, desc" from three tags (3/3)
  1417. if ($is_genbank){
  1418. $accession = $hit;
  1419. }
  1420. // now run through each HSP for this hit
  1421. } else if($element_name == 'Hit_hsps'){
  1422. foreach($hit->children() as $hsp){
  1423. foreach($hsp->children() as $hsp_info){
  1424. $element_name = $hsp_info->getName();
  1425. if($element_name == 'Hsp_num'){
  1426. $hsp_num = $hsp_info;
  1427. }
  1428. if($element_name == 'Hsp_bit-score'){
  1429. $hsp_bit_score = $hsp_info;
  1430. }
  1431. if($element_name == 'Hsp_score'){
  1432. $hsp_score = $hsp_info;
  1433. }
  1434. if($element_name == 'Hsp_evalue'){
  1435. $hsp_evalue = $hsp_info;
  1436. // use the first evalue for this set of HSPs
  1437. // as the best evalue. This get's shown as
  1438. // info for the overall match.
  1439. if(!$best_evalue){
  1440. $best_evalue = $hsp_evalue;
  1441. }
  1442. }
  1443. if($element_name == 'Hsp_query-from'){
  1444. $hsp_query_from = $hsp_info;
  1445. }
  1446. if($element_name == 'Hsp_query-to'){
  1447. $hsp_query_to = $hsp_info;
  1448. }
  1449. if($element_name == 'Hsp_hit-from'){
  1450. $hsp_hit_from = $hsp_info;
  1451. }
  1452. if($element_name == 'Hsp_hit-to'){
  1453. $hsp_hit_to = $hsp_info;
  1454. }
  1455. if($element_name == 'Hsp_query-frame'){
  1456. $hsp_query_frame = $hsp_info;
  1457. }
  1458. if($element_name == 'Hsp_identity'){
  1459. $hsp_identity = $hsp_info;
  1460. // use the first evalue for this set of HSPs
  1461. // as the best evalue. This get's shown as
  1462. // info for the overall match.
  1463. if(!$best_identity){
  1464. $best_identity = $hsp_identity;
  1465. }
  1466. }
  1467. if($element_name == 'Hsp_positive'){
  1468. $hsp_positive = $hsp_info;
  1469. }
  1470. if($element_name == 'Hsp_align-len'){
  1471. $hsp_align_len = $hsp_info;
  1472. // use the first evalue for this set of HSPs
  1473. // as the best evalue. This get's shown as
  1474. // info for the overall match.
  1475. if(!$best_len){
  1476. $best_len = $hsp_align_len;
  1477. }
  1478. }
  1479. if($element_name == 'Hsp_qseq'){
  1480. $hsp_qseq = $hsp_info;
  1481. }
  1482. if($element_name == 'Hsp_hseq'){
  1483. $hsp_hseq = $hsp_info;
  1484. }
  1485. if($element_name == 'Hsp_midline'){
  1486. $hsp_midline = $hsp_info;
  1487. }
  1488. }
  1489. $hsp_content = array();
  1490. $hsp_content['hsp_num'] = $hsp_num;
  1491. $hsp_content['bit_score'] = $hsp_bit_score;
  1492. $hsp_content['score'] = $hsp_score;
  1493. $hsp_content['evalue'] = $hsp_evalue;
  1494. $hsp_content['query_frame'] = $hsp_query_frame;
  1495. $hsp_content['qseq'] = $hsp_qseq;
  1496. $hsp_content['midline'] = $hsp_midline;
  1497. $hsp_content['hseq'] = $hsp_hseq;
  1498. $hsp_content['hit_from'] = $hsp_hit_from;
  1499. $hsp_content['hit_to'] = $hsp_hit_to;
  1500. $hsp_content['identity'] = $hsp_identity;
  1501. $hsp_content['align_len'] = $hsp_align_len;
  1502. $hsp_content['positive'] = $hsp_positive;
  1503. $hsp_content['query_from'] = $hsp_query_from;
  1504. $hsp_content['query_to'] = $hsp_query_to;
  1505. $hsp_array[$counter] = $hsp_content;
  1506. $counter ++;
  1507. }
  1508. }
  1509. }
  1510. $arrowr_url = url(drupal_get_path('theme', 'tripal')."/images/arrow_r.png");
  1511. $hits_array[$hit_count]['arrowr_url'] = $arrowr_url;
  1512. $hits_array[$hit_count]['accession'] = $accession;
  1513. $hits_array[$hit_count]['hit_name'] = $hit_name;
  1514. if($accession && $db->urlprefix){
  1515. $hits_array[$hit_count]['hit_url'] = "$db->urlprefix$accession";
  1516. } else {
  1517. // Test if this is another feature in the database
  1518. $sql = "SELECT feature_id FROM {feature} WHERE uniquename = '%s'";
  1519. $previous_db = db_set_active('chado');
  1520. $hit_feature_id = db_result(db_query($sql, $hit_name));
  1521. db_set_active($previous_db);
  1522. // If it is, add link to that feature
  1523. if ($hit_feature_id) {
  1524. $hits_array[$hit_count]['hit_url'] = "ID$hit_feature_id";
  1525. }
  1526. }
  1527. $hits_array[$hit_count]['best_evalue'] = $best_evalue;
  1528. $percent_identity = number_format($best_identity/$best_len*100, 2);
  1529. $hits_array[$hit_count]['percent_identity'] = $percent_identity;
  1530. $hits_array[$hit_count]['description'] = $description;
  1531. $hits_array[$hit_count]['hsp'] = $hsp_array;
  1532. $hit_count ++;
  1533. // if we've hit the maximum number of hits then return
  1534. if($max > 0 && $hit_count >= $max){
  1535. break;
  1536. }
  1537. }
  1538. $blast_object->hits_array = $hits_array;
  1539. return $blast_object;
  1540. }
  /********************************************************************************
   * Parse the best hit to generate the best hit homology report
   *
   * For every feature attached to the given blast analysis, the stored
   * 'analysis_blast_output_iteration_hits' XML is read, its first hit is
   * extracted, and six 'analysis_blast_besthit_*' properties (query, match,
   * description, evalue, identity, length) are inserted into, or updated in,
   * the analysisfeatureprop table. Progress is printed as percentages.
   *
   * @param $analysis_id
   *   The analysis_id of the blast analysis whose features are parsed.
   * @param $job_id
   *   Optional. Id handed to tripal_job_set_progress(). When omitted, no job
   *   progress is recorded (the percentages are still printed).
   *   NOTE(review): presumably the Tripal job launcher supplies this as a
   *   trailing argument -- confirm against the caller.
   */
  function tripal_analysis_blast_parse_best_hit($analysis_id, $job_id = NULL) {
    // cvterm names of the stored properties, in the order they are written.
    $besthit_terms = array(
      'analysis_blast_besthit_query',
      'analysis_blast_besthit_match',
      'analysis_blast_besthit_description',
      'analysis_blast_besthit_evalue',
      'analysis_blast_besthit_identity',
      'analysis_blast_besthit_length',
    );

    $previous_db = tripal_db_set_active('chado');

    // Select all features for this blast analysis, and save them to the
    // 'featureSet' array together with their analysisfeature_id.
    $sql = "SELECT feature_id, analysisfeature_id
            FROM {analysisfeature} AF
            WHERE analysis_id = %d";
    $result = db_query($sql, $analysis_id);
    $featureSet = array();
    while ($feature = db_fetch_object($result)) {
      $featureSet[] = $feature;
    }
    $counter = count($featureSet);

    // Get analysis information including 'Time', 'Name', and 'DB Settings'
    $sql = "SELECT value, name, to_char(timeexecuted, 'MM-DD-YYYY') AS time
            FROM {analysis} A
            INNER JOIN {analysisprop} AP ON A.analysis_id = AP.analysis_id
            WHERE A.analysis_id = %d
            AND type_id= (SELECT cvterm_id
            FROM {cvterm}
            WHERE name = 'analysis_blast_settings')";
    $analysis = db_fetch_object(db_query($sql, $analysis_id));

    // The property type ids do not depend on the feature, so look them up once
    // instead of once per feature.
    $sql = "SELECT cvterm_id FROM {cvterm} WHERE name = '%s' AND cv_id = (SELECT cv_id FROM {cv} WHERE name = 'tripal')";
    $type_ids = array();
    foreach ($besthit_terms as $term) {
      $type_ids[$term] = db_result(db_query($sql, $term));
    }

    tripal_db_set_active($previous_db);

    // Parse the blast settings; the first '|' separated field is the db_id.
    $blastsettings = $analysis ? explode("|", $analysis->value) : array(0);
    $db_id = $blastsettings[0];

    // Get the xml description parser using db_id
    $sql = "SELECT * FROM {tripal_analysis_blast} WHERE db_id = %d";
    $parser = db_fetch_object(db_query($sql, $db_id));
    $is_genbank = $parser ? $parser->genbank_style : 0;

    // set default description parser if regular expressions have not been specified
    $regex_hit_id = '/^(.*?)\s.*$/';
    if ($parser && $parser->regex_hit_id) {
      $regex_hit_id = '/'.$parser->regex_hit_id.'/';
    }
    $regex_hit_def = '/^.*?\s(.*)$/';
    if ($parser && $parser->regex_hit_def) {
      $regex_hit_def = '/'.$parser->regex_hit_def.'/';
    }

    $sql_hits = "SELECT value
                 FROM {analysisfeatureprop} AFP
                 INNER JOIN {analysisfeature} AF ON AFP.analysisfeature_id = AF.analysisfeature_id
                 WHERE analysis_id = %d
                 AND feature_id = %d
                 AND type_id = (SELECT cvterm_id FROM cvterm WHERE name='analysis_blast_output_iteration_hits' AND cv_id = (SELECT cv_id FROM cv WHERE name='tripal'))";

    // Report progress roughly every 1%. The interval must never be 0 (fewer
    // than 100 features), otherwise the modulo below divides by zero.
    $interval = max(1, intval($counter * 0.01));
    for ($i = 0; $i < $counter; $i++) {
      if ($i != 0 && $i % $interval == 0) {
        $percentage = (int) ($i / $counter * 100);
        if ($job_id !== NULL) {
          tripal_job_set_progress($job_id, $percentage);
        }
        print $percentage."% ";
      }

      $feature = $featureSet[$i];

      // Everything between here and the restore below talks to chado. There is
      // deliberately no 'continue' in between, so the previous database is
      // restored for every feature, including those without hits.
      $previous_db = tripal_db_set_active('chado');
      $xml = db_result(db_query($sql_hits, $analysis_id, $feature->feature_id));
      $best = _tripal_analysis_blast_besthit_extract($xml, $is_genbank, $regex_hit_id, $regex_hit_def);
      if ($best !== FALSE && $feature->analysisfeature_id) {
        foreach ($besthit_terms as $term) {
          _tripal_analysis_blast_besthit_save($feature->analysisfeature_id, $type_ids[$term], $best[$term]);
        }
      }
      tripal_db_set_active($previous_db);
    }
    print "100%\n";
    return;
  }

  /********************************************************************************
   * Extract the best (first) hit from the blast XML stored for one feature.
   *
   * @param $xml
   *   XML string: either an <Iteration> element (new parser) or the hits
   *   element itself (old parser).
   * @param $is_genbank
   *   When true the hit name and description are read from the separate
   *   <Hit_id> and <Hit_def> tags; otherwise both are cut out of <Hit_def>
   *   with the regular expressions below.
   * @param $regex_hit_id
   *   Delimited regular expression whose first group is the hit name.
   * @param $regex_hit_def
   *   Delimited regular expression whose first group is the hit description.
   *
   * @return
   *   FALSE when the XML is empty, cannot be parsed, or contains no hit.
   *   Otherwise an array of property values keyed by the
   *   'analysis_blast_besthit_*' cvterm name under which each is stored.
   */
  function _tripal_analysis_blast_besthit_extract($xml, $is_genbank, $regex_hit_id, $regex_hit_def) {
    if (!$xml) {
      return FALSE;
    }
    $xml_output = simplexml_load_string($xml);
    if ($xml_output === FALSE) {
      return FALSE;
    }

    $query = '';
    $iteration = NULL;
    // new XML file parser has added the feature name within <Iteration_query-def> tags.
    if ($xml_output->getName() == 'Iteration') {
      foreach ($xml_output->children() as $xml_tag) {
        $tag_name = $xml_tag->getName();
        if ($tag_name == 'Iteration_query-def') {
          $query = (string) $xml_tag;
        }
        else if ($tag_name == 'Iteration_hits') {
          $iteration = $xml_tag;
        }
      }
    }
    // This is for the file parsed by the old parser
    else {
      $iteration = $xml_output;
    }
    if (!is_object($iteration)) {
      return FALSE;
    }

    // Only the first hit is the best hit.
    $best_hit = NULL;
    foreach ($iteration->children() as $hits) {
      $best_hit = $hits;
      break;
    }
    if ($best_hit === NULL) {
      return FALSE;
    }

    // The query name is the first word of the query definition.
    $query = explode(" ", $query);
    $query = $query[0];

    // All values start out empty for this hit so nothing can leak in from a
    // previously parsed feature. NULL means "not seen yet".
    $hit_name = '';
    $description = '';
    $best_evalue = NULL;
    $best_identity = NULL;
    $best_len = NULL;

    foreach ($best_hit->children() as $hit) {
      switch ($hit->getName()) {
        case 'Hit_id':
          if ($is_genbank) {
            $hit_name = (string) $hit;
          }
          break;

        case 'Hit_def':
          if ($is_genbank) {
            $description = (string) $hit;
          }
          else {
            $hit_name = preg_replace($regex_hit_id, "$1", (string) $hit);
            $description = preg_replace($regex_hit_def, "$1", (string) $hit);
          }
          break;

        case 'Hit_hsps':
          // Use the first non-empty evalue, identity and alignment length
          // found in this set of HSPs as the values for the overall match.
          foreach ($hit->children() as $hsp) {
            foreach ($hsp->children() as $hsp_info) {
              $value = (string) $hsp_info;
              if ($value === '') {
                continue;
              }
              $hsp_tag = $hsp_info->getName();
              if ($hsp_tag == 'Hsp_evalue' && $best_evalue === NULL) {
                $best_evalue = $value;
              }
              else if ($hsp_tag == 'Hsp_identity' && $best_identity === NULL) {
                $best_identity = $value;
              }
              else if ($hsp_tag == 'Hsp_align-len' && $best_len === NULL) {
                $best_len = $value;
              }
            }
          }
          break;
      }
    }

    if ($best_evalue === NULL) {
      $best_evalue = '0';
    }
    if ($best_identity === NULL) {
      $best_identity = '0';
    }
    if ($best_len === NULL) {
      $best_len = '0';
    }

    // Shorten the mantissa of evalues in scientific notation to one decimal.
    $e_digit = explode("e-", $best_evalue);
    if (count($e_digit) == 2) {
      $best_evalue = number_format((float) $e_digit[0], 1)."e-".$e_digit[1];
    }

    // Percent identity relative to the alignment length; an unknown (zero)
    // length yields 0.0 rather than a division by zero.
    $percent = 0;
    if ((float) $best_len != 0) {
      $percent = (float) $best_identity / (float) $best_len * 100;
    }
    $percent_identity = number_format($percent, 1);

    return array(
      'analysis_blast_besthit_query' => $query,
      'analysis_blast_besthit_match' => $hit_name,
      'analysis_blast_besthit_description' => $description,
      'analysis_blast_besthit_evalue' => $best_evalue,
      'analysis_blast_besthit_identity' => $percent_identity,
      'analysis_blast_besthit_length' => $best_len,
    );
  }

  /********************************************************************************
   * Store one best hit property for an analysisfeature.
   *
   * Updates the existing analysisfeatureprop row for this analysisfeature and
   * type when there is one, and inserts a new row (rank 0) otherwise. The
   * caller is expected to have made chado the active database.
   *
   * @param $af_id
   *   The analysisfeature_id the property belongs to.
   * @param $type_id
   *   The cvterm_id of the property type.
   * @param $value
   *   The property value.
   */
  function _tripal_analysis_blast_besthit_save($af_id, $type_id, $value) {
    $sql = "SELECT analysisfeatureprop_id FROM {analysisfeatureprop} WHERE analysisfeature_id = %d AND type_id = %d";
    $afp_id = db_result(db_query($sql, $af_id, $type_id));
    if ($afp_id) {
      $sql = "UPDATE {analysisfeatureprop} SET value = '%s', rank = 0 WHERE analysisfeatureprop_id = %d";
      db_query($sql, $value, $afp_id);
    }
    else {
      $sql = "INSERT INTO {analysisfeatureprop} (analysisfeature_id, type_id, value, rank) VALUES (%d, %d, '%s', 0)";
      db_query($sql, $af_id, $type_id, $value);
    }
  }