tripal_phylogeny.taxonomy.inc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. <?php
  2. /**
  3. * Generates a page that contains the taxonomy view.
  4. */
  5. function tripal_phylogeny_taxonomy_view() {
  6. $values = array(
  7. 'type_id' => array(
  8. 'name' => 'taxonomy',
  9. ),
  10. );
  11. $phylotree = chado_generate_var('phylotree', $values);
  12. $node = new stdClass();
  13. $node->phylotree = $phylotree;
  14. return theme('tripal_phylogeny_taxonomic_tree', array('node' => $node));
  15. }
  16. /**
  17. *
  18. */
  19. function tripal_phylogeny_taxonomy_load_form($form, &$form_state) {
  20. $form['instructions'] = array(
  21. '#type' => 'item',
  22. '#markup' => '',
  23. );
  24. $form['import_existing'] = array(
  25. '#type' => 'checkbox',
  26. '#title' => 'Import taxonomy for existing species.',
  27. '#description' => t('The NCBI Taxonmic Importer examines the organisms
  28. currently present in the database and queries NCBI for the
  29. taxonomic details. If the importer is able to match the
  30. genus and species with NCBI the species details will be imported,
  31. and a page containing the taxonomic tree will be created.'),
  32. );
  33. $form['submit'] = array(
  34. '#type' => 'submit',
  35. '#name' => 'import',
  36. '#value' => 'Submit',
  37. );
  38. return $form;
  39. }
  40. /**
  41. *
  42. * @param unknown $form
  43. * @param unknown $form_state
  44. */
  45. function tripal_phylogeny_taxonomy_load_form_validate($form, &$form_state) {
  46. global $user;
  47. if (!$form_state['values']['import_existing']) {
  48. form_set_error('import_exists', 'Please confirm the import by clicking the checkbox.');
  49. }
  50. }
  51. /**
  52. *
  53. * @param unknown $form
  54. * @param unknown $form_state
  55. */
  56. function tripal_phylogeny_taxonomy_load_form_submit($form, &$form_state) {
  57. global $user;
  58. if ($form_state['values']['import_existing']) {
  59. $args = array();
  60. tripal_add_job("Import NCBI Taxonomy", 'tripal_phylogeny',
  61. 'tripal_phylogeny_ncbi_taxonomy_import', $args, $user->uid);
  62. }
  63. }
  64. /**
  65. *
  66. * @param unknown $job_id
  67. */
  68. function tripal_phylogeny_ncbi_taxonomy_import($job_id) {
  69. print "\nNOTE: Importing of NCBI taxonomy data is performed using a database transaction. \n" .
  70. "If the load fails or is terminated prematurely then the entire set of \n" .
  71. "insertions/updates is rolled back and will not be found in the database\n\n";
  72. $transaction = db_transaction();
  73. try {
  74. // TDDO: there should be an API function named tripal_insert_analysis().
  75. // But until then we have to insert the analysis manually.
  76. // Get the version of this module for the analysis record:
  77. $info = system_get_info('module', 'tripal_phylogeny');
  78. $version = $info['version'];
  79. $analysis_name = 'NCBI Taxonomy Tree Import';
  80. // If the analysis record already exists then don't add it again.
  81. $analysis = chado_select_record('analysis', array('*'), array('name' => $analysis_name));
  82. if (count($analysis) == 0) {
  83. $values = array(
  84. 'name' => 'NCBI Taxonomy Tree Import',
  85. 'description' => 'Used to import NCBI taxonomy details for organisms in this database.',
  86. 'program' => 'Tripal Phylogeny Module NCBI Taxonomy Importer',
  87. 'programversion' => $version,
  88. 'sourcename' => 'NCBI Taxonomy',
  89. 'sourceuri' => 'http://www.ncbi.nlm.nih.gov/taxonomy',
  90. );
  91. $analysis = chado_insert_record('analysis', $values);
  92. if (!$analysis) {
  93. throw new Exception("Cannot add NCBI Taxonomy Tree Import Analysis.");
  94. }
  95. }
  96. else {
  97. $analysis = $analysis[0];
  98. }
  99. // If the tree already exists then don't insert it again.
  100. global $site_name;
  101. $tree_name = $site_name . 'Taxonomy Tree';
  102. $phylotree = chado_select_record('phylotree', array('*'), array('name' => $tree_name));
  103. if (count($phylotree) == 0) {
  104. // Add the taxonomic tree.
  105. $options = array(
  106. 'name' => $site_name . 'Taxonomy Tree',
  107. 'description' => 'The taxonomic tree of species present on this site.',
  108. 'leaf_type' => 'taxonomy',
  109. 'analysis_id' => $analysis->analysis_id,
  110. 'tree_file' => '/dev/null',
  111. 'format' => 'taxonomy',
  112. 'no_load' => TRUE,
  113. );
  114. $errors = array();
  115. $warnings = array();
  116. $success = tripal_insert_phylotree($options, $errors, $warnings);
  117. if (!$success) {
  118. throw new Exception("Cannot add the Taxonomy Tree record.");
  119. }
  120. $phylotree = (object) $options;
  121. }
  122. else {
  123. $phylotree = $phylotree[0];
  124. }
  125. // Clean out the phylotree in the event this is a reload
  126. chado_delete_record('phylonode', array('phylotree_id' => $phylotree->phylotree_id));
  127. // The taxonomic tree must have a root, so create that first.
  128. $tree = array(
  129. 'name' => 'root',
  130. 'depth' => 0,
  131. 'is_root' => 1,
  132. 'is_leaf' => 0,
  133. 'is_internal' => 0,
  134. 'left_index' => 0,
  135. 'right_index' => 0,
  136. 'branch_set' => array(),
  137. );
  138. // Get the "rank" cvterm. It requires that the TAXRANK vocabulary is loaded.
  139. $rank_cvterm = tripal_get_cvterm(array(
  140. 'name' => 'rank',
  141. 'cv_id' => array('name' => 'tripal_phylogeny')
  142. ));
  143. // Get the list of organisms
  144. $sql = "SELECT O.* FROM {organism} O";
  145. $organisms = chado_query($sql);
  146. while ($organism = $organisms->fetchObject()) {
  147. // Build the query string to get the information about this species.
  148. $term = $organism->genus . ' ' . $organism->species;
  149. $term = urlencode($term);
  150. $search_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?".
  151. "db=taxonomy" .
  152. "&term=$term";
  153. // Get the search response from NCBI.
  154. $rfh = fopen($search_url, "r");
  155. $xml_text = '';
  156. while (!feof($rfh)) {
  157. $xml_text .= fread($rfh, 255);
  158. }
  159. fclose($rfh);
  160. // Parse the XML to get the taxonomy ID
  161. $xml = new SimpleXMLElement($xml_text);
  162. if ($xml) {
  163. $taxid = (string) $xml->IdList->Id;
  164. if ($taxid) {
  165. print "$taxid\t$organism->genus $organism->species\n";
  166. // If we have a taxonomy ID we can now get the details.
  167. $fetch_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?".
  168. "db=taxonomy" .
  169. "&id=$taxid";
  170. // Get the search response from NCBI.
  171. $rfh = fopen($fetch_url, "r");
  172. $xml_text = '';
  173. while (!feof($rfh)) {
  174. $xml_text .= fread($rfh, 255);
  175. }
  176. fclose($rfh);
  177. $xml = new SimpleXMLElement($xml_text);
  178. if ($xml) {
  179. $taxon = $xml->Taxon;
  180. // Add in the organism properties
  181. $lineage = (string) $taxon->Lineage;
  182. tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'lineage', $lineage);
  183. $genetic_code = (string) $taxon->GeneticCode->GCId;
  184. tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'genetic_code', $genetic_code);
  185. $genetic_code_name = (string) $taxon->GeneticCode->GCName;
  186. tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'genetic_code_name', $genetic_code_name);
  187. $mito_genetic_code = (string) $taxon->MitoGeneticCode->MGCId;
  188. tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'mitochondrial_genetic_code', $mito_genetic_code);
  189. $mito_genetic_code_name = (string) $taxon->MitoGeneticCode->MGCName;
  190. tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'mitochondrial_genetic_code_name', $mito_genetic_code_name);
  191. $division = (string) $taxon->Division;
  192. tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'division', $division);
  193. $name_ranks = array();
  194. foreach ($taxon->OtherNames->children() as $child) {
  195. $type = $child->getName();
  196. $name = (string) $child;
  197. if (!array_key_exists($type, $name_ranks)) {
  198. $name_ranks[$type] = 0;
  199. }
  200. switch ($type) {
  201. case 'GenbankCommonName':
  202. tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'genbank_common_name', $name, $name_ranks[$type]);
  203. break;
  204. case 'Synonym':
  205. tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'synonym', $name, $name_ranks[$type]);
  206. break;
  207. case 'CommonName':
  208. case 'Includes':
  209. tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'other_name', $name, $name_ranks[$type]);
  210. break;
  211. case 'EquivalentName':
  212. tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'equivalent_name', $name, $name_ranks[$type]);
  213. break;
  214. case 'Anamorph':
  215. tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'anamorph', $name, $name_ranks[$type]);
  216. break;
  217. case 'Name':
  218. // skip the Name stanza
  219. break;
  220. default:
  221. print "NOTICE: Skipping unrecognzed name type: $type\n";
  222. // do nothing for unrecognized types
  223. }
  224. $name_ranks[$type]++;
  225. }
  226. // Generate a nested array structure that can be used for importing the tree.
  227. $parent = (string) $taxon->ParentTaxId;
  228. $rank = (string) $taxon->Rank;
  229. $sci_name = (string) $taxon->ScientificName;
  230. $lineage_depth = preg_split('/;\s*/', $lineage);
  231. $parent = $tree;
  232. $i = 1;
  233. foreach ($taxon->LineageEx->children() as $child) {
  234. $tid = (string) $child->TaxID;
  235. $name = (string) $child->ScientificName;
  236. $node_rank = (string) $child->Rank;
  237. $node = array(
  238. 'name' => $name,
  239. 'depth' => $i,
  240. 'is_root' => 0,
  241. 'is_leaf' => 0,
  242. 'is_internal' => 1,
  243. 'left_index' => 0,
  244. 'right_index' => 0,
  245. 'parent' => $parent,
  246. 'branch_set' => array(),
  247. 'parent' => $parent['name'],
  248. 'properties' => array(
  249. $rank_cvterm->cvterm_id => $node_rank,
  250. ),
  251. );
  252. $parent = $node;
  253. tripal_phylogeny_taxonomy_import_add_node($tree, $node, $lineage_depth);
  254. $i++;
  255. }
  256. // Now add in the leaf node
  257. $node = array(
  258. 'name' => $sci_name,
  259. 'depth' => $i,
  260. 'is_root' => 0,
  261. 'is_leaf' => 1,
  262. 'is_internal' => 0,
  263. 'left_index' => 0,
  264. 'right_index' => 0,
  265. 'parent' => $parent['name'],
  266. 'organism_id' => $organism->organism_id,
  267. 'properties' => array(
  268. $rank_cvterm->cvterm_id => $rank,
  269. ),
  270. );
  271. tripal_phylogeny_taxonomy_import_add_node($tree, $node, $lineage_depth);
  272. // Set the indecies for the tree.
  273. tripal_phylogeny_assign_tree_indices($tree);
  274. } // end: if ($xml) { ...
  275. } // end: if ($taxid) { ...
  276. } // end: if ($xml) { ...
  277. } // end: while ($organism = $organisms->fetchObject()) { ...
  278. // print json_encode(($tree));
  279. // Now add the tree
  280. $options = array('taxonomy' => 1);
  281. tripal_phylogeny_import_tree($tree, $phylotree, $options);
  282. // If ther user requested to sync the tree then do it.
  283. //if ($sync) {
  284. chado_node_sync_records('phylotree', FALSE, FALSE,
  285. array(), $ids = array($phylotree->phylotree_id));
  286. //}
  287. }
  288. catch (Exception $e) {
  289. $transaction->rollback();
  290. print "\n"; // make sure we start errors on new line
  291. watchdog_exception('tripal_phylogeny', $e);
  292. print "FAILED: Rolling back database changes...\n";
  293. }
  294. }
  295. /**
  296. *
  297. * @param unknown $node
  298. */
  299. function tripal_phylogeny_taxonomy_import_add_node(&$tree, $node, $lineage_depth) {
  300. // Get the branch set for the tree root.
  301. $branch_set = &$tree['branch_set'];
  302. // Iterate through the tree up until the depth where this node will
  303. // be placed.
  304. $node_depth = $node['depth'];
  305. for ($i = 1; $i <= $node_depth; $i++) {
  306. // Iterate through any existing nodes in the branch set to see if
  307. // the node name matches the correct name for the lineage at this
  308. // depth. If it matches then it is inside of this branch set that
  309. // we will place the node.
  310. for ($j = 0; $j < count($branch_set); $j++) {
  311. // If this node already exists in the tree then return.
  312. if ($branch_set[$j]['name'] == $node['name'] and
  313. $branch_set[$j]['depth'] = $node['depth']) {
  314. return;
  315. }
  316. // Otherwise, set the branch to be the current branch and continue.
  317. if ($branch_set[$j]['name'] == $lineage_depth[$i-1]) {
  318. $branch_set = &$branch_set[$j]['branch_set'];
  319. break;
  320. }
  321. }
  322. }
  323. // Add the node to the last branch set. This should be where this node goes.
  324. $branch_set[] = $node;
  325. }
  326. /**
  327. *
  328. * @param unknown $organism_id
  329. * @param unknown $term_name
  330. * @param unknown $value
  331. */
  332. function tripal_phylogeny_taxonomy_add_organism_property($organism_id, $term_name, $value, $rank = 0) {
  333. if (!$value) {
  334. return;
  335. }
  336. $record = array(
  337. 'table' => 'organism',
  338. 'id' => $organism_id
  339. );
  340. $property = array(
  341. 'type_name' => $term_name,
  342. 'cv_name' => organism_property,
  343. 'value' => $value
  344. );
  345. // Delete all properties of this type if the rank is zero.
  346. if ($rank == 0) {
  347. chado_delete_property($record, $property);
  348. }
  349. chado_insert_property($record, $property);
  350. }