tripal_chado.phylotree.api.inc 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850
  1. <?php
  /**
   * Validates an $options array for insert or update of a phylotree record.
   *
   * If validation passes then any values that needed validation lookups
   * (such as the dbxref, analysis, leaf_type, etc) will have their appropriate
   * primary keys added to the $options array, and missing default values
   * will also be added.
   *
   * @param $val_type
   *   The type of validation. Can be either 'insert' or 'update'.
   * @param $options
   *   An array of key/value pairs containing any of the valid keys for
   *   either the tripal_insert_phylotree() or tripal_update_phylotree()
   *   functions. Passed by reference: on success the keys 'analysis_id',
   *   'type_id' and 'dbxref_id' are filled in for the options that were
   *   supplied, and (for inserts) defaults are added for 'match', 'name_re'
   *   and 'dbxref'.
   * @param $errors
   *   An empty array where validation error messages will be set. The keys
   *   of the array will be the name of the field from the options array and
   *   the value is the error message.
   * @param $warnings
   *   An empty array where validation warning messages will be set. The
   *   warnings should not stop an insert or an update but should be provided
   *   to the user as information by a drupal_set_message() if appropriate. The
   *   keys of the array will be the name of the field from the options array
   *   and the value is the warning message. This function does not currently
   *   add any warnings.
   *
   * @return
   *   If validation fails then FALSE is returned. Any option that does not
   *   pass validation is added to the $errors array with the key being the
   *   option and the value being the error message. If validation is
   *   successful then TRUE is returned.
   */
  function tripal_validate_phylotree($val_type, &$options, &$errors, &$warnings) {

    // An unknown validation type is a programming error; stop immediately
    // rather than running the checks in an undefined mode.
    if ($val_type != 'insert' && $val_type != 'update') {
      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
        "The \$val_type argument must be either 'update' or 'insert'.");
      return FALSE;
    }

    if ($val_type == 'insert') {
      // Set defaults.
      // Match by feature name.
      if (!array_key_exists('match', $options)) {
        $options['match'] = 'name';
      }
      // The default regular expression matches the entire node name. An empty
      // expression is treated the same as a missing one because it has no
      // capture group to extract a name from.
      if (!isset($options['name_re']) || $options['name_re'] === '') {
        $options['name_re'] = '^(.*)$';
      }
      // A dbxref is not required by Tripal but is required by the database
      // field in the phylotree table. Therefore, if the dbxref is not provided
      // we set it to the null database and the 'local:null' accession, which
      // is written as 'null:local:null'.
      if (!array_key_exists('dbxref', $options)) {
        $options['dbxref'] = 'null:local:null';
      }

      // Make sure required values are set.
      if (!array_key_exists('name', $options)) {
        $errors['name'] = t('Please provide the name of the tree.');
        return FALSE;
      }
      if (!array_key_exists('description', $options)) {
        $errors['description'] = t('Please provide a description for this tree.');
        return FALSE;
      }
      if (!array_key_exists('tree_file', $options)) {
        $errors['tree_file'] = t('Please provide either the full path to the tree_file or a Drupal managed file ID number.');
        return FALSE;
      }
      if (!array_key_exists('format', $options) || !$options['format']) {
        $errors['format'] = t('Please provide a file format for the tree file.');
        return FALSE;
      }
      // Make sure the file format is correct.
      if ($options['format'] != 'newick' && $options['format'] != 'taxonomy') {
        $errors['format'] = t('The file format is not supported. Currently only the "newick" file format is supported.');
        return FALSE;
      }
    }
    else {
      // Does the phylotree ID exist and is it valid?
      if (!array_key_exists('phylotree_id', $options)) {
        $errors['phylotree_id'] = t('Please provide the ID for the tree.');
        return FALSE;
      }
      $exists = chado_select_record('phylotree', array('phylotree_id'),
        array('phylotree_id' => $options['phylotree_id']), array('has_record' => 1));
      if (!$exists) {
        $errors['phylotree_id'] = t('The phylotree_id does not exist.');
        return FALSE;
      }
    }

    // Make sure the file exists if one is specified.
    if (array_key_exists('tree_file', $options) && $options['tree_file']) {
      // A numeric value is a Drupal managed file ID; only paths are checked
      // on disk.
      if (!is_numeric($options['tree_file']) && !file_exists($options['tree_file'])) {
        $errors['tree_file'] = t('The file provided does not exists.');
        return FALSE;
      }
      // Make sure the file format is correct.
      if (!array_key_exists('format', $options) ||
          ($options['format'] != 'newick' && $options['format'] != 'taxonomy')) {
        $errors['format'] = t('Please provide a supported file format. Currently only the "newick" file format is supported.');
        return FALSE;
      }
      // If no leaf type is provided then use the polypeptide term.
      if (!array_key_exists('leaf_type', $options) || !$options['leaf_type']) {
        $options['leaf_type'] = 'polypeptide';
      }
    }

    // Make sure the analysis exists. It may be given by ID or by name; if
    // both are given the name lookup is applied last and therefore wins.
    if (array_key_exists('analysis_id', $options) && $options['analysis_id']) {
      $analysis = chado_select_record('analysis', array('analysis_id'),
        array('analysis_id' => $options['analysis_id']));
      if (!$analysis) {
        $errors['analysis_id'] = t('The analysis ID provided does not exist.');
        return FALSE;
      }
      $options['analysis_id'] = $analysis[0]->analysis_id;
    }
    if (array_key_exists('analysis', $options) && $options['analysis']) {
      $analysis = chado_select_record('analysis', array('analysis_id'),
        array('name' => $options['analysis']));
      if (!$analysis) {
        $errors['analysis'] = t('The analysis name provided does not exist.');
        return FALSE;
      }
      $options['analysis_id'] = $analysis[0]->analysis_id;
    }

    // Make sure the leaf type exists and resolve it to a cvterm_id.
    if (array_key_exists('leaf_type', $options) && $options['leaf_type']) {
      if ($options['leaf_type'] == 'taxonomy') {
        // Taxonomic trees are typed with the EDAM 'Species tree' term.
        $values = array(
          'cv_id' => array(
            'name' => 'EDAM',
          ),
          'name' => 'Species tree',
        );
        $type = chado_select_record('cvterm', array('cvterm_id'), $values);
        if (!$type) {
          $errors['leaf_type'] = t('The EDAM term "Species tree", which is required for taxonomy trees, could not be found.');
          return FALSE;
        }
      }
      else {
        // Any other leaf type must be a Sequence Ontology term.
        $values = array(
          'cv_id' => array(
            'name' => 'sequence',
          ),
          'name' => $options['leaf_type'],
        );
        $type = chado_select_record('cvterm', array('cvterm_id'), $values);
        if (!$type) {
          $errors['leaf_type'] = t('The leaf_type provided is not a valid Sequence Ontology term: %term.',
            array('%term' => $options['leaf_type']));
          return FALSE;
        }
      }
      $options['type_id'] = $type[0]->cvterm_id;
    }

    // A Dbxref is required by the phylotree module, but if the
    // tree was generated in-house and the site admin doesn't want to
    // assign a local dbxref then we will set it to the null db
    // and the local:null dbxref.
    if (array_key_exists('dbxref', $options)) {
      if (!$options['dbxref']) {
        $options['dbxref'] = 'null:local:null';
      }
      // Split on the first colon only: everything before it is the database
      // name and everything after it (colons included) is the accession.
      $matches = array();
      if (!preg_match('/^(.*?):(.*)$/', $options['dbxref'], $matches)) {
        $errors['dbxref'] = t('The dbxref must be of the form DB:ACCESSION: %dbxref.',
          array('%dbxref' => $options['dbxref']));
        return FALSE;
      }
      $values = array(
        'accession' => $matches[2],
        'db_id' => array(
          'name' => $matches[1],
        ),
      );
      $dbxref = chado_generate_var('dbxref', $values);
      if (!$dbxref) {
        $errors['dbxref'] = t('The dbxref provided does not exist in the database: %dbxref.',
          array('%dbxref' => $options['dbxref']));
        return FALSE;
      }
      $options['dbxref_id'] = $dbxref->dbxref_id;
    }

    // Make sure the tree name is unique.
    if (array_key_exists('name', $options) && $options['name']) {
      $sql = "
        SELECT *
        FROM {phylotree} P
        WHERE
          P.name = :name
      ";
      $args = array(':name' => $options['name']);
      // On update the tree being edited is allowed to keep its own name.
      if ($val_type == 'update') {
        $sql .= " AND NOT P.phylotree_id = :phylotree_id";
        $args[':phylotree_id'] = $options['phylotree_id'];
      }
      $result = chado_query($sql, $args)->fetchObject();
      if ($result) {
        $errors['name'] = t("The tree name is in use by another tree. Please provide a different unique name for this tree.");
        return FALSE;
      }
    }

    return TRUE;
  }
  /**
   * Inserts a phylotree record into Chado.
   *
   * This function validates the options passed prior to insertion of the record,
   * and if validation passes then any values in the options array that needed
   * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
   * their appropriate primary key values added to the options array. On
   * success the new 'phylotree_id' is also added to the options array.
   *
   * @param $options
   *   An array of key value pairs with the following keys required:
   *   'name':        The name of the tree. This will be displayed to users.
   *   'description': A description about the tree.
   *   'analysis_id': The ID of the analysis to which this phylotree should be
   *                  associated.
   *   'analysis':    If the analysis_id key is not used then the analysis name
   *                  may be provided to identify the analysis to which the tree
   *                  should be associated.
   *   'leaf_type':   A sequence ontology term or the word 'taxonomy'. If the
   *                  type is 'taxonomy' then this tree represents a
   *                  taxonomic tree. The default, if not specified, is the
   *                  term 'polypeptide'.
   *   'tree_file':   The path of the file containing the phylogenetic tree to
   *                  import or a Drupal managed_file numeric ID.
   *   'format':      The file format: 'newick' or 'taxonomy'.
   *   Optional keys:
   *   'dbxref':      A database cross-reference of the form DB:ACCESSION.
   *                  Where DB is the database name, which is already present
   *                  in Chado, and ACCESSION is the unique identifier for
   *                  this tree in the remote database.
   *   'name_re':     If the leaf type is NOT 'taxonomy', then the value of
   *                  this field can be a regular expression to pull out
   *                  the name of the feature from the node label in the
   *                  input tree. If no value is provided the entire label is
   *                  used.
   *   'match':       Set to 'uniquename' if the leaf nodes should be matched
   *                  with the feature uniquename. Defaults to 'name'. For
   *                  backward compatibility any other non-empty value (e.g. a
   *                  checked checkbox) is also treated as 'uniquename'.
   *   'load_now':    If set, the tree will be loaded immediately if a tree_file
   *                  is provided. Otherwise, the tree will be loaded via
   *                  a Tripal jobs call.
   *   'no_load':     If set the tree file will not be loaded.
   * @param $errors
   *   An empty array where validation error messages will be set. The keys
   *   of the array will be the name of the field from the options array and
   *   the value is the error message.
   * @param $warnings
   *   An empty array where validation warning messages will be set. The
   *   warnings should not stop an insert or an update but should be provided
   *   to the user as information by a drupal_set_message() if appropriate. The
   *   keys of the array will be the name of the field from the options array
   *   and the value is the warning message.
   *
   * @return
   *   TRUE for success and FALSE for failure.
   */
  function tripal_insert_phylotree(&$options, &$errors, &$warnings) {
    global $user;

    // Trim only values the caller actually supplied. Trimming a missing key
    // would create it as an empty string, which hides missing required values
    // from validation and blocks the defaults validation would otherwise add.
    foreach (array('name_re', 'leaf_type', 'name', 'format', 'tree_file') as $key) {
      if (array_key_exists($key, $options) && is_string($options[$key])) {
        $options[$key] = trim($options[$key]);
      }
    }
    // An empty expression means "none supplied": drop it so that validation
    // installs the default expression that matches the whole label.
    if (array_key_exists('name_re', $options) &&
        ($options['name_re'] === NULL || $options['name_re'] === '')) {
      unset($options['name_re']);
    }

    // Validate the incoming options.
    $success = tripal_validate_phylotree('insert', $options, $errors, $warnings);
    if (!$success) {
      foreach ($errors as $field => $message) {
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
      }
      return FALSE;
    }

    // A numeric tree_file is a Drupal managed file ID. Load it before writing
    // anything so that a bad ID does not leave a half-created tree behind.
    $file = NULL;
    if (is_numeric($options['tree_file'])) {
      $file = file_load($options['tree_file']);
      if (!$file) {
        $errors['tree_file'] = t('The managed file provided for the tree does not exist.');
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $errors['tree_file']);
        return FALSE;
      }
    }

    // If we're here then all is good, so add the phylotree record.
    $values = array(
      // Validation only sets analysis_id when an analysis was supplied.
      'analysis_id' => isset($options['analysis_id']) ? $options['analysis_id'] : NULL,
      'name' => $options['name'],
      'dbxref_id' => $options['dbxref_id'],
      'comment' => $options['description'],
      'type_id' => isset($options['type_id']) ? $options['type_id'] : NULL,
    );
    $phylotree = chado_insert_record('phylotree', $values);
    if (!$phylotree) {
      drupal_set_message(t('Unable to add phylotree.'), 'warning');
      tripal_report_error('tripal_phylogeny', TRIPAL_WARNING,
        'Insert phylotree: Unable to create phylotree where values: %values',
        array('%values' => print_r($values, TRUE)));
      return FALSE;
    }
    $phylotree_id = $phylotree['phylotree_id'];
    $options['phylotree_id'] = $phylotree_id;

    if ($file) {
      // Make the managed file permanent and associate it with the tree.
      $file->status = FILE_STATUS_PERMANENT;
      $file = file_save($file);
      file_usage_add($file, 'tripal_phylogeny', $options['format'], $phylotree_id);
      $real_file_path = drupal_realpath($file->uri);
      $job_label = $file->filename;
    }
    else {
      // A plain path has no managed file object; label the job by file name.
      $real_file_path = $options['tree_file'];
      $job_label = basename($real_file_path);
    }

    // If caller has requested to load the file now then do so, otherwise
    // submit using a Tripal job.
    if (!array_key_exists('no_load', $options) || !$options['no_load']) {
      // Only the literal 'name' (or an empty value) selects name matching;
      // testing plain truthiness would turn the default 'name' into
      // 'uniquename'.
      $use_uniquename = !empty($options['match']) && $options['match'] !== 'name';
      $import_options = array(
        'phylotree_id' => $phylotree_id,
        'leaf_type' => isset($options['leaf_type']) ? $options['leaf_type'] : '',
        'match' => $use_uniquename ? 'uniquename' : 'name',
        'name_re' => $options['name_re'],
      );

      if (array_key_exists('load_now', $options) && $options['load_now']) {
        tripal_phylogeny_import_tree_file($real_file_path, $options['format'], $import_options);
      }
      else {
        // NOTE(review): the queued job always passes 'newick' while the
        // immediate import above passes $options['format']; confirm which is
        // intended before aligning them.
        $args = array(
          $real_file_path,
          'newick',
          $import_options,
        );
        if (tripal_add_job("Import Tree File: " . $job_label, 'tripal_phylogeny',
            'tripal_phylogeny_import_tree_file', $args, $user->uid)) {
          drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
        }
      }
    }

    return TRUE;
  }
  /**
   * Updates a phylotree record in Chado.
   *
   * This function validates the options passed prior to update of the record
   * and if validation passes then any values in the options array that needed
   * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
   * their appropriate primary key values added to the options array. The
   * 'phylotree_id' key of the options array is set from the $phylotree_id
   * argument.
   *
   * @param $phylotree_id
   *   The ID of the phylotree to update.
   * @param $options
   *   An array of key value pairs with the following optional keys:
   *   'name':        The name of the tree. This will be displayed to users.
   *   'description': A description about the tree.
   *   'analysis_id': The ID of the analysis to which this phylotree should be
   *                  associated.
   *   'analysis':    If the analysis_id key is not used then the analysis name
   *                  may be provided to identify the analysis to which the tree
   *                  should be associated.
   *   'leaf_type':   A sequence ontology term or the word 'taxonomy'. If the
   *                  type is 'taxonomy' then this tree represents a
   *                  taxonomic tree. The default, if a tree_file is given and
   *                  no leaf type is specified, is the term 'polypeptide'.
   *   'tree_file':   The path of the file containing the phylogenetic tree to
   *                  import or a Drupal managed_file numeric ID.
   *   'format':      The file format: 'newick' or 'taxonomy'. Required when a
   *                  tree_file is given.
   *   'dbxref':      A database cross-reference of the form DB:ACCESSION.
   *                  Where DB is the database name, which is already present
   *                  in Chado, and ACCESSION is the unique identifier for
   *                  this tree in the remote database.
   *   'name_re':     If the leaf type is NOT 'taxonomy', then the value of
   *                  this field can be a regular expression to pull out
   *                  the name of the feature from the node label in the
   *                  input tree. If no value is provided the entire label is
   *                  used.
   *   'match':       Set to 'uniquename' if the leaf nodes should be matched
   *                  with the feature uniquename. Defaults to 'name'. For
   *                  backward compatibility any other non-empty value is also
   *                  treated as 'uniquename'.
   *   'load_now':    If set, the tree will be loaded immediately if a tree_file
   *                  is provided. Otherwise, the tree will be loaded via
   *                  a Tripal jobs call.
   *
   * @return
   *   TRUE on success. FALSE if validation fails, the record cannot be
   *   updated, or a managed tree file cannot be loaded.
   */
  function tripal_update_phylotree($phylotree_id, &$options) {
    global $user;

    // Validation and the file handling below read the ID from the options
    // array, so make sure it agrees with the explicit argument.
    $options['phylotree_id'] = $phylotree_id;

    // Validate the incoming options.
    $errors = array();
    $warnings = array();
    $success = tripal_validate_phylotree('update', $options, $errors, $warnings);
    if (!$success) {
      foreach ($errors as $field => $message) {
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
      }
      return FALSE;
    }

    // If we're here then all is good, so update the phylotree record with
    // whichever fields were supplied. Keys are option names, values are the
    // phylotree columns they are written to.
    $match = array(
      'phylotree_id' => $phylotree_id,
    );
    $column_for_option = array(
      'name' => 'name',
      'analysis_id' => 'analysis_id',
      'dbxref_id' => 'dbxref_id',
      'description' => 'comment',
      'type_id' => 'type_id',
    );
    $values = array();
    foreach ($column_for_option as $option_key => $column) {
      if (array_key_exists($option_key, $options) && $options[$option_key]) {
        $values[$column] = $options[$option_key];
      }
    }
    // Skip the update when only a new tree file was supplied.
    if ($values) {
      $phylotree = chado_update_record('phylotree', $match, $values, array('return_record' => TRUE));
      if (!$phylotree) {
        drupal_set_message(t('Unable to update phylotree.'), 'warning');
        tripal_report_error('tripal_phylogeny', TRIPAL_WARNING,
          'Update phylotree: Unable to update phylotree where values: %values',
          array('%values' => print_r($values, TRUE))
        );
        return FALSE;
      }
    }

    // If we have a tree file, then import the tree.
    if (array_key_exists('tree_file', $options) && $options['tree_file']) {

      // A numeric tree_file is a Drupal managed file ID. Load it before
      // removing any existing data so a bad ID cannot destroy the old tree.
      $new_file = NULL;
      if (is_numeric($options['tree_file'])) {
        $new_file = file_load($options['tree_file']);
        if (!$new_file) {
          tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
            'Update phylotree: the managed file provided for the tree does not exist.');
          return FALSE;
        }
      }

      // Remove any existing nodes.
      chado_delete_record('phylonode', array('phylotree_id' => $phylotree_id));

      // Make sure if we already have a file that we remove the old one,
      // unless it is the very file that is being attached again.
      $sql = "
        SELECT FM.fid
        FROM {file_managed} FM
          INNER JOIN {file_usage} FU on FM.fid = FU.fid
        WHERE FU.id = :id and FU.module = 'tripal_phylogeny'
      ";
      $old_fid = db_query($sql, array(':id' => $phylotree_id))->fetchField();
      $is_same_file = $new_file && $old_fid && $old_fid == $new_file->fid;
      if ($old_fid && !$is_same_file) {
        $old_file = file_load($old_fid);
        if ($old_file) {
          file_delete($old_file, TRUE);
        }
      }

      if ($new_file) {
        // Make the managed file permanent and associate it with the tree.
        $new_file->status = FILE_STATUS_PERMANENT;
        $new_file = file_save($new_file);
        if (!$is_same_file) {
          file_usage_add($new_file, 'tripal_phylogeny', 'newick', $phylotree_id);
        }
        $real_file_path = drupal_realpath($new_file->uri);
        $job_label = $new_file->filename;
      }
      else {
        // A plain path has no managed file object; label the job by file name.
        $real_file_path = $options['tree_file'];
        $job_label = basename($real_file_path);
      }

      // Validation only installs defaults for 'match' and 'name_re' on insert,
      // so resolve them here. Only the literal 'name' (or an empty value)
      // selects name matching.
      $use_uniquename = !empty($options['match']) && $options['match'] !== 'name';
      $has_name_re = isset($options['name_re']) && $options['name_re'] !== '';
      $import_options = array(
        'phylotree_id' => $phylotree_id,
        'leaf_type' => $options['leaf_type'],
        'match' => $use_uniquename ? 'uniquename' : 'name',
        'name_re' => $has_name_re ? $options['name_re'] : '^(.*)$',
      );

      // If caller has requested to load the file now then do so, otherwise
      // submit using a Tripal job.
      if (array_key_exists('load_now', $options) && $options['load_now']) {
        tripal_phylogeny_import_tree_file($real_file_path, $options['format'], $import_options);
      }
      else {
        // NOTE(review): the queued job always passes 'newick' while the
        // immediate import above passes $options['format']; confirm which is
        // intended before aligning them.
        $args = array(
          $real_file_path,
          'newick',
          $import_options,
        );
        if (tripal_add_job("Import Tree File: " . $job_label, 'tripal_phylogeny',
            'tripal_phylogeny_import_tree_file', $args, $user->uid)) {
          drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
        }
      }
    }

    return TRUE;
  }
  /**
   * Deletes a phylotree record from Chado.
   *
   * @param $phylotree_id
   *   The ID of the phylotree record to remove. An empty value is rejected.
   *
   * @return
   *   FALSE when no phylotree_id is given; otherwise whatever
   *   chado_delete_record() returns for the delete.
   */
  function tripal_delete_phylotree($phylotree_id) {
    // With an ID in hand, remove the tree record that matches it.
    if ($phylotree_id) {
      $criteria = array('phylotree_id' => $phylotree_id);
      return chado_delete_record('phylotree', $criteria);
    }

    // Without a phylotree ID there is nothing that can be matched, e.g.
    // because this isn't a node of type chado_phylotree or the entry in the
    // chado_phylotree table was lost.
    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
      'Please provide a phylotree_id to delete a tree.');
    return FALSE;
  }
  /**
   * Walks the tree and assigns the left and right indices of every node.
   *
   * @param $tree
   *   The tree array. It is modified in place: 'left_index' and 'right_index'
   *   are written on the nodes.
   * @param $index
   *   The running index counter. It does not need to be supplied on the first
   *   call; it is passed along on recursive calls.
   */
  function tripal_assign_phylogeny_tree_indices(&$tree, &$index = 1) {
    // Indices advance in steps of 100 to leave space for new nodes that
    // might be added later.
    $step = 100;

    // A named node takes the next index as its left index. If it carries an
    // 'is_leaf' key it closes immediately with the index after that.
    if (array_key_exists('name', $tree)) {
      $index += $step;
      $tree['left_index'] = $index;
      if (array_key_exists('is_leaf', $tree)) {
        $index += $step;
        $tree['right_index'] = $index;
      }
    }

    if (!array_key_exists('branch_set', $tree)) {
      return;
    }

    // Number each child subtree in turn. The parent's right index is moved
    // past every child as it completes, so it ends up greater than all of
    // the indices used beneath it.
    foreach ($tree['branch_set'] as $position => $child) {
      tripal_assign_phylogeny_tree_indices($tree['branch_set'][$position], $index);
      $index += $step;
      $tree['right_index'] = $index;
    }
  }
  /**
   * Iterates through the tree array and creates phylonodes in Chado.
   *
   * The function iterates through the tree in a top-down approach adding
   * parent internal nodes prior to leaf nodes. Each node of the tree should have
   * the following fields:
   *
   *   -name:         The name (or label) for this node.
   *   -depth:        The depth of the node in the tree.
   *   -is_root:      Set to 1 if this node is a root node.
   *   -is_leaf:      Set to 1 if this node is a leaf node.
   *   -is_internal:  Set to 1 if this node is an internal node.
   *   -left_index:   The index of the node to the left in the tree.
   *   -right_index:  The index of the node to the right in the tree.
   *   -branch_set:   An array containing a list of nodes that are children
   *                  of the node.
   *   -parent:       The name of the parent node.
   *   -organism_id:  The organism_id for associating the node with an organism.
   *   -properties:   An array of key/value pairs where the key is the cvterm_id
   *                  and the value is the property value. These properties
   *                  will be associated with the phylonode.
   *
   * Prior to importing the tree the indices can be set by using the
   * tripal_assign_phylogeny_tree_indices() function.
   *
   * As each node is inserted its new 'phylonode_id' is written back into the
   * tree array. If a node cannot be inserted the error is reported and that
   * node's subtree is skipped.
   *
   * @param $tree
   *   The tree array.
   * @param $phylotree
   *   The phylotree object (from Chado).
   * @param $options
   *   The options provide some direction for how the tree is imported. The
   *   following keys can be used:
   *   -taxonomy:   Set to 1 if this tree is a taxonomic tree. Set to 0
   *                otherwise.
   *   -leaf_type:  Set to the leaf type name. If this is a non-taxonomic tree
   *                that is associated with features, then this should be the
   *                Sequence Ontology term for the feature (e.g. polypeptide).
   *                If this is a taxonomic tree then this option is not needed.
   *   -match:      Set to either 'name' or 'uniquename'. This is used for
   *                matching the feature name or uniquename with the node name.
   *                This is not needed for taxonomic trees.
   *   -name_re:    Set to a regular expression whose first capture group
   *                extracts the feature name from the node name, for when the
   *                node name is not identical to the feature name. If it is
   *                empty, does not match, or has no capture group, the whole
   *                node name is used.
   * @param $vocab
   *   Optional. An array containing a set of key/value pairs that maps node
   *   types to CV terms. The keys must be 'root', 'internal' or 'leaf'. If
   *   no vocab is provided then the terms provided by the tripal_phylogeny
   *   CV will be used.
   * @param $parent
   *   This argument is not needed when the function is first called. This
   *   function is recursive and this argument is used on recursive calls.
   */
  function tripal_phylogeny_import_tree(&$tree, $phylotree, $options, $vocab = array(), $parent = NULL) {

    // Get the vocabulary terms used to describe nodes in the tree if one
    // wasn't provided. The lookup reports its own error and returns FALSE on
    // failure, in which case no node can be typed and the import stops.
    if (!is_array($vocab) || count($vocab) == 0) {
      $vocab = tripal_phylogeny_get_node_types_vocab();
      if (!$vocab) {
        return;
      }
    }

    if (is_array($tree) && array_key_exists('name', $tree)) {
      $values = array(
        'phylotree_id' => $phylotree->phylotree_id,
        'left_idx' => $tree['left_index'],
        'right_idx' => $tree['right_index'],
      );

      // Add in any optional values to the $values array if they are present.
      $has_name = !empty($tree['name']);
      if ($has_name) {
        $values['label'] = $tree['name'];
      }
      if (!empty($tree['length'])) {
        $values['distance'] = $tree['length'];
      }

      // Every non-root node points at the phylonode created for its parent.
      $parent_id = (is_array($parent) && isset($parent['phylonode_id'])) ? $parent['phylonode_id'] : NULL;

      // Set the type of node.
      if (!empty($tree['is_root'])) {
        $values['type_id'] = $vocab['root']->cvterm_id;
      }
      elseif (!empty($tree['is_internal'])) {
        $values['type_id'] = $vocab['internal']->cvterm_id;
        $values['parent_phylonode_id'] = $parent_id;
        // TODO: a feature may be associated here but it is recommended that it
        // be a feature of type SO:match and should represent the alignment of
        // all features beneath it.
      }
      elseif (!empty($tree['is_leaf'])) {
        $values['type_id'] = $vocab['leaf']->cvterm_id;
        $values['parent_phylonode_id'] = $parent_id;

        // This is a sequence-based tree when the taxonomy option is not set.
        // Try to match the leaf with a feature, which requires a name.
        if ($has_name && empty($options['taxonomy'])) {
          // Only the literal 'name' selects name matching; anything else
          // matches on the feature uniquename.
          $match_column = (isset($options['match']) && $options['match'] == 'name') ? 'name' : 'uniquename';

          // Start from the whole label and narrow it to the first capture
          // group of the expression when there is one.
          $lookup = $tree['name'];
          $re = isset($options['name_re']) ? $options['name_re'] : '';
          $matches = array();
          if ($re !== '' && preg_match("/$re/", $tree['name'], $matches) && isset($matches[1])) {
            $lookup = $matches[1];
          }

          $sel_values = array(
            $match_column => $lookup,
            'type_id' => array(
              'name' => $options['leaf_type'],
              'cv_id' => array(
                'name' => 'sequence',
              ),
            ),
          );
          $feature = chado_select_record('feature', array('feature_id'), $sel_values);
          // Associate the node only on an unambiguous match: with no match or
          // several matches the node is stored without a feature.
          if (is_array($feature) && count($feature) == 1) {
            $values['feature_id'] = $feature[0]->feature_id;
          }
        }
      }

      // Insert the new node and then add its assigned phylonode_id to the
      // node so that its children can reference it.
      $phylonode = chado_insert_record('phylonode', $values);
      if (!$phylonode) {
        // Without an ID nothing can be attached to this node, so stop here
        // instead of linking children and properties to an empty parent.
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
          'Import tree: Unable to create phylonode where values: %values',
          array('%values' => print_r($values, TRUE)));
        return;
      }
      $tree['phylonode_id'] = $phylonode['phylonode_id'];

      // Associate this node with an organism if one is provided, as is the
      // case for taxonomic trees.
      if (array_key_exists('organism_id', $tree)) {
        $values = array(
          'phylonode_id' => $tree['phylonode_id'],
          'organism_id' => $tree['organism_id'],
        );
        chado_insert_record('phylonode_organism', $values);
      }

      // Associate any properties.
      if (array_key_exists('properties', $tree)) {
        foreach ($tree['properties'] as $type_id => $value) {
          $values = array(
            'phylonode_id' => $tree['phylonode_id'],
            'type_id' => $type_id,
            'value' => $value,
          );
          chado_insert_record('phylonodeprop', $values);
        }
      }
    }

    // Import the children, passing this node along as their parent.
    if (is_array($tree) && array_key_exists('branch_set', $tree)) {
      foreach ($tree['branch_set'] as $key => $node) {
        tripal_phylogeny_import_tree($tree['branch_set'][$key], $phylotree, $options, $vocab, $tree);
      }
    }
  }
  /**
   * Retrieves the controlled vocabulary terms that describe tree node types.
   *
   * Looks up the 'phylo_leaf', 'phylo_interior' and 'phylo_root' terms of the
   * 'tripal_phylogeny' vocabulary, in that order, and stops at the first term
   * that cannot be found.
   *
   * @return
   *   FALSE (after reporting an error) if any of the three terms is missing.
   *   Otherwise an associative array with the keys 'leaf', 'internal' and
   *   'root', each holding the value that chado_generate_var() returned for
   *   the corresponding cvterm.
   */
  function tripal_phylogeny_get_node_types_vocab() {
    // Maps each key of the returned array to the cvterm name it is loaded
    // from and to the label used when reporting that the term is missing.
    // Note that the 'internal' key is backed by the 'phylo_interior' term.
    $terms = array(
      'leaf' => array('name' => 'phylo_leaf', 'label' => 'leaf'),
      'internal' => array('name' => 'phylo_interior', 'label' => 'interior'),
      'root' => array('name' => 'phylo_root', 'label' => 'root'),
    );

    $vocab = array();
    foreach ($terms as $key => $term) {
      $values = array(
        'name' => $term['name'],
        'cv_id' => array(
          'name' => 'tripal_phylogeny',
        ),
      );
      $cvterm = chado_generate_var('cvterm', $values);
      if (!$cvterm) {
        // Name the term that is actually missing rather than always
        // describing it as the leaf term.
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
          "Could not find the " . $term['label'] . " vocabulary term: '" . $term['name'] . "'. It should " .
          "already be present as part of the tripal_phylogeny vocabulary.");
        return FALSE;
      }
      $vocab[$key] = $cvterm;
    }
    return $vocab;
  }
  /**
   * Imports a tree file.
   *
   * This function is used as a wrapper for loading a phylogenetic tree using
   * any number of file loaders. The nodes of the parsed tree are attached to
   * an existing phylotree record; this function does not create one.
   *
   * The import runs inside a database transaction that is rolled back if an
   * exception is thrown while parsing or inserting.
   *
   * @param $file_name
   *   The name of the file containing the phylogenetic tree to import.
   * @param $format
   *   The format of the file. Currently only the 'newick' file format is
   *   supported; any other value is reported as an error.
   * @param $options
   *   An associative array of import options:
   *   - 'phylotree_id': Required. The ID of the existing phylotree record
   *     that the imported nodes will be associated with.
   *   - 'leaf_type': A sequence ontology term or the word 'organism'. If the
   *     type is 'organism' then this tree represents a taxonomic tree. The
   *     default, if not specified, is the term 'polypeptide'.
   *   - 'name_re': If the leaf type is NOT 'organism', then the value of
   *     this field can be a regular expression to pull out the name of the
   *     feature from the node label in the input tree. The expression is
   *     placed between '/' delimiters and its first capture group is used,
   *     so it must contain a capture group and any literal '/' must be
   *     escaped. If no value is provided the entire label is used.
   *   - 'match': Set to 'uniquename' if the leaf nodes should be matched
   *     with the feature uniquename. Defaults to 'name'.
   * @param $job_id
   *   Optional. Accepted for callers that pass a job ID; it is not used by
   *   this function.
   *
   * @return
   *   TRUE if the tree was imported. FALSE if a required option is missing,
   *   the format is unsupported, the phylotree record cannot be found, or an
   *   exception caused the transaction to be rolled back.
   */
  function tripal_phylogeny_import_tree_file($file_name, $format, $options = array(), $job_id = NULL) {
    // Fill in defaults for any option the caller did not supply. The array
    // union only adds keys that are missing from $options.
    $options += array(
      'leaf_type' => 'polypeptide',
      'match' => 'name',
      'name_re' => '^(.*)$',
    );
    $options['name_re'] = trim($options['name_re']);

    // The nodes are always attached to an existing phylotree record, so the
    // ID is required regardless of which other options were provided.
    if (empty($options['phylotree_id'])) {
      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
        'The phylotree_id is required for importing the tree.');
      return FALSE;
    }

    // Reject unsupported formats up front. Otherwise no parser would run and
    // an undefined tree would be handed to the node importer.
    if ($format != 'newick') {
      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
        'The file format, %format, is not supported. Only the newick format can be imported.',
        array('%format' => $format));
      return FALSE;
    }

    // Get the phylotree record.
    $values = array('phylotree_id' => $options['phylotree_id']);
    $phylotree = chado_generate_var('phylotree', $values);
    if (!$phylotree) {
      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
        'Could not find the phylotree using the ID provided: %phylotree_id.',
        array('%phylotree_id' => $options['phylotree_id']));
      return FALSE;
    }

    $transaction = db_transaction();
    print "\nNOTE: Loading of this tree file is performed using a database transaction. \n" .
          "If the load fails or is terminated prematurely then the entire set of \n" .
          "insertions/updates is rolled back and will not be found in the database\n\n";
    try {
      // Parse the tree into the expected nested node format.
      module_load_include('inc', 'tripal_phylogeny', 'includes/parsers/tripal_phylogeny.newick_parser');
      $tree = tripal_phylogeny_parse_newick_file($file_name);

      // Assign the right and left indices to the tree nodes.
      tripal_assign_phylogeny_tree_indices($tree);

      // Iterate through the tree nodes and add them to Chado in accordance
      // with the details in the $options array.
      tripal_phylogeny_import_tree($tree, $phylotree, $options);
    }
    catch (Exception $e) {
      $transaction->rollback();
      watchdog_exception('tripal_phylogeny', $e);
      // Signal the failure to the caller, like the other error paths do.
      return FALSE;
    }
    return TRUE;
  }