tripal_chado.phylotree.api.inc 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910
  1. <?php
  2. /**
  3. * @file
  4. * Provides API functions specifically for managing phylogenetic and taxonomic
  5. * tree records in Chado.
  6. */
  7. /**
  8. * @defgroup tripal_phylotree_api Chado Phylotree
  9. * @ingroup tripal_chado_api
  10. * @{
  11. * Provides API functions specifically for managing phylogenetic and taxonomic
  12. * tree records in Chado. The API consists of functions for creation,
  13. * retrieval, update and deletion (CRUD) for phylogenetic tree records as
  14. * well as importing of trees in the newick file format.
  15. * @}
  16. */
  /**
   * Validates an $options array for insert or update of a phylotree record.
   *
   * If validation passes then any values that needed validation lookups
   * (such as the dbxref, analysis, leaf_type, etc) will have their appropriate
   * primary keys added to the $options array, and missing default values
   * will also be added.
   *
   * @param $val_type
   *   The type of validation. Can be either 'insert' or 'update'. Any other
   *   value is reported as an error and fails validation.
   * @param $options
   *   An array of key/value pairs containing any of the valid keys for
   *   either the chado_insert_phylotree() or chado_update_phylotree()
   *   functions. On success the keys 'analysis_id', 'type_id' and 'dbxref_id'
   *   are populated where the corresponding option was supplied.
   * @param $errors
   *   An empty array where validation error messages will be set. The keys
   *   of the array will be name of the field from the options array and the
   *   value is the error message.
   * @param $warnings
   *   An empty array where validation warning messages will be set. The
   *   warnings should not stop an insert or an update but should be provided
   *   to the user as information by a drupal_set_message() if appropriate. The
   *   keys of the array will be name of the field from the options array and
   *   the value is the warning message. No warnings are currently generated.
   *
   * @return
   *   If validation fails then FALSE is returned. Any options that do not pass
   *   validation checks will be added in the $errors array with the key being
   *   the option and the value being the error message. If validation
   *   is successful then TRUE is returned.
   *
   * @ingroup tripal_phylotree_api
   */
  function chado_validate_phylotree($val_type, &$options, &$errors, &$warnings) {
    if ($val_type != 'insert' and $val_type != 'update') {
      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
        'The $val_type argument must be either \'update\' or \'insert\'.');
      // Nothing below is meaningful for an unknown validation type.
      return FALSE;
    }

    if ($val_type == 'insert') {
      // Set defaults.
      // Match by feature name.
      if (!array_key_exists('match', $options)) {
        $options['match'] = 'name';
      }
      // The regular expression is to match the entire node name.
      if (!array_key_exists('name_re', $options)) {
        $options['name_re'] = '^(.*)$';
      }
      // A dbxref is not required by Tripal but is required by the database
      // field in the phylotree table. Therefore, if the dbxref is not provided
      // we can set this to be the null database and null dbxref which
      // is represented as 'null:local:null'.
      if (!array_key_exists('dbxref', $options)) {
        $options['dbxref'] = "null:local:null";
      }

      // Make sure required values are set. An empty name is as unusable as a
      // missing one.
      if (!array_key_exists('name', $options) or trim((string) $options['name']) === '') {
        $errors['name'] = t('Please provide the name of the tree.');
        return FALSE;
      }
      if (!array_key_exists('description', $options)) {
        $errors['description'] = t('Please provide a description for this tree.');
        return FALSE;
      }
      if (!array_key_exists('format', $options) or !$options['format']) {
        $errors['format'] = t('Please provide a file format for the tree file.');
        return FALSE;
      }
      // Make sure the file format is correct.
      if ($options['format'] != 'newick' and $options['format'] != 'taxonomy') {
        $errors['format'] = t('The file format is not supported. Currently only the "newick" file format is supported.');
        return FALSE;
      }
    }
    else {
      // Does the phylotree ID exist and is it valid.
      if (!array_key_exists('phylotree_id', $options)) {
        $errors['phylotree_id'] = t('Please provide the ID for the tree.');
        return FALSE;
      }
      $exists = chado_select_record('phylotree', ['phylotree_id'],
        ['phylotree_id' => $options['phylotree_id']], ['has_record' => 1]);
      if (!$exists) {
        $errors['phylotree_id'] = t('The phylotree_id does not exist.');
        return FALSE;
      }
    }

    // Make sure the file exists if one is specified.
    if (array_key_exists('tree_file', $options) and $options['tree_file']) {
      // A numeric value is a Drupal managed file ID, so only plain paths are
      // checked on disk.
      if (!is_numeric($options['tree_file']) and !file_exists($options['tree_file'])) {
        $errors['tree_file'] = t('The file provided does not exist.');
        return FALSE;
      }
      // Make sure the file format is correct.
      if (!array_key_exists('format', $options) or
        ($options['format'] != 'newick' and $options['format'] != 'taxonomy')) {
        $errors['format'] = t('Please provide a supported file format. Currently only the "newick" file format is supported.');
        return FALSE;
      }
      // If no leaf type is provided then use the polypeptide term.
      if (!array_key_exists('leaf_type', $options) or !$options['leaf_type']) {
        $options['leaf_type'] = 'polypeptide';
      }
    }

    // Make sure the analysis exists, looked up by ID ...
    if (array_key_exists('analysis_id', $options) and $options['analysis_id']) {
      $analysis = chado_select_record('analysis', ['analysis_id'], ['analysis_id' => $options['analysis_id']]);
      if (!$analysis) {
        $errors['analysis_id'] = t('The analysis ID provided does not exist.');
        return FALSE;
      }
      $options['analysis_id'] = $analysis[0]->analysis_id;
    }
    // ... or by name. A name lookup overrides any analysis_id set above.
    if (array_key_exists('analysis', $options) and $options['analysis']) {
      $analysis = chado_select_record('analysis', ['analysis_id'], ['name' => $options['analysis']]);
      if (!$analysis) {
        $errors['analysis'] = t('The analysis name provided does not exist.');
        return FALSE;
      }
      $options['analysis_id'] = $analysis[0]->analysis_id;
    }

    // Make sure the leaf type exists.
    if (array_key_exists('leaf_type', $options) and $options['leaf_type']) {
      if ($options['leaf_type'] == 'taxonomy') {
        // Taxonomic trees are typed with the EDAM 'Species tree' term.
        $values = [
          'cv_id' => [
            'name' => 'EDAM',
          ],
          'name' => 'Species tree',
        ];
        $not_found = t('The EDAM "Species tree" term required for taxonomy trees could not be found.');
      }
      else {
        $values = [
          'cv_id' => [
            'name' => 'sequence',
          ],
          'name' => $options['leaf_type'],
        ];
        $not_found = t('The leaf_type provided is not a valid Sequence Ontology term: %term.',
          ['%term' => $options['leaf_type']]);
      }
      $type = chado_select_record('cvterm', ['cvterm_id'], $values);
      if (!$type) {
        $errors['leaf_type'] = $not_found;
        return FALSE;
      }
      $options['type_id'] = $type[0]->cvterm_id;
    }

    // A Dbxref is required by the phylotree module, but if the
    // tree was generated in-house and the site admin doesn't want to
    // assign a local dbxref then we will set it to the null db
    // and the local:null dbxref.
    if (array_key_exists('dbxref', $options)) {
      if (!$options['dbxref']) {
        $options['dbxref'] = 'null:local:null';
      }
      // Split DB:ACCESSION on the first colon only, so the accession itself
      // may contain colons (e.g. 'null:local:null').
      $matches = [];
      if (!preg_match('/^(.*?):(.*)$/', $options['dbxref'], $matches)) {
        $errors['dbxref'] = t('The dbxref must be of the form DB:ACCESSION: %dbxref.',
          ['%dbxref' => $options['dbxref']]);
        return FALSE;
      }
      $db_name = $matches[1];
      $accession = $matches[2];
      $values = [
        'accession' => $accession,
        'db_id' => [
          'name' => $db_name,
        ],
      ];
      $dbxref = chado_generate_var('dbxref', $values);
      if (!$dbxref) {
        // The accession is new: it can only be created if its database exists.
        $db = chado_generate_var('db', ['name' => $db_name]);
        if (!$db) {
          $errors['dbxref'] = t('dbxref could not be created for db: %dbxref.', ['%dbxref' => $db_name]);
          return FALSE;
        }
        $dbxref = chado_insert_record('dbxref', $values);
        if (!$dbxref) {
          $errors['dbxref'] = t('The dbxref could not be created: %dbxref.', ['%dbxref' => $options['dbxref']]);
          return FALSE;
        }
      }
      // chado_insert_record() hands back an array while chado_generate_var()
      // hands back an object, so support both shapes.
      if (is_array($dbxref)) {
        $options['dbxref_id'] = $dbxref['dbxref_id'];
      }
      else {
        $options['dbxref_id'] = $dbxref->dbxref_id;
      }
    }

    // Make sure the tree name is unique.
    if (array_key_exists('name', $options) and $options['name']) {
      $sql = "
        SELECT *
        FROM {phylotree} P
        WHERE
          P.name = :name
      ";
      $args = [':name' => $options['name']];
      if ($val_type == 'update') {
        // The tree being updated is allowed to keep its own name.
        $sql .= " AND NOT P.phylotree_id = :phylotree_id";
        $args[':phylotree_id'] = $options['phylotree_id'];
      }
      $result = chado_query($sql, $args)->fetchObject();
      if ($result) {
        $errors['name'] = t("The tree name is in use by another tree. Please provide a different unique name for this tree.");
        return FALSE;
      }
    }

    return TRUE;
  }
  /**
   * Inserts a phylotree record into Chado.
   *
   * This function validates the options passed prior to insertion of the record,
   * and if validation passes then any values in the options array that needed
   * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
   * their appropriate primary key values added to the options array. On success
   * the new 'phylotree_id' is also added to the options array.
   *
   * @param $options
   *   An array of key value pairs with the following keys required:
   *   'name':        The name of the tree. This will be displayed to users.
   *   'description': A description about the tree.
   *   'analysis_id': The ID of the analysis to which this phylotree should be
   *                  associated.
   *   'analysis':    If the analysis_id key is not used then the analysis name
   *                  may be provided to identify the analysis to which the tree
   *                  should be associated.
   *   'leaf_type':   A sequence ontology term or the word 'taxonomy'. If the
   *                  type is 'taxonomy' then this tree represents a
   *                  taxonomic tree. The default, if not specified, is the
   *                  term 'polypeptide'.
   *   'tree_file':   The path of the file containing the phylogenetic tree to
   *                  import or a Drupal managed_file numeric ID.
   *   'format':      The file format. Currently only 'newick' is supported.
   *
   *   Optional keys:
   *   'dbxref':      A database cross-reference of the form DB:ACCESSION.
   *                  Where DB is the database name, which is already present
   *                  in Chado, and ACCESSION is the unique identifier for
   *                  this tree in the remote database.
   *   'name_re':     If the leaf type is NOT 'taxonomy', then the value of
   *                  this field can be a regular expression to pull out
   *                  the name of the feature from the node label in the
   *                  input tree. If no value is provided the entire label is
   *                  used.
   *   'match':       Set to 'uniquename' (or any true value other than
   *                  'name') if the leaf nodes should be matched with the
   *                  feature uniquename. Otherwise nodes match on name.
   *   'load_now':    If set, the tree will be loaded immediately if a tree_file
   *                  is provided. Otherwise, the tree will be loaded via
   *                  a Tripal jobs call.
   *   'no_load':     If set the tree file will not be loaded.
   * @param $errors
   *   An empty array where validation error messages will be set. The keys
   *   of the array will be name of the field from the options array and the
   *   value is the error message.
   * @param $warnings
   *   An empty array where validation warning messages will be set. The
   *   warnings should not stop an insert or an update but should be provided
   *   to the user as information by a drupal_set_message() if appropriate. The
   *   keys of the array will be name of the field from the options array and
   *   the value is the warning message.
   *
   * @return
   *   TRUE for success and FALSE for failure.
   *
   * @ingroup tripal_phylotree_api
   */
  function chado_insert_phylotree(&$options, &$errors, &$warnings) {
    global $user;

    // Trim the textual options that were supplied. Keys that are absent stay
    // absent so that validation can still detect missing required values.
    foreach (['name', 'format', 'leaf_type', 'tree_file', 'name_re'] as $key) {
      if (isset($options[$key]) and is_string($options[$key])) {
        $options[$key] = trim($options[$key]);
      }
    }
    // An empty name_re must be removed, otherwise its mere presence stops the
    // validator from applying the "match the whole label" default.
    if (empty($options['name_re'])) {
      unset($options['name_re']);
    }
    // The documented default leaf type; without it no type_id can be resolved.
    if (empty($options['leaf_type'])) {
      $options['leaf_type'] = 'polypeptide';
    }
    if (!isset($options['tree_file'])) {
      $options['tree_file'] = '';
    }
    $options['analysis_id'] = !empty($options['analysis_id']) ? $options['analysis_id'] : NULL;

    // Validate the incoming options.
    if (!chado_validate_phylotree('insert', $options, $errors, $warnings)) {
      foreach ($errors as $message) {
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
      }
      return FALSE;
    }

    // A numeric tree_file is a Drupal managed file. Load it before touching
    // Chado so that a bad file ID does not leave a half-created tree behind.
    $file = NULL;
    if (is_numeric($options['tree_file'])) {
      $file = file_load($options['tree_file']);
      if (!$file) {
        $errors['tree_file'] = t('The managed tree file could not be loaded.');
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $errors['tree_file']);
        return FALSE;
      }
    }

    // If we're here then all is good, so add the phylotree record.
    $values = [
      'analysis_id' => $options['analysis_id'],
      'name' => $options['name'],
      'dbxref_id' => $options['dbxref_id'],
      'comment' => $options['description'],
      'type_id' => $options['type_id'],
    ];
    $phylotree = chado_insert_record('phylotree', $values);
    if (!$phylotree) {
      drupal_set_message(t('Unable to add phylotree.'), 'warning');
      tripal_report_error('tripal_phylogeny', TRIPAL_WARNING, 'Insert phylotree: Unable to create phylotree where values: %values',
        ['%values' => print_r($values, TRUE)]);
      return FALSE;
    }
    $phylotree_id = $phylotree['phylotree_id'];
    $options['phylotree_id'] = $phylotree_id;

    if ($file) {
      // Make the managed file permanent and associate it with the tree.
      $file->status = FILE_STATUS_PERMANENT;
      $file = file_save($file);
      file_usage_add($file, 'tripal_phylogeny', $options['format'], $phylotree_id);
      $real_file_path = drupal_realpath($file->uri);
      $job_label = $file->filename;
    }
    else {
      $real_file_path = $options['tree_file'];
      $job_label = basename($real_file_path);
    }

    // Nothing to import when loading is disabled or no file was supplied.
    if (!empty($options['no_load']) or !$real_file_path) {
      return TRUE;
    }

    $import_options = [
      'phylotree_id' => $phylotree_id,
      'leaf_type' => $options['leaf_type'],
      // Accept both the documented 'uniquename' string and a boolean flag,
      // while keeping the default value 'name' as a name match.
      'match' => (!empty($options['match']) and $options['match'] !== 'name') ? 'uniquename' : 'name',
      'name_re' => $options['name_re'],
    ];

    // If caller has requested to load the file now then do so, otherwise
    // submit using a Tripal job.
    if (!empty($options['load_now'])) {
      chado_phylogeny_import_tree_file($real_file_path, $options['format'], $import_options);
    }
    else {
      $job_args = [$real_file_path, $options['format'], $import_options];
      if (tripal_add_job("Import Tree File: " . $job_label, 'tripal_phylogeny',
        'chado_phylogeny_import_tree_file', $job_args, $user->uid)) {
        drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
      }
    }
    return TRUE;
  }
  /**
   * Updates a phylotree record in Chado.
   *
   * This function validates the options passed prior to update of the record
   * and if validation passes then any values in the options array that needed
   * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
   * their appropriate primary key values added to the options array.
   *
   * @param $phylotree_id
   *   The ID of the phylotree to update. When provided it is copied into
   *   $options['phylotree_id']; when empty, $options['phylotree_id'] is used.
   * @param $options
   *   An array of key value pairs with the following optional keys:
   *   'name':        The name of the tree. This will be displayed to users.
   *   'description': A description about the tree.
   *   'analysis_id': The ID of the analysis to which this phylotree should be
   *                  associated.
   *   'analysis':    If the analysis_id key is not used then the analysis name
   *                  may be provided to identify the analysis to which the tree
   *                  should be associated.
   *   'leaf_type':   A sequence ontology term or the word 'taxonomy'. If the
   *                  type is 'taxonomy' then this tree represents a
   *                  taxonomic tree. The default, if not specified, is the
   *                  term 'polypeptide'.
   *   'tree_file':   The path of the file containing the phylogenetic tree to
   *                  import or a Drupal managed_file numeric ID.
   *   'format':      The file format. Currently only 'newick' is supported.
   *                  Required when 'tree_file' is provided.
   *   'dbxref':      A database cross-reference of the form DB:ACCESSION.
   *                  Where DB is the database name, which is already present
   *                  in Chado, and ACCESSION is the unique identifier for
   *                  this tree in the remote database.
   *   'name_re':     If the leaf type is NOT 'taxonomy', then the value of
   *                  this field can be a regular expression to pull out
   *                  the name of the feature from the node label in the
   *                  input tree. If no value is provided the entire label is
   *                  used.
   *   'match':       Set to 'uniquename' (or any true value other than
   *                  'name') if the leaf nodes should be matched with the
   *                  feature uniquename. Otherwise nodes match on name.
   *   'load_now':    If set, the tree will be loaded immediately if a tree_file
   *                  is provided. Otherwise, the tree will be loaded via
   *                  a Tripal jobs call.
   *
   * @return
   *   TRUE for success and FALSE for failure.
   *
   * @ingroup tripal_phylotree_api
   */
  function chado_update_phylotree($phylotree_id, &$options) {
    global $user;

    // The validator and the import code read the ID from the options array,
    // so keep the explicit argument and the option in agreement.
    if ($phylotree_id) {
      $options['phylotree_id'] = $phylotree_id;
    }
    elseif (!empty($options['phylotree_id'])) {
      $phylotree_id = $options['phylotree_id'];
    }

    // Validate the incoming options.
    $errors = [];
    $warnings = [];
    if (!chado_validate_phylotree('update', $options, $errors, $warnings)) {
      foreach ($errors as $message) {
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
      }
      return FALSE;
    }

    // Collect only the columns for which a non-empty option was supplied.
    $values = [];
    $column_for_option = [
      'name' => 'name',
      'analysis_id' => 'analysis_id',
      'dbxref_id' => 'dbxref_id',
      'description' => 'comment',
      'type_id' => 'type_id',
    ];
    foreach ($column_for_option as $option_key => $column) {
      if (!empty($options[$option_key])) {
        $values[$column] = $options[$option_key];
      }
    }

    // Skip the record update entirely when only a new tree file was given.
    if ($values) {
      $phylotree = chado_update_record('phylotree', ['phylotree_id' => $phylotree_id],
        $values, ['return_record' => TRUE]);
      if (!$phylotree) {
        drupal_set_message(t('Unable to update phylotree.'), 'warning');
        tripal_report_error('tripal_phylogeny', TRIPAL_WARNING,
          'Update phylotree: Unable to update phylotree where values: %values',
          ['%values' => print_r($values, TRUE)]
        );
        return FALSE;
      }
    }

    // Without a tree file there is nothing left to do.
    if (empty($options['tree_file'])) {
      return TRUE;
    }

    // A numeric tree_file is a Drupal managed file. Load it before removing
    // any existing data so a bad file ID cannot leave the tree empty.
    $file = NULL;
    if (is_numeric($options['tree_file'])) {
      $file = file_load($options['tree_file']);
      if (!$file) {
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
          'Update phylotree: The managed tree file could not be loaded.');
        return FALSE;
      }
    }

    // Remove any existing nodes.
    chado_delete_record('phylonode', ['phylotree_id' => $phylotree_id]);

    // Make sure if we already have a file that we remove the old one, unless
    // the "new" file is that very same managed file.
    $sql = "
      SELECT FM.fid
      FROM {file_managed} FM
        INNER JOIN {file_usage} FU on FM.fid = FU.fid
      WHERE FU.id = :id and FU.module = 'tripal_phylogeny'
    ";
    $old_fid = db_query($sql, [':id' => $phylotree_id])->fetchField();
    if ($old_fid and (!$file or $file->fid != $old_fid)) {
      $old_file = file_load($old_fid);
      if ($old_file) {
        file_delete($old_file, TRUE);
      }
    }

    if ($file) {
      // Make the managed file permanent and associate it with the tree.
      $file->status = FILE_STATUS_PERMANENT;
      $file = file_save($file);
      file_usage_add($file, 'tripal_phylogeny', $options['format'], $phylotree_id);
      $real_file_path = drupal_realpath($file->uri);
      $job_label = $file->filename;
    }
    else {
      $real_file_path = $options['tree_file'];
      $job_label = basename($real_file_path);
    }

    $import_options = [
      'phylotree_id' => $phylotree_id,
      'leaf_type' => !empty($options['leaf_type']) ? $options['leaf_type'] : 'polypeptide',
      // Accept both the documented 'uniquename' string and a boolean flag,
      // while treating 'name' (or nothing) as a name match.
      'match' => (!empty($options['match']) and $options['match'] !== 'name') ? 'uniquename' : 'name',
      // Updates get no defaults from the validator, so fall back to matching
      // the entire node label.
      'name_re' => !empty($options['name_re']) ? trim($options['name_re']) : '^(.*)$',
    ];

    // If caller has requested to load the file now then do so, otherwise
    // submit using a Tripal job.
    if (!empty($options['load_now'])) {
      chado_phylogeny_import_tree_file($real_file_path, $options['format'], $import_options);
    }
    else {
      $job_args = [$real_file_path, $options['format'], $import_options];
      if (tripal_add_job("Import Tree File: " . $job_label, 'tripal_phylogeny',
        'chado_phylogeny_import_tree_file', $job_args, $user->uid)) {
        drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
      }
    }
    return TRUE;
  }
  /**
   * Removes a single phylotree record from Chado.
   *
   * @param $phylotree_id
   *   The primary key of the phylotree record to remove. An empty value is
   *   reported as an error and nothing is deleted.
   *
   * @return
   *   FALSE when no phylotree_id is given, otherwise whatever
   *   chado_delete_record() returns for the delete.
   *
   * @ingroup tripal_phylotree_api
   */
  function chado_delete_phylotree($phylotree_id) {
    if ($phylotree_id) {
      // Remove the tree record that carries this primary key.
      return chado_delete_record('phylotree', ['phylotree_id' => $phylotree_id]);
    }

    // Without an ID there is no way to tell which tree should go.
    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
      'Please provide a phylotree_id to delete a tree.');
    return FALSE;
  }
  /**
   * Walks the tree and stamps each node with left and right indices.
   *
   * The running counter advances by 100 every time an index is handed out,
   * which leaves gaps for nodes that might be added later. A node's left index
   * is assigned before its children are visited and its right index after the
   * last child, so the indices of a child lie between those of its parent.
   *
   * @param $tree
   *   The tree array. It is modified in place: named nodes receive a
   *   'left_index', and nodes that carry an 'is_leaf' key or have children
   *   receive a 'right_index'.
   * @param $index
   *   The running counter. Callers normally omit it; it is threaded through
   *   the recursive calls by reference.
   *
   * @ingroup tripal_phylotree_api
   */
  function chado_assign_phylogeny_tree_indices(&$tree, &$index = 1) {
    if (array_key_exists('name', $tree)) {
      $index += 100;
      $tree['left_index'] = $index;

      // A node flagged with 'is_leaf' is closed immediately.
      if (array_key_exists('is_leaf', $tree)) {
        $index += 100;
        $tree['right_index'] = $index;
      }
    }

    if (!array_key_exists('branch_set', $tree)) {
      return;
    }

    foreach ($tree['branch_set'] as $child_key => $unused_child) {
      chado_assign_phylogeny_tree_indices($tree['branch_set'][$child_key], $index);
      // Re-close the parent after every child; the value written after the
      // final child is the one that sticks.
      $index += 100;
      $tree['right_index'] = $index;
    }
  }
  /**
   * Iterates through the tree array and creates phylonodes in Chado.
   *
   * The function iterates through the tree in a top-down approach adding
   * parent internal nodes prior to leaf nodes. Each node of the tree should have
   * the following fields:
   *
   *   -name:         The name (or label) for this node.
   *   -depth:        The depth of the node in the tree.
   *   -is_root:      Set to 1 if this node is a root node.
   *   -is_leaf:      Set to 1 if this node is a leaf node.
   *   -is_internal:  Set to 1 if this node is an internal node.
   *   -left_index:   The index of the node to the left in the tree.
   *   -right_index:  The index of the node to the right in the tree.
   *   -branch_set:   An array containing a list of nodes of that are children
   *                  of the node.
   *   -parent:       The name of the parent node.
   *   -organism_id:  The organism_id for associating the node with an organism.
   *   -properties:   An array of key/value pairs where the key is the cvterm_id
   *                  and the value is the property value. These properties
   *                  will be associated with the phylonode.
   *
   * Prior to importing the tree the indices can be set by using the
   * chado_assign_phylogeny_tree_indices() function.
   *
   * Each inserted node has its new 'phylonode_id' written back into $tree. If
   * a node cannot be inserted the error is reported and that node's subtree is
   * skipped, because its children would have no parent to reference.
   *
   * @param $tree
   *   The tree array.
   * @param $phylotree
   *   The phylotree object (from Chado).
   * @param $options
   *   The options provide some direction for how the tree is imported. The
   *   following keys can be used:
   *   -taxonomy:   Set to 1 if this tree is a taxonomic tree. Set to 0
   *                otherwise.
   *   -leaf_type:  Set to the leaf type name. If this is a non-taxonomic tree
   *                that is associated with features, then this should be the
   *                Sequence Ontology term for the feature (e.g. polypeptide).
   *                If this is a taxonomic tree then this option is not needed.
   *   -match:      Set to either 'name' or 'uniquename'. This is used for
   *                matching the feature name or uniquename with the node name.
   *                This is not needed for taxonomic trees.
   *   -name_re:    Set to a regular expression that can be used for matching
   *                the node name with the feature name if the node name is not
   *                identical to the feature name. The first capture group is
   *                used as the feature name.
   * @param $vocab
   *   Optional. An array containing a set of key/value pairs that maps node
   *   types to CV terms. The keys must be 'root', 'internal' or 'leaf'. If
   *   no vocab is provided then the terms provided by the tripal_phylogeny
   *   CV will be used.
   * @param $parent
   *   This argument is not needed when the function is first called. This
   *   function is recursive and this argument is used on recursive calls.
   *
   * @ingroup tripal_phylotree_api
   */
  function chado_phylogeny_import_tree(&$tree, $phylotree, $options, $vocab = [], $parent = NULL) {
    if (!is_array($tree)) {
      return;
    }

    // Get the vocabulary terms used to describe nodes in the tree if one
    // wasn't provided.
    if (empty($vocab)) {
      $vocab = chado_phylogeny_get_node_types_vocab();
      if (empty($vocab)) {
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
          'Import phylotree: Unable to find the terms used to describe tree nodes.');
        return;
      }
    }

    if (array_key_exists('name', $tree)) {
      $values = [
        'phylotree_id' => $phylotree->phylotree_id,
        'left_idx' => $tree['left_index'],
        'right_idx' => $tree['right_index'],
      ];
      // Add in any optional values to the $values array if they are present.
      if (!empty($tree['name'])) {
        $values['label'] = $tree['name'];
      }
      if (!empty($tree['length'])) {
        $values['distance'] = $tree['length'];
      }

      $parent_id = (is_array($parent) and isset($parent['phylonode_id'])) ? $parent['phylonode_id'] : NULL;

      // Set the type of node.
      if (!empty($tree['is_root'])) {
        $values['type_id'] = $vocab['root']->cvterm_id;
      }
      elseif (!empty($tree['is_internal'])) {
        $values['type_id'] = $vocab['internal']->cvterm_id;
        $values['parent_phylonode_id'] = $parent_id;
        // TODO: a feature may be associated here but it is recommended that it
        // be a feature of type SO:match and should represent the alignment of
        // all features beneath it.
      }
      elseif (!empty($tree['is_leaf'])) {
        $values['type_id'] = $vocab['leaf']->cvterm_id;
        $values['parent_phylonode_id'] = $parent_id;
        // In a sequence-based tree, try to match the leaf with a feature. That
        // is only possible when the leaf has a name.
        if (!empty($tree['name']) and empty($options['taxonomy'])) {
          $feature_id = _chado_phylogeny_find_leaf_feature_id($tree['name'], $options);
          if ($feature_id !== NULL) {
            $values['feature_id'] = $feature_id;
          }
        }
      }

      // Insert the new node and then add its assigned phylonode_id to the node.
      $phylonode = chado_insert_record('phylonode', $values);
      if (!$phylonode) {
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
          'Import phylotree: Unable to insert the phylonode where values: %values',
          ['%values' => print_r($values, TRUE)]);
        return;
      }
      $tree['phylonode_id'] = $phylonode['phylonode_id'];

      // Associate this node with an organism if one is provided, as is the
      // case for taxonomic trees.
      if (isset($tree['organism_id'])) {
        chado_insert_record('phylonode_organism', [
          'phylonode_id' => $tree['phylonode_id'],
          'organism_id' => $tree['organism_id'],
        ]);
      }

      // Associate any properties.
      if (!empty($tree['properties']) and is_array($tree['properties'])) {
        foreach ($tree['properties'] as $type_id => $value) {
          chado_insert_record('phylonodeprop', [
            'phylonode_id' => $tree['phylonode_id'],
            'type_id' => $type_id,
            'value' => $value,
          ]);
        }
      }
    }

    // Descend into the children, handing this node down as their parent.
    if (array_key_exists('branch_set', $tree) and is_array($tree['branch_set'])) {
      foreach (array_keys($tree['branch_set']) as $key) {
        chado_phylogeny_import_tree($tree['branch_set'][$key], $phylotree, $options, $vocab, $tree);
      }
    }
  }

  /**
   * Finds the single feature that a leaf label refers to.
   *
   * @param $label
   *   The leaf node label.
   * @param $options
   *   The import options; 'match', 'name_re' and 'leaf_type' are used.
   *
   * @return
   *   The feature_id when exactly one feature matches, otherwise NULL (no
   *   match, an ambiguous match, or a failed lookup).
   */
  function _chado_phylogeny_find_leaf_feature_id($label, $options) {
    // Anything other than an explicit 'name' match uses the uniquename.
    $column = (isset($options['match']) and $options['match'] == 'name') ? 'name' : 'uniquename';

    // Use the first capture group of name_re when it matches, otherwise fall
    // back to the whole label. The expression is delimited with '/', so any
    // '/' inside it must already be escaped.
    $lookup = $label;
    $matches = [];
    $re = isset($options['name_re']) ? (string) $options['name_re'] : '';
    if ($re !== '' and preg_match("/$re/", $label, $matches) and isset($matches[1])) {
      $lookup = $matches[1];
    }

    $criteria = [
      $column => $lookup,
      'type_id' => [
        'name' => isset($options['leaf_type']) ? $options['leaf_type'] : NULL,
        'cv_id' => [
          'name' => 'sequence',
        ],
      ],
    ];
    $features = chado_select_record('feature', ['feature_id'], $criteria);

    // Only an unambiguous match can be associated with the node.
    if (is_array($features) and count($features) == 1) {
      return $features[0]->feature_id;
    }
    return NULL;
  }
  726. /**
  727. * Get the vocabulary terms used to describe nodes in the tree.
  728. *
  729. * @return
  730. * Array of vocab info or FALSE on failure.
  731. *
  732. * @ingroup tripal_phylotree_api
  733. */
  734. function chado_phylogeny_get_node_types_vocab() {
  735. // Get the vocabulary terms used to describe nodes in the tree.
  736. $values = [
  737. 'name' => 'phylo_leaf',
  738. 'cv_id' => [
  739. 'name' => 'tripal_phylogeny',
  740. ],
  741. ];
  742. $leaf = chado_generate_var('cvterm', $values);
  743. if (!$leaf) {
  744. tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
  745. "Could not find the leaf vocabulary term: 'phylo_leaf'. It should " .
  746. "already be present as part of the tripal_phylogeny vocabulary.");
  747. return FALSE;
  748. }
  749. $values['name'] = 'phylo_interior';
  750. $internal = chado_generate_var('cvterm', $values);
  751. if (!$internal) {
  752. tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
  753. "Could not find the leaf vocabulary term: 'phylo_interior'. It should " .
  754. "already be present as part of the tripal_phylogeny vocabulary.");
  755. return FALSE;
  756. }
  757. $values['name'] = 'phylo_root';
  758. $root = chado_generate_var('cvterm', $values);
  759. if (!$root) {
  760. tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
  761. "Could not find the leaf vocabulary term: 'phylo_root'. It should " .
  762. "already be present as part of the tripal_phylogeny vocabulary.");
  763. return FALSE;
  764. }
  765. $vocab = [
  766. 'leaf' => $leaf,
  767. 'internal' => $internal,
  768. 'root' => $root,
  769. ];
  770. return $vocab;
  771. }
  772. /**
  773. * Imports a tree file.
  774. *
  775. * This function is used as a wrapper for loading a phylogenetic tree using
  776. * any number of file loaders.
  777. *
  778. * @param $file_name
  779. * The name of the file containing the phylogenetic tree to import.
  780. * @param $format
  781. * The format of the file. Currently only the 'newick' file format is
  782. * supported.
  783. * @param $options
  784. * Options if the phylotree record already exists:
  785. * 'phylotree_id': The imported nodes will be associated with this tree.
  786. * 'leaf_type': A sequence ontology term or the word 'organism'. If the
  787. * type is 'organism' then this tree represents a
  788. * taxonomic tree. The default, if not specified, is the
  789. * term 'polypeptide'.
  790. * 'name_re': If the leaf type is NOT 'taxonomy', then the value of
  791. * this field can be a regular expression to pull out
  792. * the name of the feature from the node label in the
  793. * intput tree. If no value is provided the entire label is
  794. * used.
  795. * 'match': Set to 'uniquename' if the leaf nodes should be matched
  796. * with the feature uniquename.
  797. *
  798. * @ingroup tripal_phylotree_api
  799. */
  800. function chado_phylogeny_import_tree_file($file_name, $format, $options = [], $job_id = NULL) {
  801. // Set some option details.
  802. if (!array_key_exists('leaf_type', $options)) {
  803. $options['leaf_type'] = 'polypeptide';
  804. }
  805. if (!array_key_exists('match', $options)) {
  806. $options['match'] = 'name';
  807. }
  808. if (!array_key_exists('name_re', $options)) {
  809. $options['name_re'] = '^(.*)$';
  810. }
  811. $options['name_re'] = trim($options['name_re']);
  812. // If a phylotree ID is not passed in then make sure we have the other
  813. // required fields for creating a tree.
  814. if (!array_key_exists('phylotree_id', $options)) {
  815. if (!array_key_exists('name', $options)) {
  816. tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
  817. 'The phylotree_id is required for importing the tree.');
  818. return FALSE;
  819. }
  820. }
  821. // Get the phylotree record.
  822. $values = ['phylotree_id' => $options['phylotree_id']];
  823. $phylotree = chado_generate_var('phylotree', $values);
  824. if (!$phylotree) {
  825. tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
  826. 'Could not find the phylotree using the ID provided: %phylotree_id.',
  827. ['%phylotree_id' => $options['phylotree_id']]);
  828. return FALSE;
  829. }
  830. $transaction = db_transaction();
  831. print "\nNOTE: Loading of this tree file is performed using a database transaction. \n" .
  832. "If the load fails or is terminated prematurely then the entire set of \n" .
  833. "insertions/updates is rolled back and will not be found in the database\n\n";
  834. try {
  835. // Parse the file according to the format indicated.
  836. if ($format == 'newick') {
  837. // Parse the tree into the expected nested node format.
  838. module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.phylotree_newick');
  839. $tree = tripal_phylogeny_parse_newick_file($file_name);
  840. // Assign the right and left indecies to the tree ndoes.
  841. chado_assign_phylogeny_tree_indices($tree);
  842. }
  843. // Iterate through the tree nodes and add them to Chado in accordance
  844. // with the details in the $options array.
  845. chado_phylogeny_import_tree($tree, $phylotree, $options);
  846. } catch (Exception $e) {
  847. $transaction->rollback();
  848. watchdog_exception('tripal_phylogeny', $e);
  849. }
  850. }