tripal_phylogeny.api.inc 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. <?php
  2. /**
  3. * Validates an $options array for insert or update of a phylotree record.
  4. *
  5. * If validation passes then any values that needed validation lookups
  6. * (such as the dbxref, analysis, leaf_type, etc) will have their approriate
  7. * primary_keys added to the $options array, and missing default values
  8. * will also be added.
  9. *
  10. * @param $val_type
  11. * The type of validation. Can be either 'insert' or 'update'.
  12. * @param $options
  13. * An array of key/value pairs containing any of the valid keys for
  14. * either the tripal_insert_phylotree() or tripal_update_phylotree()
  15. * functions.
  16. * @param $errors
  17. * An empty array where validation error messages will be set. The keys
  18. * of the array will be name of the field from the options array and the
  19. * value is the error message.
  20. * @param $warnings
  21. * An empty array where validation warning messagges will be set. The
  22. * warnings should not stop an insert or an update but should be provided
  23. * to the user as information by a drupal_set_message() if appropriate. The
  24. * keys of the array will be name of the field from the options array and the
  25. * value is the error message.
  26. * @return
  27. * If validation failes then FALSE is returned. Any options that do not pass
  28. * validation checks will be added in the $errors array with the key being
  29. * the option and the value being the error message. If validation
  30. * is successful then TRUE is returned.
  31. *
  32. */
  33. function tripal_validate_phylotree($val_type, &$options, &$errors, &$warnings) {
  34. if ($val_type != 'insert' and $val_type != 'update') {
  35. tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, "The $val_type argument must be either 'update or 'insert'.");
  36. }
  37. // Set Defaults.
  38. if ($val_type == 'insert') {
  39. // Match by feature name.
  40. if (!array_key_exists('match', $options)) {
  41. $options['match'] = 'name';
  42. }
  43. // The regular expression is to match the entire node name.
  44. if (!array_key_exists('name_re', $options)) {
  45. $options['name_re'] = '^(.*)$';
  46. }
  47. // A dbxref is not required by Tripal but is required by the database
  48. // field in the phylotree table. Therefore, if the dbxref is not provided
  49. // we can set this to be the null database and null dbxref which
  50. // is represented as 'null:local:null'
  51. if (!array_key_exists('dbxref', $options)) {
  52. $options['dbxref'] = "null:local:null";
  53. }
  54. }
  55. // Make sure required values are set.
  56. if ($val_type == 'insert') {
  57. if (!array_key_exists('name', $options)) {
  58. $errors['name'] = t('Please provide the name of the tree.');
  59. return FALSE;
  60. }
  61. if (!array_key_exists('description', $options)) {
  62. $errors['description'] = t('Please provide a description for this tree.');
  63. return FALSE;
  64. }
  65. if (!array_key_exists('analysis', $options) and !array_key_exists('analysis_id', $options)) {
  66. $errors['analysis'] = t('Please provide an analysis or analysis_id for this tree.');
  67. return FALSE;
  68. }
  69. if (!array_key_exists('tree_file', $options)) {
  70. $errors['tree_file'] = t('Please provide either the full path to the tree_file or a Drupal managed file ID number.');
  71. return FALSE;
  72. }
  73. if (!array_key_exists('format', $options) or !$options['format']) {
  74. $errors['format'] = t('Please provide a file format for the tree file.');
  75. return FALSE;
  76. }
  77. // Make sure the file format is correct
  78. if ($options['format'] != 'newick' and $options['format'] != 'taxonomy') {
  79. $errors['format'] = t('The file format is not supported. Currently only the "newick" file format is supported.');
  80. return FALSE;
  81. }
  82. }
  83. else {
  84. // Does the phylotree ID exist and is it valid
  85. if (!array_key_exists('phylotree_id', $options)) {
  86. $errors['phylotree_id'] = t('Please provide the ID for the tree.');
  87. return FALSE;
  88. }
  89. $exists = chado_select_record('phylotree', array('phylotree_id'),
  90. array('phylotree_id' => $options['phylotree_id']), array('has_record' => 1));
  91. if (!$exists) {
  92. $errors['phylotree_id'] = t('The phylotree_id does not exist.');
  93. return FALSE;
  94. }
  95. }
  96. // Make sure the file exists if one is specified
  97. if (array_key_exists('tree_file', $options) and $options['tree_file']) {
  98. // If this is a numeric Drupal file then all is good, no need to check.
  99. if (!is_numeric($options['tree_file'])) {
  100. if (!file_exists($options['tree_file'])) {
  101. $errors['tree_file'] = t('The file provided does not exists.');
  102. return FALSE;
  103. }
  104. }
  105. // Make sure the file format is correct
  106. if (!array_key_exists('format', $options) or
  107. ($options['format'] != 'newick' and $options['format'] != 'taxonomy')) {
  108. $errors['format'] = t('Please provide a supported file format. Currently only the "newick" file format is supported.');
  109. return FALSE;
  110. }
  111. // If no leaf type is provided then use the polypeptide term.
  112. if (!array_key_exists('leaf_type', $options) or !$options['leaf_type']) {
  113. $options['leaf_type'] = 'polypeptide';
  114. }
  115. }
  116. // Make sure the analysis exists.
  117. $analysis = NULL;
  118. if (array_key_exists('analysis_id', $options) and $options['analysis_id']) {
  119. $analysis = chado_select_record('analysis', array('analysis_id'), array('analysis_id' => $options['analysis_id']));
  120. if (!$analysis) {
  121. $errors['analysis_id'] = t('The analysis name provided does not exist.');
  122. return FALSE;
  123. }
  124. $options['analysis_id'] = $analysis[0]->analysis_id;
  125. }
  126. if (array_key_exists('analysis', $options) and $options['analysis']) {
  127. $analysis = chado_select_record('analysis', array('analysis_id'), array('name' => $options['analysis']));
  128. if (!$analysis) {
  129. $errors['analysis'] = t('The analysis ID provided does not exist.');
  130. return FALSE;
  131. }
  132. $options['analysis_id'] = $analysis[0]->analysis_id;
  133. }
  134. // Make sure the leaf type exists.
  135. $type = NULL;
  136. if (array_key_exists('leaf_type', $options) and $options['leaf_type']) {
  137. if ($options['leaf_type'] == 'taxonomy') {
  138. $values = array(
  139. 'cv_id' => array(
  140. 'name' => 'tripal_phylogeny'
  141. ),
  142. 'name' => 'taxonomy'
  143. );
  144. $type = chado_select_record('cvterm', array('cvterm_id'), $values);
  145. }
  146. else {
  147. $values = array(
  148. 'cv_id' => array(
  149. 'name' => 'sequence'
  150. ),
  151. 'name' => $options['leaf_type']
  152. );
  153. $type = chado_select_record('cvterm', array('cvterm_id'), $values);
  154. if (!$type) {
  155. $errors['leaf_type'] = t('The leaf_type provided is not a valid Sequence Ontology term: %term.');
  156. return FALSE;
  157. }
  158. }
  159. $options['type_id'] = $type[0]->cvterm_id;
  160. }
  161. // A Dbxref is required by the phylotree module, but if the
  162. // tree was generated in-house and the site admin doens't want to
  163. // assign a local dbxref then we will set it to the null db
  164. // and the local:null dbxref.
  165. if (array_key_exists('dbxref', $options)) {
  166. if (!$options['dbxref']) {
  167. $options['dbxref'] = 'null:local:null';
  168. }
  169. $matches = array();
  170. preg_match('/^(.*?):(.*)$/', $options['dbxref'], $matches);
  171. $db_name = $matches[1];
  172. $accession = $matches[2];
  173. $values = array(
  174. 'accession' => $accession,
  175. 'db_id' => array(
  176. 'name' => $db_name
  177. ),
  178. );
  179. $dbxref = chado_generate_var('dbxref', $values);
  180. if (!$dbxref) {
  181. $errors['dbxref'] = t('The dbxref provided does not exist in the database: %dbxref.', array('%dbxref' => $dbxref));
  182. return FALSE;
  183. }
  184. $options['dbxref_id'] = $dbxref->dbxref_id;
  185. }
  186. // Make sure the tree name is unique
  187. if (array_key_exists('name', $options) and $options['name']) {
  188. $sql = "
  189. SELECT *
  190. FROM {phylotree} P
  191. WHERE
  192. P.name = :name
  193. ";
  194. $args = array(':name' => $options['name']);
  195. if ($val_type == 'update') {
  196. $sql .= " AND NOT P.phylotree_id = :phylotree_id";
  197. $args[':phylotree_id'] = $options['phylotree_id'];
  198. }
  199. $result = chado_query($sql, $args)->fetchObject();
  200. if ($result) {
  201. $errors['name'] = t("The tree name is in use by another tree. Please provide a different unique name for this tree.");
  202. }
  203. }
  204. return TRUE;
  205. }
  206. /**
  207. * Inserts a phylotree record into Chado.
  208. *
  209. * This function validates the options passed prior to insertion of the record,
  210. * and if validation passes then any values in the options array that needed
  211. * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
  212. * their approriate primary key values added to the options array.
  213. *
  214. * @param $options
  215. * An array of key value pairs with the following keys required:
  216. * 'name': The name of the tree. This will be displayed to users.
  217. * 'description: A description about the tree
  218. * 'anlaysis_id: The ID of the analysis to which this phylotree should be
  219. * associated.
  220. * 'analysis': If the analysis_id key is not used then the analysis name
  221. * may be provided to identify the analysis to which the tree
  222. * should be associated.
  223. * 'leaf_type': A sequence ontology term or the word 'organism'. If the
  224. * type is 'organism' then this tree represents a
  225. * taxonomic tree. The default, if not specified, is the
  226. * term 'polypeptide'.
  227. * 'tree_file': The path of the file containing the phylogenetic tree to
  228. * import or a Drupal managed_file numeric ID.
  229. * 'format': The file format. Currently only 'newick is supported'
  230. * Optional keys:
  231. * 'dbxref': A database cross-reference of the form DB:ACCESSION.
  232. * Where DB is the database name, which is already present
  233. * in Chado, and ACCESSION is the unique identifier for
  234. * this tree in the remote database.
  235. * 'name_re': If the leaf type is NOT 'taxonomy', then the value of
  236. * this field can be a regular expression to pull out
  237. * the name of the feature from the node label in the
  238. * intput tree. If no value is provided the entire label is
  239. * used.
  240. * 'match': Set to 'uniquename' if the leaf nodes should be matched
  241. * with the feature uniquename.
  242. * 'load_now': If set, the tree will be loaded immediately if a tree_file
  243. * is provided. Otherwise, the tree will be loaded via
  244. * a Tripal jobs call.
  245. * 'no_load': If set the tree file will not be loaded.
  246. * @param $errors
  247. * An empty array where validation error messages will be set. The keys
  248. * of the array will be name of the field from the options array and the
  249. * value is the error message.
  250. * @param $warnings
  251. * An empty array where validation warning messagges will be set. The
  252. * warnings should not stop an insert or an update but should be provided
  253. * to the user as information by a drupal_set_message() if appropriate. The
  254. * keys of the array will be name of the field from the options array and the
  255. * value is the error message.
  256. * @return
  257. * TRUE for success and FALSE for failure.
  258. */
  259. function tripal_insert_phylotree(&$options, &$errors, &$warnings) {
  260. global $user;
  261. $options['name_re'] = trim($options['name_re']);
  262. $options['leaf_type'] = trim($options['leaf_type']);
  263. $options['name'] = trim($options['name']);
  264. $options['format'] = trim($options['format']);
  265. $options['tree_file'] = trim($options['tree_file']);
  266. // Validate the incoming options.
  267. $success = tripal_validate_phylotree('insert', $options, $errors, $warnings);
  268. if (!$success) {
  269. foreach ($errors as $field => $message) {
  270. tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
  271. }
  272. return FALSE;
  273. }
  274. // If we're here then all is good, so add the phylotree record.
  275. $values = array(
  276. 'analysis_id' => $options['analysis_id'],
  277. 'name' => $options['name'],
  278. 'dbxref_id' => $options['dbxref_id'],
  279. 'comment' => $options['description'],
  280. 'type_id' => $options['type_id'],
  281. );
  282. $phylotree = chado_insert_record('phylotree', $values);
  283. if (!$phylotree) {
  284. drupal_set_message(t('Unable to add phylotree.'), 'warning');
  285. tripal_report_error('tripal_phylogeny', TRIPAL_WARNING, 'Insert phylotree: Unable to create phylotree where values: %values',
  286. array('%values' => print_r($values, TRUE)));
  287. return FALSE;
  288. }
  289. $phylotree_id = $phylotree['phylotree_id'];
  290. $options['phylotree_id'] = $phylotree_id;
  291. // If the tree_file is numeric then it is a Drupal managed file and
  292. // we want to make the file permanent and associated with the tree.
  293. if (is_numeric($options['tree_file'])) {
  294. $file = NULL;
  295. $file = file_load($options['tree_file']);
  296. $file->status = FILE_STATUS_PERMANENT;
  297. $file = file_save($file);
  298. file_usage_add($file, 'tripal_phylogeny', $options['format'], $phylotree_id);
  299. $real_file_path = drupal_realpath($file->uri);
  300. }
  301. else {
  302. $real_file_path = $options['tree_file'];
  303. }
  304. // If caller has requested to load the file now then do so, otherwise
  305. // submit using a Tripal job.
  306. if (!array_key_exists('no_load', $options) or !$options['no_load']) {
  307. if (array_key_exists('load_now', $options) and $options['load_now']) {
  308. $args = array(
  309. 'phylotree_id' => $phylotree_id,
  310. 'leaf_type' => $options['leaf_type'],
  311. 'match' => $options['match'] ? 'uniquename' : 'name',
  312. 'name_re' => $options['name_re'],
  313. );
  314. tripal_phylogeny_import_tree_file($real_file_path, $options['format'], $args);
  315. }
  316. else {
  317. $args = array(
  318. $real_file_path,
  319. 'newick',
  320. array(
  321. 'phylotree_id' => $phylotree_id,
  322. 'leaf_type' => $options['leaf_type'],
  323. 'match' => $options['match'] ? 'uniquename' : 'name',
  324. 'name_re' => $options['name_re'],
  325. ),
  326. );
  327. if (tripal_add_job("Import Tree File: " . $file->filename, 'tripal_phylogeny',
  328. 'tripal_phylogeny_import_tree_file', $args, $user->uid)) {
  329. drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
  330. }
  331. }
  332. }
  333. return TRUE;
  334. }
  335. /**
  336. * Updates a phylotree record into Chado.
  337. *
  338. * This function validates the options passed prior to update of the record
  339. * and if validation passes then any values in the options array that needed
  340. * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
  341. * their approriate primary key values added to the options array. A Drupal
  342. * File object will be added to the options array for the tree file if one
  343. * is provided.
  344. *
  345. *
  346. * @param $phylotree_id
  347. * The ID of the phylotree to update.
  348. * @param $options
  349. * An array of key value pairs with the following optional keys:
  350. * 'name': The name of the tree. This will be displayed to users.
  351. * 'description: A description about the tree
  352. * 'anlaysis_id: The ID of the analysis to which this phylotree should be
  353. * associated.
  354. * 'analysis': If the analysis_id key is not used then the analysis name
  355. * may be provided to identify the analysis to which the tree
  356. * should be associated.
  357. * 'leaf_type': A sequence ontology term or the word 'organism'. If the
  358. * type is 'organism' then this tree represents a
  359. * taxonomic tree. The default, if not specified, is the
  360. * term 'polypeptide'.
  361. * 'tree_file': The path of the file containing the phylogenetic tree to
  362. * import or a Drupal managed_file numeric ID.
  363. * 'format': The file format. Currently only 'newick is supported'
  364. * 'dbxref': A database cross-reference of the form DB:ACCESSION.
  365. * Where DB is the database name, which is already present
  366. * in Chado, and ACCESSION is the unique identifier for
  367. * this tree in the remote database.
  368. * 'name_re': If the leaf type is NOT 'taxonomy', then the value of
  369. * this field can be a regular expression to pull out
  370. * the name of the feature from the node label in the
  371. * intput tree. If no value is provided the entire label is
  372. * used.
  373. * 'match': Set to 'uniquename' if the leaf nodes should be matched
  374. * with the feature uniquename.
  375. * 'load_now': If set, the tree will be loaded immediately if a tree_file
  376. * is provided. Otherwise, the tree will be loaded via
  377. * a Tripal jobs call.
  378. */
  379. function tripal_update_phylotree($phylotree_id, &$options) {
  380. global $user;
  381. // Validate the incoming options.
  382. $errors = array();
  383. $warnings = array();
  384. $success = tripal_validate_phylotree('update', $options, $errors, $warnings);
  385. if (!$success) {
  386. foreach ($errors as $field => $message) {
  387. tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
  388. }
  389. return FALSE;
  390. }
  391. // If we're here then all is good, so update the phylotree record.
  392. $match = array(
  393. 'phylotree_id' => $phylotree_id,
  394. );
  395. if (array_key_exists('name', $options) and $options['name']) {
  396. $values['name'] = $options['name'];
  397. }
  398. if (array_key_exists('analysis_id', $options) and $options['analysis_id']) {
  399. $values['analysis_id'] = $options['analysis_id'];
  400. }
  401. if (array_key_exists('dbxref_id', $options) and $options['dbxref_id']) {
  402. $values['dbxref_id'] = $options['dbxref_id'];
  403. }
  404. if (array_key_exists('description', $options) and $options['description']) {
  405. $values['comment'] = $options['description'];
  406. }
  407. if (array_key_exists('type_id', $options) and $options['type_id']) {
  408. $values['type_id'] = $options['type_id'];
  409. }
  410. $phylotree = chado_update_record('phylotree', $match, $values, array('return_record' => TRUE));
  411. if (!$phylotree) {
  412. drupal_set_message(t('Unable to update phylotree.'), 'warning');
  413. tripal_report_error('tripal_phylogeny', TRIPAL_WARNING,
  414. 'Update phylotree: Unable to update phylotree where values: %values',
  415. array('%values' => print_r($values, TRUE))
  416. );
  417. }
  418. // If we have a tree file, then import the tree
  419. if (array_key_exists('tree_file', $options) and $options['tree_file']) {
  420. // Remove any existing nodes
  421. chado_delete_record('phylonode', array('phylotree_id' => $options['phylotree_id']));
  422. // Make sure if we already have a file that we remove the old one.
  423. $sql = "
  424. SELECT FM.fid
  425. FROM {file_managed} FM
  426. INNER JOIN {file_usage} FU on FM.fid = FU.fid
  427. WHERE FU.id = :id and FU.module = 'tripal_phylogeny'
  428. ";
  429. $fid = db_query($sql, array(':id' => $options['phylotree_id']))->fetchField();
  430. if ($fid) {
  431. $file = file_load($fid);
  432. file_delete($file, TRUE);
  433. }
  434. // If the tree_file is numeric then it is a Drupal managed file and
  435. // we want to make the file permanent and associated with the tree.
  436. if (is_numeric($options['tree_file'])) {
  437. $file = file_load($options['tree_file']);
  438. $file->status = FILE_STATUS_PERMANENT;
  439. $file = file_save($file);
  440. file_usage_add($file, 'tripal_phylogeny', 'newick', $options['phylotree_id']);
  441. // Add a job to parse the new node tree.
  442. $real_file_path = drupal_realpath($file->uri);
  443. }
  444. else {
  445. $real_file_path = $options['tree_file'];
  446. }
  447. // If caller has requested to load the file now then do so, otherwise
  448. // submit using a Tripal job.
  449. if (array_key_exists('load_now', $options) and $options['load_now']) {
  450. $args = array(
  451. 'phylotree_id' => $options['phylotree_id'],
  452. 'leaf_type' => $options['leaf_type'],
  453. 'match' => $options['match'] ? 'uniquename' : 'name',
  454. 'name_re' => $options['name_re'],
  455. );
  456. tripal_phylogeny_import_tree_file($real_file_path, $options['format'], $args);
  457. }
  458. else {
  459. $args = array(
  460. $real_file_path,
  461. 'newick',
  462. array(
  463. 'phylotree_id' => $options['phylotree_id'],
  464. 'leaf_type' => $options['leaf_type'],
  465. 'match' => $options['match'] ? 'uniquename' : 'name',
  466. 'name_re' => $options['name_re'],
  467. ),
  468. );
  469. if (tripal_add_job("Import Tree File: " . $file->filename, 'tripal_phylogeny',
  470. 'tripal_phylogeny_import_tree_file', $args, $user->uid)) {
  471. drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
  472. }
  473. }
  474. }
  475. return TRUE;
  476. }
  477. /**
  478. * Deletes a phylotree record from Chado.
  479. *
  480. * @param $phylotree_id
  481. *
  482. * @return
  483. * TRUE on success, FALSE on failure.
  484. */
  485. function tripal_delete_phylotree($phylotree_id) {
  486. // if we don't have a phylotree id for this node then this isn't a node of
  487. // type chado_phylotree or the entry in the chado_phylotree table was lost.
  488. if (!$phylotree_id) {
  489. tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
  490. 'Please provide a phylotree_id to delete a tree.');
  491. return FALSE;
  492. }
  493. // Remove the tree
  494. $values = array('phylotree_id' => $phylotree_id);
  495. return chado_delete_record('phylotree', $values);
  496. }