|
@@ -0,0 +1,853 @@
|
|
|
+<?php
|
|
|
+
|
|
|
+/**
|
|
|
+ * Validates an $options array for insert or update of a phylotree record.
|
|
|
+ *
|
|
|
+ * If validation passes then any values that needed validation lookups
|
|
|
+ * (such as the dbxref, analysis, leaf_type, etc) will have their approriate
|
|
|
+ * primary_keys added to the $options array, and missing default values
|
|
|
+ * will also be added.
|
|
|
+ *
|
|
|
+ * @param $val_type
|
|
|
+ * The type of validation. Can be either 'insert' or 'update'.
|
|
|
+ * @param $options
|
|
|
+ * An array of key/value pairs containing any of the valid keys for
|
|
|
+ * either the tripal_insert_phylotree() or tripal_update_phylotree()
|
|
|
+ * functions.
|
|
|
+ * @param $errors
|
|
|
+ * An empty array where validation error messages will be set. The keys
|
|
|
+ * of the array will be name of the field from the options array and the
|
|
|
+ * value is the error message.
|
|
|
+ * @param $warnings
|
|
|
+ * An empty array where validation warning messagges will be set. The
|
|
|
+ * warnings should not stop an insert or an update but should be provided
|
|
|
+ * to the user as information by a drupal_set_message() if appropriate. The
|
|
|
+ * keys of the array will be name of the field from the options array and the
|
|
|
+ * value is the error message.
|
|
|
+ * @return
|
|
|
+ * If validation failes then FALSE is returned. Any options that do not pass
|
|
|
+ * validation checks will be added in the $errors array with the key being
|
|
|
+ * the option and the value being the error message. If validation
|
|
|
+ * is successful then TRUE is returned.
|
|
|
+ *
|
|
|
+ */
|
|
|
+function tripal_validate_phylotree($val_type, &$options, &$errors, &$warnings) {
|
|
|
+
|
|
|
+ if ($val_type != 'insert' and $val_type != 'update') {
|
|
|
+ tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, "The $val_type argument must be either 'update or 'insert'.");
|
|
|
+ }
|
|
|
+
|
|
|
+ // Set Defaults.
|
|
|
+ if ($val_type == 'insert') {
|
|
|
+ // Match by feature name.
|
|
|
+ if (!array_key_exists('match', $options)) {
|
|
|
+ $options['match'] = 'name';
|
|
|
+ }
|
|
|
+ // The regular expression is to match the entire node name.
|
|
|
+ if (!array_key_exists('name_re', $options)) {
|
|
|
+ $options['name_re'] = '^(.*)$';
|
|
|
+ }
|
|
|
+ // A dbxref is not required by Tripal but is required by the database
|
|
|
+ // field in the phylotree table. Therefore, if the dbxref is not provided
|
|
|
+ // we can set this to be the null database and null dbxref which
|
|
|
+ // is represented as 'null:local:null'
|
|
|
+ if (!array_key_exists('dbxref', $options)) {
|
|
|
+ $options['dbxref'] = "null:local:null";
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Make sure required values are set.
|
|
|
+ if ($val_type == 'insert') {
|
|
|
+ if (!array_key_exists('name', $options)) {
|
|
|
+ $errors['name'] = t('Please provide the name of the tree.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ if (!array_key_exists('description', $options)) {
|
|
|
+ $errors['description'] = t('Please provide a description for this tree.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ if (!array_key_exists('analysis', $options) and !array_key_exists('analysis_id', $options)) {
|
|
|
+ $errors['analysis'] = t('Please provide an analysis or analysis_id for this tree.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ if (!array_key_exists('tree_file', $options)) {
|
|
|
+ $errors['tree_file'] = t('Please provide either the full path to the tree_file or a Drupal managed file ID number.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ if (!array_key_exists('format', $options) or !$options['format']) {
|
|
|
+ $errors['format'] = t('Please provide a file format for the tree file.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ // Make sure the file format is correct
|
|
|
+ if ($options['format'] != 'newick' and $options['format'] != 'taxonomy') {
|
|
|
+ $errors['format'] = t('The file format is not supported. Currently only the "newick" file format is supported.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ // Does the phylotree ID exist and is it valid
|
|
|
+ if (!array_key_exists('phylotree_id', $options)) {
|
|
|
+ $errors['phylotree_id'] = t('Please provide the ID for the tree.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ $exists = chado_select_record('phylotree', array('phylotree_id'),
|
|
|
+ array('phylotree_id' => $options['phylotree_id']), array('has_record' => 1));
|
|
|
+ if (!$exists) {
|
|
|
+ $errors['phylotree_id'] = t('The phylotree_id does not exist.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ // Make sure the file exists if one is specified
|
|
|
+ if (array_key_exists('tree_file', $options) and $options['tree_file']) {
|
|
|
+ // If this is a numeric Drupal file then all is good, no need to check.
|
|
|
+ if (!is_numeric($options['tree_file'])) {
|
|
|
+ if (!file_exists($options['tree_file'])) {
|
|
|
+ $errors['tree_file'] = t('The file provided does not exists.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // Make sure the file format is correct
|
|
|
+ if (!array_key_exists('format', $options) or
|
|
|
+ ($options['format'] != 'newick' and $options['format'] != 'taxonomy')) {
|
|
|
+ $errors['format'] = t('Please provide a supported file format. Currently only the "newick" file format is supported.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+ // If no leaf type is provided then use the polypeptide term.
|
|
|
+ if (!array_key_exists('leaf_type', $options) or !$options['leaf_type']) {
|
|
|
+ $options['leaf_type'] = 'polypeptide';
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Make sure the analysis exists.
|
|
|
+ $analysis = NULL;
|
|
|
+ if (array_key_exists('analysis_id', $options) and $options['analysis_id']) {
|
|
|
+ $analysis = chado_select_record('analysis', array('analysis_id'), array('analysis_id' => $options['analysis_id']));
|
|
|
+ if (!$analysis) {
|
|
|
+ $errors['analysis_id'] = t('The analysis name provided does not exist.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ $options['analysis_id'] = $analysis[0]->analysis_id;
|
|
|
+ }
|
|
|
+ if (array_key_exists('analysis', $options) and $options['analysis']) {
|
|
|
+ $analysis = chado_select_record('analysis', array('analysis_id'), array('name' => $options['analysis']));
|
|
|
+ if (!$analysis) {
|
|
|
+ $errors['analysis'] = t('The analysis ID provided does not exist.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ $options['analysis_id'] = $analysis[0]->analysis_id;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Make sure the leaf type exists.
|
|
|
+ $type = NULL;
|
|
|
+ if (array_key_exists('leaf_type', $options) and $options['leaf_type']) {
|
|
|
+ if ($options['leaf_type'] == 'taxonomy') {
|
|
|
+ $values = array(
|
|
|
+ 'cv_id' => array(
|
|
|
+ 'name' => 'tripal_phylogeny'
|
|
|
+ ),
|
|
|
+ 'name' => 'taxonomy'
|
|
|
+ );
|
|
|
+ $type = chado_select_record('cvterm', array('cvterm_id'), $values);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $values = array(
|
|
|
+ 'cv_id' => array(
|
|
|
+ 'name' => 'sequence'
|
|
|
+ ),
|
|
|
+ 'name' => $options['leaf_type']
|
|
|
+ );
|
|
|
+ $type = chado_select_record('cvterm', array('cvterm_id'), $values);
|
|
|
+ if (!$type) {
|
|
|
+ $errors['leaf_type'] = t('The leaf_type provided is not a valid Sequence Ontology term: %term.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ $options['type_id'] = $type[0]->cvterm_id;
|
|
|
+ }
|
|
|
+
|
|
|
+ // A Dbxref is required by the phylotree module, but if the
|
|
|
+ // tree was generated in-house and the site admin doens't want to
|
|
|
+ // assign a local dbxref then we will set it to the null db
|
|
|
+ // and the local:null dbxref.
|
|
|
+ if (array_key_exists('dbxref', $options)) {
|
|
|
+ if (!$options['dbxref']) {
|
|
|
+ $options['dbxref'] = 'null:local:null';
|
|
|
+ }
|
|
|
+ $matches = array();
|
|
|
+ preg_match('/^(.*?):(.*)$/', $options['dbxref'], $matches);
|
|
|
+ $db_name = $matches[1];
|
|
|
+ $accession = $matches[2];
|
|
|
+ $values = array(
|
|
|
+ 'accession' => $accession,
|
|
|
+ 'db_id' => array(
|
|
|
+ 'name' => $db_name
|
|
|
+ ),
|
|
|
+ );
|
|
|
+ $dbxref = chado_generate_var('dbxref', $values);
|
|
|
+ if (!$dbxref) {
|
|
|
+ $errors['dbxref'] = t('The dbxref provided does not exist in the database: %dbxref.', array('%dbxref' => $dbxref));
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ $options['dbxref_id'] = $dbxref->dbxref_id;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Make sure the tree name is unique
|
|
|
+ if (array_key_exists('name', $options) and $options['name']) {
|
|
|
+ $sql = "
|
|
|
+ SELECT *
|
|
|
+ FROM {phylotree} P
|
|
|
+ WHERE
|
|
|
+ P.name = :name
|
|
|
+ ";
|
|
|
+ $args = array(':name' => $options['name']);
|
|
|
+ if ($val_type == 'update') {
|
|
|
+ $sql .= " AND NOT P.phylotree_id = :phylotree_id";
|
|
|
+ $args[':phylotree_id'] = $options['phylotree_id'];
|
|
|
+ }
|
|
|
+ $result = chado_query($sql, $args)->fetchObject();
|
|
|
+ if ($result) {
|
|
|
+ $errors['name'] = t("The tree name is in use by another tree. Please provide a different unique name for this tree.");
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return TRUE;
|
|
|
+}
|
|
|
+/**
|
|
|
+ * Inserts a phylotree record into Chado.
|
|
|
+ *
|
|
|
+ * This function validates the options passed prior to insertion of the record,
|
|
|
+ * and if validation passes then any values in the options array that needed
|
|
|
+ * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
|
|
|
+ * their approriate primary key values added to the options array.
|
|
|
+ *
|
|
|
+ * @param $options
|
|
|
+ * An array of key value pairs with the following keys required:
|
|
|
+ * 'name': The name of the tree. This will be displayed to users.
|
|
|
+ * 'description: A description about the tree
|
|
|
+ * 'anlaysis_id: The ID of the analysis to which this phylotree should be
|
|
|
+ * associated.
|
|
|
+ * 'analysis': If the analysis_id key is not used then the analysis name
|
|
|
+ * may be provided to identify the analysis to which the tree
|
|
|
+ * should be associated.
|
|
|
+ * 'leaf_type': A sequence ontology term or the word 'organism'. If the
|
|
|
+ * type is 'organism' then this tree represents a
|
|
|
+ * taxonomic tree. The default, if not specified, is the
|
|
|
+ * term 'polypeptide'.
|
|
|
+ * 'tree_file': The path of the file containing the phylogenetic tree to
|
|
|
+ * import or a Drupal managed_file numeric ID.
|
|
|
+ * 'format': The file format. Currently only 'newick is supported'
|
|
|
+ * Optional keys:
|
|
|
+ * 'dbxref': A database cross-reference of the form DB:ACCESSION.
|
|
|
+ * Where DB is the database name, which is already present
|
|
|
+ * in Chado, and ACCESSION is the unique identifier for
|
|
|
+ * this tree in the remote database.
|
|
|
+ * 'name_re': If the leaf type is NOT 'taxonomy', then the value of
|
|
|
+ * this field can be a regular expression to pull out
|
|
|
+ * the name of the feature from the node label in the
|
|
|
+ * intput tree. If no value is provided the entire label is
|
|
|
+ * used.
|
|
|
+ * 'match': Set to 'uniquename' if the leaf nodes should be matched
|
|
|
+ * with the feature uniquename.
|
|
|
+ * 'load_now': If set, the tree will be loaded immediately if a tree_file
|
|
|
+ * is provided. Otherwise, the tree will be loaded via
|
|
|
+ * a Tripal jobs call.
|
|
|
+ * 'no_load': If set the tree file will not be loaded.
|
|
|
+ * @param $errors
|
|
|
+ * An empty array where validation error messages will be set. The keys
|
|
|
+ * of the array will be name of the field from the options array and the
|
|
|
+ * value is the error message.
|
|
|
+ * @param $warnings
|
|
|
+ * An empty array where validation warning messagges will be set. The
|
|
|
+ * warnings should not stop an insert or an update but should be provided
|
|
|
+ * to the user as information by a drupal_set_message() if appropriate. The
|
|
|
+ * keys of the array will be name of the field from the options array and the
|
|
|
+ * value is the error message.
|
|
|
+ * @return
|
|
|
+ * TRUE for success and FALSE for failure.
|
|
|
+ */
|
|
|
+function tripal_insert_phylotree(&$options, &$errors, &$warnings) {
|
|
|
+ global $user;
|
|
|
+
|
|
|
+ $options['name_re'] = trim($options['name_re']);
|
|
|
+ $options['leaf_type'] = trim($options['leaf_type']);
|
|
|
+ $options['name'] = trim($options['name']);
|
|
|
+ $options['format'] = trim($options['format']);
|
|
|
+ $options['tree_file'] = trim($options['tree_file']);
|
|
|
+
|
|
|
+ // Validate the incoming options.
|
|
|
+ $success = tripal_validate_phylotree('insert', $options, $errors, $warnings);
|
|
|
+ if (!$success) {
|
|
|
+ foreach ($errors as $field => $message) {
|
|
|
+ tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
|
|
|
+ }
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+ // If we're here then all is good, so add the phylotree record.
|
|
|
+ $values = array(
|
|
|
+ 'analysis_id' => $options['analysis_id'],
|
|
|
+ 'name' => $options['name'],
|
|
|
+ 'dbxref_id' => $options['dbxref_id'],
|
|
|
+ 'comment' => $options['description'],
|
|
|
+ 'type_id' => $options['type_id'],
|
|
|
+ );
|
|
|
+ $phylotree = chado_insert_record('phylotree', $values);
|
|
|
+ if (!$phylotree) {
|
|
|
+ drupal_set_message(t('Unable to add phylotree.'), 'warning');
|
|
|
+ tripal_report_error('tripal_phylogeny', TRIPAL_WARNING, 'Insert phylotree: Unable to create phylotree where values: %values',
|
|
|
+ array('%values' => print_r($values, TRUE)));
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ $phylotree_id = $phylotree['phylotree_id'];
|
|
|
+ $options['phylotree_id'] = $phylotree_id;
|
|
|
+
|
|
|
+ // If the tree_file is numeric then it is a Drupal managed file and
|
|
|
+ // we want to make the file permanent and associated with the tree.
|
|
|
+ if (is_numeric($options['tree_file'])) {
|
|
|
+ $file = NULL;
|
|
|
+ $file = file_load($options['tree_file']);
|
|
|
+ $file->status = FILE_STATUS_PERMANENT;
|
|
|
+ $file = file_save($file);
|
|
|
+ file_usage_add($file, 'tripal_phylogeny', $options['format'], $phylotree_id);
|
|
|
+ $real_file_path = drupal_realpath($file->uri);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $real_file_path = $options['tree_file'];
|
|
|
+ }
|
|
|
+
|
|
|
+ // If caller has requested to load the file now then do so, otherwise
|
|
|
+ // submit using a Tripal job.
|
|
|
+ if (!array_key_exists('no_load', $options) or !$options['no_load']) {
|
|
|
+ if (array_key_exists('load_now', $options) and $options['load_now']) {
|
|
|
+ $args = array(
|
|
|
+ 'phylotree_id' => $phylotree_id,
|
|
|
+ 'leaf_type' => $options['leaf_type'],
|
|
|
+ 'match' => $options['match'] ? 'uniquename' : 'name',
|
|
|
+ 'name_re' => $options['name_re'],
|
|
|
+ );
|
|
|
+ tripal_phylogeny_import_tree_file($real_file_path, $options['format'], $args);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $args = array(
|
|
|
+ $real_file_path,
|
|
|
+ 'newick',
|
|
|
+ array(
|
|
|
+ 'phylotree_id' => $phylotree_id,
|
|
|
+ 'leaf_type' => $options['leaf_type'],
|
|
|
+ 'match' => $options['match'] ? 'uniquename' : 'name',
|
|
|
+ 'name_re' => $options['name_re'],
|
|
|
+ ),
|
|
|
+ );
|
|
|
+ if (tripal_add_job("Import Tree File: " . $file->filename, 'tripal_phylogeny',
|
|
|
+ 'tripal_phylogeny_import_tree_file', $args, $user->uid)) {
|
|
|
+ drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return TRUE;
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Updates a phylotree record into Chado.
|
|
|
+ *
|
|
|
+ * This function validates the options passed prior to update of the record
|
|
|
+ * and if validation passes then any values in the options array that needed
|
|
|
+ * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
|
|
|
+ * their approriate primary key values added to the options array. A Drupal
|
|
|
+ * File object will be added to the options array for the tree file if one
|
|
|
+ * is provided.
|
|
|
+ *
|
|
|
+ *
|
|
|
+ * @param $phylotree_id
|
|
|
+ * The ID of the phylotree to update.
|
|
|
+ * @param $options
|
|
|
+ * An array of key value pairs with the following optional keys:
|
|
|
+ * 'name': The name of the tree. This will be displayed to users.
|
|
|
+ * 'description: A description about the tree
|
|
|
+ * 'anlaysis_id: The ID of the analysis to which this phylotree should be
|
|
|
+ * associated.
|
|
|
+ * 'analysis': If the analysis_id key is not used then the analysis name
|
|
|
+ * may be provided to identify the analysis to which the tree
|
|
|
+ * should be associated.
|
|
|
+ * 'leaf_type': A sequence ontology term or the word 'organism'. If the
|
|
|
+ * type is 'organism' then this tree represents a
|
|
|
+ * taxonomic tree. The default, if not specified, is the
|
|
|
+ * term 'polypeptide'.
|
|
|
+ * 'tree_file': The path of the file containing the phylogenetic tree to
|
|
|
+ * import or a Drupal managed_file numeric ID.
|
|
|
+ * 'format': The file format. Currently only 'newick is supported'
|
|
|
+ * 'dbxref': A database cross-reference of the form DB:ACCESSION.
|
|
|
+ * Where DB is the database name, which is already present
|
|
|
+ * in Chado, and ACCESSION is the unique identifier for
|
|
|
+ * this tree in the remote database.
|
|
|
+ * 'name_re': If the leaf type is NOT 'taxonomy', then the value of
|
|
|
+ * this field can be a regular expression to pull out
|
|
|
+ * the name of the feature from the node label in the
|
|
|
+ * intput tree. If no value is provided the entire label is
|
|
|
+ * used.
|
|
|
+ * 'match': Set to 'uniquename' if the leaf nodes should be matched
|
|
|
+ * with the feature uniquename.
|
|
|
+ * 'load_now': If set, the tree will be loaded immediately if a tree_file
|
|
|
+ * is provided. Otherwise, the tree will be loaded via
|
|
|
+ * a Tripal jobs call.
|
|
|
+ */
|
|
|
+function tripal_update_phylotree($phylotree_id, &$options) {
|
|
|
+ global $user;
|
|
|
+
|
|
|
+ // Validate the incoming options.
|
|
|
+ $errors = array();
|
|
|
+ $warnings = array();
|
|
|
+ $success = tripal_validate_phylotree('update', $options, $errors, $warnings);
|
|
|
+ if (!$success) {
|
|
|
+ foreach ($errors as $field => $message) {
|
|
|
+ tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
|
|
|
+ }
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+ // If we're here then all is good, so update the phylotree record.
|
|
|
+ $match = array(
|
|
|
+ 'phylotree_id' => $phylotree_id,
|
|
|
+ );
|
|
|
+ if (array_key_exists('name', $options) and $options['name']) {
|
|
|
+ $values['name'] = $options['name'];
|
|
|
+ }
|
|
|
+ if (array_key_exists('analysis_id', $options) and $options['analysis_id']) {
|
|
|
+ $values['analysis_id'] = $options['analysis_id'];
|
|
|
+ }
|
|
|
+ if (array_key_exists('dbxref_id', $options) and $options['dbxref_id']) {
|
|
|
+ $values['dbxref_id'] = $options['dbxref_id'];
|
|
|
+ }
|
|
|
+ if (array_key_exists('description', $options) and $options['description']) {
|
|
|
+ $values['comment'] = $options['description'];
|
|
|
+ }
|
|
|
+ if (array_key_exists('type_id', $options) and $options['type_id']) {
|
|
|
+ $values['type_id'] = $options['type_id'];
|
|
|
+ }
|
|
|
+
|
|
|
+ $phylotree = chado_update_record('phylotree', $match, $values, array('return_record' => TRUE));
|
|
|
+ if (!$phylotree) {
|
|
|
+ drupal_set_message(t('Unable to update phylotree.'), 'warning');
|
|
|
+ tripal_report_error('tripal_phylogeny', TRIPAL_WARNING,
|
|
|
+ 'Update phylotree: Unable to update phylotree where values: %values',
|
|
|
+ array('%values' => print_r($values, TRUE))
|
|
|
+ );
|
|
|
+ }
|
|
|
+
|
|
|
+ // If we have a tree file, then import the tree
|
|
|
+ if (array_key_exists('tree_file', $options) and $options['tree_file']) {
|
|
|
+
|
|
|
+ // Remove any existing nodes
|
|
|
+ chado_delete_record('phylonode', array('phylotree_id' => $options['phylotree_id']));
|
|
|
+
|
|
|
+ // Make sure if we already have a file that we remove the old one.
|
|
|
+ $sql = "
|
|
|
+ SELECT FM.fid
|
|
|
+ FROM {file_managed} FM
|
|
|
+ INNER JOIN {file_usage} FU on FM.fid = FU.fid
|
|
|
+ WHERE FU.id = :id and FU.module = 'tripal_phylogeny'
|
|
|
+ ";
|
|
|
+ $fid = db_query($sql, array(':id' => $options['phylotree_id']))->fetchField();
|
|
|
+ if ($fid) {
|
|
|
+ $file = file_load($fid);
|
|
|
+ file_delete($file, TRUE);
|
|
|
+ }
|
|
|
+
|
|
|
+ // If the tree_file is numeric then it is a Drupal managed file and
|
|
|
+ // we want to make the file permanent and associated with the tree.
|
|
|
+ if (is_numeric($options['tree_file'])) {
|
|
|
+ $file = file_load($options['tree_file']);
|
|
|
+ $file->status = FILE_STATUS_PERMANENT;
|
|
|
+ $file = file_save($file);
|
|
|
+ file_usage_add($file, 'tripal_phylogeny', 'newick', $options['phylotree_id']);
|
|
|
+
|
|
|
+ // Add a job to parse the new node tree.
|
|
|
+ $real_file_path = drupal_realpath($file->uri);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $real_file_path = $options['tree_file'];
|
|
|
+ }
|
|
|
+
|
|
|
+ // If caller has requested to load the file now then do so, otherwise
|
|
|
+ // submit using a Tripal job.
|
|
|
+ if (array_key_exists('load_now', $options) and $options['load_now']) {
|
|
|
+ $args = array(
|
|
|
+ 'phylotree_id' => $options['phylotree_id'],
|
|
|
+ 'leaf_type' => $options['leaf_type'],
|
|
|
+ 'match' => $options['match'] ? 'uniquename' : 'name',
|
|
|
+ 'name_re' => $options['name_re'],
|
|
|
+ );
|
|
|
+ tripal_phylogeny_import_tree_file($real_file_path, $options['format'], $args);
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $args = array(
|
|
|
+ $real_file_path,
|
|
|
+ 'newick',
|
|
|
+ array(
|
|
|
+ 'phylotree_id' => $options['phylotree_id'],
|
|
|
+ 'leaf_type' => $options['leaf_type'],
|
|
|
+ 'match' => $options['match'] ? 'uniquename' : 'name',
|
|
|
+ 'name_re' => $options['name_re'],
|
|
|
+ ),
|
|
|
+ );
|
|
|
+ if (tripal_add_job("Import Tree File: " . $file->filename, 'tripal_phylogeny',
|
|
|
+ 'tripal_phylogeny_import_tree_file', $args, $user->uid)) {
|
|
|
+ drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return TRUE;
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Deletes a phylotree record from Chado.
|
|
|
+ *
|
|
|
+ * @param $phylotree_id
|
|
|
+ *
|
|
|
+ * @return
|
|
|
+ * TRUE on success, FALSE on failure.
|
|
|
+ */
|
|
|
+function tripal_delete_phylotree($phylotree_id) {
|
|
|
+
|
|
|
+ // if we don't have a phylotree id for this node then this isn't a node of
|
|
|
+ // type chado_phylotree or the entry in the chado_phylotree table was lost.
|
|
|
+ if (!$phylotree_id) {
|
|
|
+ tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
|
|
|
+ 'Please provide a phylotree_id to delete a tree.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Remove the tree
|
|
|
+ $values = array('phylotree_id' => $phylotree_id);
|
|
|
+ return chado_delete_record('phylotree', $values);
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Iterates through the tree and sets the left and right indicies .
|
|
|
+ *
|
|
|
+ * @param $tree
|
|
|
+ * The tree array.
|
|
|
+ * @param $index
|
|
|
+ * This parameters is not used when the function is first called. It
|
|
|
+ * is used for recursive calls.
|
|
|
+ */
|
|
|
+function tripal_assign_phylogeny_tree_indices(&$tree, &$index = 1) {
|
|
|
+ // Assign a left and right index to each node. The child node must
|
|
|
+ // have a right and left index less than that of it's parents. We
|
|
|
+ // increment the index by 100 to give space for new nodes that might
|
|
|
+ // be added later.
|
|
|
+ if (array_key_exists('name', $tree)) {
|
|
|
+ $tree['left_index'] = $index += 100;
|
|
|
+ if (array_key_exists('is_leaf', $tree)) {
|
|
|
+ $tree['right_index'] = $index += 100;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (array_key_exists('branch_set', $tree)) {
|
|
|
+ foreach ($tree['branch_set'] as $key => $node) {
|
|
|
+ tripal_assign_phylogeny_tree_indices($tree['branch_set'][$key], $index);
|
|
|
+ $tree['right_index'] = $index += 100;
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Iterates through the tree array and creates phylonodes in Chado.
|
|
|
+ *
|
|
|
+ * The function iterates through the tree in a top-down approach adding
|
|
|
+ * parent internal nodes prior to leaf nodes. Each node of the tree should have
|
|
|
+ * the following fields:
|
|
|
+ *
|
|
|
+ * -name: The name (or label) for this node.
|
|
|
+ * -depth: The depth of the node in the tree.
|
|
|
+ * -is_root: Set to 1 if this node is a root node.
|
|
|
+ * -is_leaf: Set to 1 if this node is a leaf node.
|
|
|
+ * -is_internal: Set to 1 if this node is an internal node.
|
|
|
+ * -left_index: The index of the node to the left in the tree.
|
|
|
+ * -right_index: The index of the node to the right in the tree.
|
|
|
+ * -branch_set: An array containing a list of nodes of that are children
|
|
|
+ * of the node.
|
|
|
+ * -parent: The name of the parent node.
|
|
|
+ * -organism_id: The organism_id for associtating the node with an organism.
|
|
|
+ * -properties: An array of key/value pairs where the key is the cvterm_id
|
|
|
+ * and the value is the property value. These properties
|
|
|
+ * will be assocaited with the phylonode.
|
|
|
+ *
|
|
|
+ * Prior to importing the tree the indicies can be set by using the
|
|
|
+ * tripal_assign_phylogeny_tree_indices() function.
|
|
|
+ *
|
|
|
+ * @param $tree
|
|
|
+ * The tree array.
|
|
|
+ * @param $options
|
|
|
+ * The options provide some direction for how the tree is imported. The
|
|
|
+ * following keys can be used:
|
|
|
+ * -taxonomy: Set to 1 if this tree is a taxonomic tree. Set to 0
|
|
|
+ * otherwise.
|
|
|
+ * -leaf_type: Set to the leaf type name. If this is a non-taxonomic tree
|
|
|
+ * that is associated with features, then this should be the
|
|
|
+ * Sequence Ontology term for the feature (e.g. polypeptide).
|
|
|
+ * If this is a taxonomic tree then this option is not needed.
|
|
|
+ * -match: Set to either 'name' or 'uniquename'. This is used for
|
|
|
+ * matching the feature name or uniquename with the node name.
|
|
|
+ * This is not needed for taxonomic trees.
|
|
|
+ * -match_re: Set to a regular that can be used for matching the node
|
|
|
+ * name with the feature name if the node name is not
|
|
|
+ * identical to the feature name.
|
|
|
+ * @param $vocab
|
|
|
+ * Optional. An array containing a set of key/value pairs that maps node
|
|
|
+ * types to CV terms. The keys must be 'root', 'internal' or 'leaf'. If
|
|
|
+ * no vocab is provded then the terms provided by the tripal_phylogeny
|
|
|
+ * CV will be used.
|
|
|
+ * @param $parent
|
|
|
+ * This argument is not needed when the funtion is first called. This
|
|
|
+ * function is recursive and this argument is used on recursive calls.
|
|
|
+ */
|
|
|
+function tripal_phylogeny_import_tree(&$tree, $phylotree, $options, $vocab = array(), $parent = NULL) {
|
|
|
+
|
|
|
+ // Get the vocabulary terms used to describe nodes in the tree if one
|
|
|
+ // wasn't provided.
|
|
|
+ if (count($vocab) == 0) {
|
|
|
+ $vocab = tripal_phylogeny_get_node_types_vocab();
|
|
|
+ }
|
|
|
+
|
|
|
+ if (is_array($tree) and array_key_exists('name', $tree)) {
|
|
|
+ $values = array(
|
|
|
+ 'phylotree_id' => $phylotree->phylotree_id,
|
|
|
+ 'left_idx' => $tree['left_index'],
|
|
|
+ 'right_idx' => $tree['right_index'],
|
|
|
+ );
|
|
|
+ // Add in any optional values to the $values array if they are present
|
|
|
+ if (!empty($tree['name']) and $tree['name'] != '') {
|
|
|
+ $values['label'] = $tree['name'];
|
|
|
+ }
|
|
|
+ if (!empty($tree['length']) and $tree['length'] != '') {
|
|
|
+ $values['distance'] = $tree['length'];
|
|
|
+ }
|
|
|
+ // Set the type of node
|
|
|
+ if ($tree['is_root']) {
|
|
|
+ $values['type_id'] = $vocab['root']->cvterm_id;
|
|
|
+ }
|
|
|
+ else if ($tree['is_internal']) {
|
|
|
+ $values['type_id'] = $vocab['internal']->cvterm_id;
|
|
|
+ $values['parent_phylonode_id'] = $parent['phylonode_id'];
|
|
|
+ // TOOD: a feature may be associated here but it is recommended that it
|
|
|
+ // be a feature of type SO:match and should represent the alignment of
|
|
|
+ // all features beneath it.
|
|
|
+ }
|
|
|
+ else if ($tree['is_leaf']) {
|
|
|
+ $values['type_id'] = $vocab['leaf']->cvterm_id;
|
|
|
+ $values['parent_phylonode_id'] = $parent['phylonode_id'];
|
|
|
+
|
|
|
+ // Match this leaf node with an organism or feature depending on the
|
|
|
+ // type of tree. But we can't do that if we don't have a name.
|
|
|
+ if (!empty($tree['name']) and $tree['name'] != '') {
|
|
|
+ if (!$options['taxonomy']) {
|
|
|
+
|
|
|
+ // This is a sequence-based tree. Try to match leaf nodes with features.
|
|
|
+ // First, Get the Name and uniquename for the feature
|
|
|
+ $matches = array();
|
|
|
+ $sel_values = array();
|
|
|
+ if ($options['match'] == "name") {
|
|
|
+ $sel_values['name'] = $tree['name'];
|
|
|
+ $re = $options['name_re'];
|
|
|
+ if (preg_match("/$re/", $tree['name'], $matches)) {
|
|
|
+ $sel_values['name'] = $matches[1];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $sel_values['uniquename'] = $tree['name'];
|
|
|
+ $re = $options['name_re'];
|
|
|
+ if (preg_match("/$re/", $tree['name'], $matches)) {
|
|
|
+ $sel_values['uniquename'] = $matches[1];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ $sel_values['type_id'] = array(
|
|
|
+ 'name' => $options['leaf_type'],
|
|
|
+ 'cv_id' => array(
|
|
|
+ 'name' => 'sequence'
|
|
|
+ ),
|
|
|
+ );
|
|
|
+ $sel_columns = array('feature_id');
|
|
|
+ $feature = chado_select_record('feature', $sel_columns, $sel_values);
|
|
|
+ if (count($feature) > 1) {
|
|
|
+ // Found multiple features, cannot make an association.
|
|
|
+ }
|
|
|
+ else if (count($feature) == 1) {
|
|
|
+ $values['feature_id'] = $feature[0]->feature_id;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ // Could not find a feature that matches the name or uniquename
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Insert the new node and then add it's assigned phylonode_id to the node
|
|
|
+ $phylonode = chado_insert_record('phylonode', $values);
|
|
|
+ $tree['phylonode_id'] = $phylonode['phylonode_id'];
|
|
|
+
|
|
|
+ // This is a taxonomic tree, so assocaite this node with an
|
|
|
+ // organism if one is provided.
|
|
|
+ if (array_key_exists('organism_id', $tree)) {
|
|
|
+ $values = array(
|
|
|
+ 'phylonode_id' => $tree['phylonode_id'],
|
|
|
+ 'organism_id' => $tree['organism_id']
|
|
|
+ );
|
|
|
+ $pylonode_organism = chado_insert_record('phylonode_organism', $values);
|
|
|
+ }
|
|
|
+
|
|
|
+ // Associate any properties
|
|
|
+ if (array_key_exists('properties', $tree)) {
|
|
|
+ foreach ($tree['properties'] as $type_id => $value) {
|
|
|
+ $values = array(
|
|
|
+ 'phylonode_id' => $tree['phylonode_id'],
|
|
|
+ 'type_id' => $type_id,
|
|
|
+ 'value' => $value,
|
|
|
+ );
|
|
|
+ $pylonode_organism = chado_insert_record('phylonodeprop', $values);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (is_array($tree) and array_key_exists('branch_set', $tree)) {
|
|
|
+ foreach ($tree['branch_set'] as $key => $node) {
|
|
|
+ tripal_phylogeny_import_tree($tree['branch_set'][$key], $phylotree, $options, $vocab, $tree);
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ *
|
|
|
+ * @return boolean|multitype:Either
|
|
|
+ */
|
|
|
+function tripal_phylogeny_get_node_types_vocab() {
|
|
|
+ // Get the vocabulary terms used to describe nodes in the tree
|
|
|
+ $values = array(
|
|
|
+ 'name' => 'phylo_leaf',
|
|
|
+ 'cv_id' => array(
|
|
|
+ 'name' => 'tripal_phylogeny',
|
|
|
+ ),
|
|
|
+ );
|
|
|
+ $leaf = chado_generate_var('cvterm', $values);
|
|
|
+ if (!$leaf) {
|
|
|
+ tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
|
|
|
+ "Could not find the leaf vocabulary term: 'phylo_leaf'. It should " .
|
|
|
+ "already be present as part of the tripal_phylogeny vocabulary.");
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ $values['name'] = 'phylo_interior';
|
|
|
+ $internal = chado_generate_var('cvterm', $values);
|
|
|
+ if (!$internal) {
|
|
|
+ tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
|
|
|
+ "Could not find the leaf vocabulary term: 'phylo_interior'. It should " .
|
|
|
+ "already be present as part of the tripal_phylogeny vocabulary.");
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ $values['name'] = 'phylo_root';
|
|
|
+ $root = chado_generate_var('cvterm', $values);
|
|
|
+ if (!$root) {
|
|
|
+ tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
|
|
|
+ "Could not find the leaf vocabulary term: 'phylo_root'. It should " .
|
|
|
+ "already be present as part of the tripal_phylogeny vocabulary.");
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ $vocab = array(
|
|
|
+ 'leaf' => $leaf,
|
|
|
+ 'internal' => $internal,
|
|
|
+ 'root' => $root,
|
|
|
+ );
|
|
|
+ return $vocab;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+/**
|
|
|
+ * Imports a tree file.
|
|
|
+ *
|
|
|
+ * This function is used as a wrapper for loading a phylogenetic tree using
|
|
|
+ * any number of file loaders.
|
|
|
+ *
|
|
|
+ * @param $file_name
|
|
|
+ * The name of the file containing the phylogenetic tree to import.
|
|
|
+ * @param $format
|
|
|
+ * The format of the file. Currently only the 'newick' file format is
|
|
|
+ * supported.
|
|
|
+ * @param $options
|
|
|
+ * Options if the phylotree record already exists:
|
|
|
+ * 'phylotree_id': The imported nodes will be associated with this tree.
|
|
|
+ * 'leaf_type': A sequence ontology term or the word 'organism'. If the
|
|
|
+ * type is 'organism' then this tree represents a
|
|
|
+ * taxonomic tree. The default, if not specified, is the
|
|
|
+ * term 'polypeptide'.
|
|
|
+ * 'name_re': If the leaf type is NOT 'taxonomy', then the value of
|
|
|
+ * this field can be a regular expression to pull out
|
|
|
+ * the name of the feature from the node label in the
|
|
|
+ * intput tree. If no value is provided the entire label is
|
|
|
+ * used.
|
|
|
+ * 'match': Set to 'uniquename' if the leaf nodes should be matched
|
|
|
+ * with the feature uniquename.
|
|
|
+ *
|
|
|
+ */
|
|
|
+function tripal_phylogeny_import_tree_file($file_name, $format, $options = array(), $job_id = NULL) {
|
|
|
+
|
|
|
+ // Set some option details.
|
|
|
+ if (!array_key_exists('leaf_type', $options)) {
|
|
|
+ $options['leaf_type'] = 'polypeptide';
|
|
|
+ }
|
|
|
+ if (!array_key_exists('match', $options)) {
|
|
|
+ $options['match'] = 'name';
|
|
|
+ }
|
|
|
+ if (!array_key_exists('name_re', $options)) {
|
|
|
+ $options['name_re'] = '^(.*)$';
|
|
|
+ }
|
|
|
+ $options['name_re'] = trim($options['name_re']);
|
|
|
+
|
|
|
+ // If a phylotree ID is not passed in then make sure we have the other
|
|
|
+ // required fields for creating a tree.
|
|
|
+ if (!array_key_exists('phylotree_id', $options)) {
|
|
|
+ if (!array_key_exists('name', $options)) {
|
|
|
+ tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
|
|
|
+ 'The phylotree_id is required for importing the tree.');
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // get the phylotree record.
|
|
|
+ $values = array('phylotree_id' => $options['phylotree_id']);
|
|
|
+ $phylotree = chado_generate_var('phylotree', $values);
|
|
|
+
|
|
|
+ if (!$phylotree) {
|
|
|
+ tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
|
|
|
+ 'Could not find the phylotree using the ID provided: %phylotree_id.',
|
|
|
+ array('%phylotree_id' => $options['phylotree_id']));
|
|
|
+ return FALSE;
|
|
|
+ }
|
|
|
+
|
|
|
+ $transaction = db_transaction();
|
|
|
+ print "\nNOTE: Loading of this tree file is performed using a database transaction. \n" .
|
|
|
+ "If the load fails or is terminated prematurely then the entire set of \n" .
|
|
|
+ "insertions/updates is rolled back and will not be found in the database\n\n";
|
|
|
+ try {
|
|
|
+
|
|
|
+ // Parse the file according to the format indicated.
|
|
|
+ if ($format == 'newick') {
|
|
|
+ // Parse the tree into the expected nested node format.
|
|
|
+ module_load_include('inc', 'tripal_phylogeny', 'includes/parsers/tripal_phylogeny.newick_parser');
|
|
|
+ $tree = tripal_phylogeny_parse_newick_file($file_name);
|
|
|
+
|
|
|
+ // Assign the right and left indecies to the tree ndoes
|
|
|
+ tripal_assign_phylogeny_tree_indices($tree);
|
|
|
+ }
|
|
|
+ // Iterate through the tree nodes and add them to Chado in accordance
|
|
|
+ // with the details in the $options array.
|
|
|
+ tripal_phylogeny_import_tree($tree, $phylotree, $options);
|
|
|
+ }
|
|
|
+ catch (Exception $e) {
|
|
|
+ $transaction->rollback();
|
|
|
+ watchdog_exception('tripal_phylogeny', $e);
|
|
|
+ print "\nFAILED: Rolling back database changes...\n";
|
|
|
+ }
|
|
|
+ print "\nDone Importing Tree.\n";
|
|
|
+}
|