$options['phylotree_id']), array('has_record' => 1)); if (!$exists) { $errors['phylotree_id'] = t('The phylotree_id does not exist.'); return FALSE; } } // Make sure the file exists if one is specified. if (array_key_exists('tree_file', $options) and $options['tree_file']) { // If this is a numeric Drupal file then all is good, no need to check. if (!is_numeric($options['tree_file'])) { if (!file_exists($options['tree_file'])) { $errors['tree_file'] = t('The file provided does not exists.'); return FALSE; } } // Make sure the file format is correct. if (!array_key_exists('format', $options) or ($options['format'] != 'newick' and $options['format'] != 'taxonomy')) { $errors['format'] = t('Please provide a supported file format. Currently only the "newick" file format is supported.'); return FALSE; } // If no leaf type is provided then use the polypeptide term. if (!array_key_exists('leaf_type', $options) or !$options['leaf_type']) { $options['leaf_type'] = 'polypeptide'; } } // Make sure the analysis exists. $analysis = NULL; if (array_key_exists('analysis_id', $options) and $options['analysis_id']) { $analysis = chado_select_record('analysis', array('analysis_id'), array('analysis_id' => $options['analysis_id'])); if (!$analysis) { $errors['analysis_id'] = t('The analysis name provided does not exist.'); return FALSE; } $options['analysis_id'] = $analysis[0]->analysis_id; } if (array_key_exists('analysis', $options) and $options['analysis']) { $analysis = chado_select_record('analysis', array('analysis_id'), array('name' => $options['analysis'])); if (!$analysis) { $errors['analysis'] = t('The analysis ID provided does not exist.'); return FALSE; } $options['analysis_id'] = $analysis[0]->analysis_id; } // Make sure the leaf type exists. 
$type = NULL; if (array_key_exists('leaf_type', $options) and $options['leaf_type']) { if ($options['leaf_type'] == 'taxonomy') { $values = array( 'cv_id' => array( 'name' => 'EDAM' ), 'name' => 'Species tree' ); $type = chado_select_record('cvterm', array('cvterm_id'), $values); } else { $values = array( 'cv_id' => array( 'name' => 'sequence' ), 'name' => $options['leaf_type'] ); $type = chado_select_record('cvterm', array('cvterm_id'), $values); if (!$type) { $errors['leaf_type'] = t('The leaf_type provided is not a valid Sequence Ontology term: %term.'); return FALSE; } } $options['type_id'] = $type[0]->cvterm_id; } // A Dbxref is required by the phylotree module, but if the // tree was generated in-house and the site admin doens't want to // assign a local dbxref then we will set it to the null db // and the local:null dbxref. if (array_key_exists('dbxref', $options)) { if (!$options['dbxref']) { $options['dbxref'] = 'null:local:null'; } $matches = array(); preg_match('/^(.*?):(.*)$/', $options['dbxref'], $matches); $db_name = $matches[1]; $accession = $matches[2]; $values = array( 'accession' => $accession, 'db_id' => array( 'name' => $db_name ), ); $dbxref = chado_generate_var('dbxref', $values); if (!$dbxref) { $errors['dbxref'] = t('The dbxref provided does not exist in the database: %dbxref.', array('%dbxref' => $dbxref)); return FALSE; } $options['dbxref_id'] = $dbxref->dbxref_id; } // Make sure the tree name is unique. if (array_key_exists('name', $options) and $options['name']) { $sql = " SELECT * FROM {phylotree} P WHERE P.name = :name "; $args = array(':name' => $options['name']); if ($val_type == 'update') { $sql .= " AND NOT P.phylotree_id = :phylotree_id"; $args[':phylotree_id'] = $options['phylotree_id']; } $result = chado_query($sql, $args)->fetchObject(); if ($result) { $errors['name'] = t("The tree name is in use by another tree. 
Please provide a different unique name for this tree."); } } return TRUE; } /** * Inserts a phylotree record into Chado. * * This function validates the options passed prior to insertion of the record, * and if validation passes then any values in the options array that needed * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have * their approriate primary key values added to the options array. * * @param $options * An array of key value pairs with the following keys required: * 'name': The name of the tree. This will be displayed to users. * 'description: A description about the tree * 'anlaysis_id: The ID of the analysis to which this phylotree should be * associated. * 'analysis': If the analysis_id key is not used then the analysis name * may be provided to identify the analysis to which the tree * should be associated. * 'leaf_type': A sequence ontology term or the word 'organism'. If the * type is 'organism' then this tree represents a * taxonomic tree. The default, if not specified, is the * term 'polypeptide'. * 'tree_file': The path of the file containing the phylogenetic tree to * import or a Drupal managed_file numeric ID. * 'format': The file format. Currently only 'newick is supported'. * * Optional keys: * 'dbxref': A database cross-reference of the form DB:ACCESSION. * Where DB is the database name, which is already present * in Chado, and ACCESSION is the unique identifier for * this tree in the remote database. * 'name_re': If the leaf type is NOT 'taxonomy', then the value of * this field can be a regular expression to pull out * the name of the feature from the node label in the * intput tree. If no value is provided the entire label is * used. * 'match': Set to 'uniquename' if the leaf nodes should be matched * with the feature uniquename. * 'load_now': If set, the tree will be loaded immediately if a tree_file * is provided. Otherwise, the tree will be loaded via * a Tripal jobs call. 
*     'no_load':    If set the tree file will not be loaded.
 * @param $errors
 *   An empty array where validation error messages will be set. The keys
 *   of the array will be name of the field from the options array and the
 *   value is the error message.
 * @param $warnings
 *   An empty array where validation warning messages will be set. The
 *   warnings should not stop an insert or an update but should be provided
 *   to the user as information by a drupal_set_message() if appropriate. The
 *   keys of the array will be name of the field from the options array and the
 *   value is the error message.
 * @return
 *   TRUE for success and FALSE for failure. FALSE is returned when validation
 *   fails, when the phylotree record cannot be inserted, or when a numeric
 *   'tree_file' does not resolve to a Drupal managed file.
 *
 * @ingroup tripal_phylotree_api
 */
function chado_insert_phylotree(&$options, &$errors, &$warnings) {
  global $user;

  // Normalize the free-text options. A missing key becomes an empty string
  // (which is what trim() produced for a missing key before, minus the PHP
  // notice) so the code below can rely on these keys being present.
  foreach (array('name_re', 'leaf_type', 'name', 'format', 'tree_file') as $key) {
    $options[$key] = isset($options[$key]) ? trim($options[$key]) : '';
  }

  // Validate the incoming options.
  $success = chado_validate_phylotree('insert', $options, $errors, $warnings);
  if (!$success) {
    foreach ($errors as $field => $message) {
      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
    }
    return FALSE;
  }

  // If we're here then all is good, so add the phylotree record. Optional
  // keys that were never supplied are passed as NULL rather than read as
  // undefined indexes.
  $column_sources = array(
    'analysis_id' => 'analysis_id',
    'name' => 'name',
    'dbxref_id' => 'dbxref_id',
    'comment' => 'description',
    'type_id' => 'type_id',
  );
  $values = array();
  foreach ($column_sources as $column => $option_key) {
    $values[$column] = isset($options[$option_key]) ? $options[$option_key] : NULL;
  }

  $phylotree = chado_insert_record('phylotree', $values);
  if (!$phylotree) {
    drupal_set_message(t('Unable to add phylotree.'), 'warning');
    tripal_report_error('tripal_phylogeny', TRIPAL_WARNING,
      'Insert phylotree: Unable to create phylotree where values: %values',
      array('%values' => print_r($values, TRUE)));
    return FALSE;
  }
  $phylotree_id = $phylotree['phylotree_id'];
  $options['phylotree_id'] = $phylotree_id;

  // Without a tree file there is nothing to attach or import. This mirrors
  // chado_update_phylotree(), which only imports when a tree_file is given.
  if (empty($options['tree_file'])) {
    return TRUE;
  }

  // By default the tree_file is a path on disk and the job is labelled with
  // its base name.
  $real_file_path = $options['tree_file'];
  $job_label = basename($options['tree_file']);

  // If the tree_file is numeric then it is a Drupal managed file and
  // we want to make the file permanent and associated with the tree.
  if (is_numeric($options['tree_file'])) {
    $file = file_load($options['tree_file']);
    if (!$file) {
      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
        'Insert phylotree: Unable to load the managed file with ID: %fid',
        array('%fid' => $options['tree_file']));
      return FALSE;
    }
    $file->status = FILE_STATUS_PERMANENT;
    $file = file_save($file);
    file_usage_add($file, 'tripal_phylogeny', $options['format'], $phylotree_id);
    $real_file_path = drupal_realpath($file->uri);
    $job_label = $file->filename;
  }

  // The caller asked for the record only.
  if (!empty($options['no_load'])) {
    return TRUE;
  }

  // Any non-empty 'match' value selects uniquename matching, except the
  // literal string 'name', which previously was (being truthy) also mapped
  // to 'uniquename'.
  $use_uniquename = (!empty($options['match']) and $options['match'] !== 'name');
  $import_options = array(
    'phylotree_id' => $phylotree_id,
    'leaf_type' => $options['leaf_type'],
    'match' => $use_uniquename ? 'uniquename' : 'name',
  );
  // Only forward a name_re that was actually provided so that
  // chado_phylogeny_import_tree_file() can apply its own default. An empty
  // pattern would otherwise be used as the regular expression "//".
  if ($options['name_re'] !== '') {
    $import_options['name_re'] = $options['name_re'];
  }

  // If caller has requested to load the file now then do so, otherwise
  // submit using a Tripal job.
  if (!empty($options['load_now'])) {
    chado_phylogeny_import_tree_file($real_file_path, $options['format'], $import_options);
  }
  else {
    // NOTE(review): the job is always queued with the 'newick' format while
    // the immediate path above passes $options['format'] -- confirm whether
    // that difference is intended.
    $args = array($real_file_path, 'newick', $import_options);
    if (tripal_add_job("Import Tree File: " . $job_label, 'tripal_phylogeny',
        'chado_phylogeny_import_tree_file', $args, $user->uid)) {
      drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
    }
  }

  return TRUE;
}

/**
 * Updates a phylotree record into Chado.
 *
 * This function validates the options passed prior to update of the record
 * and if validation passes then any values in the options array that needed
 * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
 * their appropriate primary key values added to the options array. A Drupal
 * File object will be added to the options array for the tree file if one
 * is provided.
 *
 *
 * @param $phylotree_id
 *   The ID of the phylotree to update.
 * @param $options
 *   An array of key value pairs with the following optional keys:
 *     'name':       The name of the tree. This will be displayed to users.
 *     'description': A description about the tree
 *     'analysis_id': The ID of the analysis to which this phylotree should be
 *                   associated.
 *     'analysis':   If the analysis_id key is not used then the analysis name
 *                   may be provided to identify the analysis to which the tree
 *                   should be associated.
 *     'leaf_type':  A sequence ontology term or the word 'taxonomy'. If the
 *                   type is 'taxonomy' then this tree represents a
 *                   taxonomic tree. The default, if not specified, is the
 *                   term 'polypeptide'.
 *     'tree_file':  The path of the file containing the phylogenetic tree to
 *                   import or a Drupal managed_file numeric ID.
 *     'format':     The file format. Currently only 'newick' is supported.
 *     'dbxref':     A database cross-reference of the form DB:ACCESSION.
 *                   Where DB is the database name, which is already present
 *                   in Chado, and ACCESSION is the unique identifier for
 *                   this tree in the remote database.
 *     'name_re':    If the leaf type is NOT 'taxonomy', then the value of
 *                   this field can be a regular expression to pull out
 *                   the name of the feature from the node label in the
 *                   input tree. If no value is provided the entire label is
 *                   used.
*     'match':      Set to 'uniquename' if the leaf nodes should be matched
 *                   with the feature uniquename.
 *     'load_now':   If set, the tree will be loaded immediately if a tree_file
 *                   is provided. Otherwise, the tree will be loaded via
 *                   a Tripal jobs call.
 *
 * @return
 *   FALSE if validation fails or a numeric 'tree_file' cannot be loaded as a
 *   Drupal managed file, TRUE otherwise. Note that a failed record update is
 *   reported but does not change the return value.
 *
 * @ingroup tripal_phylotree_api
 */
function chado_update_phylotree($phylotree_id, &$options) {
  global $user;

  // Validate the incoming options.
  $errors = array();
  $warnings = array();
  $success = chado_validate_phylotree('update', $options, $errors, $warnings);
  if (!$success) {
    foreach ($errors as $field => $message) {
      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
    }
    return FALSE;
  }

  // If we're here then all is good, so update the phylotree record. Only
  // options that were supplied with a non-empty value are written.
  $match = array(
    'phylotree_id' => $phylotree_id,
  );
  $column_sources = array(
    'name' => 'name',
    'analysis_id' => 'analysis_id',
    'dbxref_id' => 'dbxref_id',
    'comment' => 'description',
    'type_id' => 'type_id',
  );
  $values = array();
  foreach ($column_sources as $column => $option_key) {
    if (!empty($options[$option_key])) {
      $values[$column] = $options[$option_key];
    }
  }

  // Skip the UPDATE entirely when there is nothing to change (for example
  // when only a new tree file was supplied); previously an undefined $values
  // was handed to chado_update_record() in that case.
  if ($values) {
    $phylotree = chado_update_record('phylotree', $match, $values, array('return_record' => TRUE));
    if (!$phylotree) {
      drupal_set_message(t('Unable to update phylotree.'), 'warning');
      tripal_report_error('tripal_phylogeny', TRIPAL_WARNING,
        'Update phylotree: Unable to update phylotree where values: %values',
        array('%values' => print_r($values, TRUE))
      );
    }
  }

  // If we have a tree file, then import the tree.
  if (!empty($options['tree_file'])) {

    // The ID stored in the options is used when present (as before); fall
    // back to the function argument so a missing key can never turn into a
    // NULL phylotree_id in the delete and lookup below.
    $tree_id = isset($options['phylotree_id']) ? $options['phylotree_id'] : $phylotree_id;

    // Remove any existing nodes
    chado_delete_record('phylonode', array('phylotree_id' => $tree_id));

    // Make sure if we already have a file that we remove the old one.
    $sql = "
      SELECT FM.fid
      FROM {file_managed} FM
        INNER JOIN {file_usage} FU on FM.fid = FU.fid
      WHERE FU.id = :id and FU.module = 'tripal_phylogeny'
    ";
    $fid = db_query($sql, array(':id' => $tree_id))->fetchField();
    // Do not delete the old file when it is the very file being re-attached,
    // otherwise the file_load() below could never succeed.
    if ($fid and (string) $fid !== (string) $options['tree_file']) {
      $old_file = file_load($fid);
      if ($old_file) {
        file_delete($old_file, TRUE);
      }
    }

    // By default the tree_file is a path on disk and the job is labelled
    // with its base name.
    $real_file_path = $options['tree_file'];
    $job_label = basename($options['tree_file']);

    // If the tree_file is numeric then it is a Drupal managed file and
    // we want to make the file permanent and associated with the tree.
    if (is_numeric($options['tree_file'])) {
      $file = file_load($options['tree_file']);
      if (!$file) {
        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
          'Update phylotree: Unable to load the managed file with ID: %fid',
          array('%fid' => $options['tree_file']));
        return FALSE;
      }
      $file->status = FILE_STATUS_PERMANENT;
      $file = file_save($file);
      file_usage_add($file, 'tripal_phylogeny', 'newick', $tree_id);
      $real_file_path = drupal_realpath($file->uri);
      $job_label = $file->filename;
    }

    // Any non-empty 'match' value selects uniquename matching, except the
    // literal string 'name', which previously was (being truthy) also mapped
    // to 'uniquename'.
    $use_uniquename = (!empty($options['match']) and $options['match'] !== 'name');
    $import_options = array(
      'phylotree_id' => $tree_id,
      'leaf_type' => $options['leaf_type'],
      'match' => $use_uniquename ? 'uniquename' : 'name',
    );
    // Only forward a non-empty name_re so that the importer can apply its
    // own default; an empty pattern would be used as the regex "//".
    $name_re = isset($options['name_re']) ? trim($options['name_re']) : '';
    if ($name_re !== '') {
      $import_options['name_re'] = $name_re;
    }

    // If caller has requested to load the file now then do so, otherwise
    // submit using a Tripal job.
    if (!empty($options['load_now'])) {
      chado_phylogeny_import_tree_file($real_file_path, $options['format'], $import_options);
    }
    else {
      // NOTE(review): the job is always queued with the 'newick' format
      // while the immediate path above passes $options['format'] -- confirm
      // whether that difference is intended.
      $args = array($real_file_path, 'newick', $import_options);
      if (tripal_add_job("Import Tree File: " . $job_label, 'tripal_phylogeny',
          'chado_phylogeny_import_tree_file', $args, $user->uid)) {
        drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
      }
    }
  }

  return TRUE;
}

/**
 * Deletes a phylotree record from Chado.
 *
 * @param $phylotree_id
 *   The ID of the phylotree record to delete. An empty value is rejected.
 *
 * @return
 *   FALSE if no phylotree_id was given, otherwise whatever
 *   chado_delete_record() returns for the phylotree table.
 *
 * @ingroup tripal_phylotree_api
 */
function chado_delete_phylotree($phylotree_id) {

  // If we don't have a phylotree id for this node then this isn't a node of
  // type chado_phylotree or the entry in the chado_phylotree table was lost.
  if (!$phylotree_id) {
    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
      'Please provide a phylotree_id to delete a tree.');
    return FALSE;
  }

  // Remove the tree
  $values = array('phylotree_id' => $phylotree_id);
  return chado_delete_record('phylotree', $values);
}

/**
 * Iterates through the tree and sets the left and right indices.
 *
 * The 'left_index' and 'right_index' keys are written directly into the
 * passed tree array (and, recursively, into every node of its 'branch_set').
 *
 * @param $tree
 *   The tree array. Modified in place.
 * @param $index
 *   This parameter is not used when the function is first called. It
 *   is used for recursive calls to carry the running counter.
 *
 * @ingroup tripal_phylotree_api
 */
function chado_assign_phylogeny_tree_indices(&$tree, &$index = 1) {
  // Assign a left and right index to each node. The counter only grows, and
  // a parent gets its left index before its children are visited and its
  // right index after them, so the indices of a child always fall between
  // those of its parent. We increment the index by 100 to give space for
  // new nodes that might be added later.
  if (array_key_exists('name', $tree)) {
    $tree['left_index'] = $index += 100;
    // The mere presence of the 'is_leaf' key (whatever its value) closes the
    // node immediately.
    if (array_key_exists('is_leaf', $tree)) {
      $tree['right_index'] = $index += 100;
    }
  }
  if (array_key_exists('branch_set', $tree)) {
    foreach ($tree['branch_set'] as $key => $node) {
      chado_assign_phylogeny_tree_indices($tree['branch_set'][$key], $index);
      // Re-assigned after every child; the value left behind is the one
      // taken after the last child.
      $tree['right_index'] = $index += 100;
    }
  }
}

/**
 * Iterates through the tree array and creates phylonodes in Chado.
 *
 * The function iterates through the tree in a top-down approach adding
 * parent internal nodes prior to leaf nodes.
* Each node of the tree should have
 * the following fields:
 *
 *   -name:         The name (or label) for this node.
 *   -depth:        The depth of the node in the tree.
 *   -is_root:      Set to 1 if this node is a root node.
 *   -is_leaf:      Set to 1 if this node is a leaf node.
 *   -is_internal:  Set to 1 if this node is an internal node.
 *   -left_index:   The index of the node to the left in the tree.
 *   -right_index:  The index of the node to the right in the tree.
 *   -branch_set:   An array containing a list of nodes of that are children
 *                  of the node.
 *   -parent:       The name of the parent node.
 *   -organism_id:  The organism_id for associating the node with an organism.
 *   -properties:   An array of key/value pairs where the key is the cvterm_id
 *                  and the value is the property value. These properties
 *                  will be associated with the phylonode.
 *
 * Prior to importing the tree the indices can be set by using the
 * chado_assign_phylogeny_tree_indices() function.
 *
 * After a node has been inserted its new 'phylonode_id' is written back into
 * the passed tree array. If a node cannot be inserted the failure is reported
 * and that node's organism, properties and children are skipped.
 *
 * @param $tree
 *   The tree array. Modified in place.
 * @param $phylotree.
 *   The phylotree object (from Chado).
 * @param $options
 *   The options provide some direction for how the tree is imported. The
 *   following keys can be used:
 *   -taxonomy:  Set to 1 if this tree is a taxonomic tree. Set to 0
 *               otherwise. A missing key is treated as 0.
 *   -leaf_type: Set to the leaf type name. If this is a non-taxonomic tree
 *               that is associated with features, then this should be the
 *               Sequence Ontology term for the feature (e.g. polypeptide).
 *               If this is a taxonomic tree then this option is not needed.
 *   -match:     Set to either 'name' or 'uniquename'. This is used for
 *               matching the feature name or uniquename with the node name.
 *               This is not needed for taxonomic trees.
 *   -name_re:   Set to a regular expression that can be used for matching the
 *               node name with the feature name if the node name is not
 *               identical to the feature name. The first capture group is
 *               used as the feature name; if the expression is empty, does
 *               not match, or has no capture group the whole node name is
 *               used.
 * @param $vocab
 *   Optional. An array containing a set of key/value pairs that maps node
 *   types to CV terms. The keys must be 'root', 'internal' or 'leaf'. If
 *   no vocab is provided then the terms provided by the tripal_phylogeny
 *   CV will be used.
 * @param $parent
 *   This argument is not needed when the function is first called. This
 *   function is recursive and this argument is used on recursive calls.
 *
 * @ingroup tripal_phylotree_api
 */
function chado_phylogeny_import_tree(&$tree, $phylotree, $options, $vocab = array(), $parent = NULL) {

  // Get the vocabulary terms used to describe nodes in the tree if one
  // wasn't provided.
  if (empty($vocab)) {
    $vocab = chado_phylogeny_get_node_types_vocab();
    if (!$vocab) {
      // The lookup has already reported which term is missing. Without the
      // terms the nodes cannot be typed, so there is nothing useful to do.
      return;
    }
  }

  if (!is_array($tree)) {
    return;
  }

  if (array_key_exists('name', $tree)) {
    $values = array(
      'phylotree_id' => $phylotree->phylotree_id,
      'left_idx' => $tree['left_index'],
      'right_idx' => $tree['right_index'],
    );

    // Add in any optional values to the $values array if they are present.
    $has_label = !empty($tree['name']);
    if ($has_label) {
      $values['label'] = $tree['name'];
    }
    if (!empty($tree['length'])) {
      $values['distance'] = $tree['length'];
    }

    // The top-level call has no parent (NULL), in which case there is no
    // parent phylonode to point at.
    $parent_id = isset($parent['phylonode_id']) ? $parent['phylonode_id'] : NULL;

    // Set the type of node.
    if (!empty($tree['is_root'])) {
      $values['type_id'] = $vocab['root']->cvterm_id;
    }
    elseif (!empty($tree['is_internal'])) {
      $values['type_id'] = $vocab['internal']->cvterm_id;
      $values['parent_phylonode_id'] = $parent_id;
      // TODO: a feature may be associated here but it is recommended that it
      // be a feature of type SO:match and should represent the alignment of
      // all features beneath it.
    }
    elseif (!empty($tree['is_leaf'])) {
      $values['type_id'] = $vocab['leaf']->cvterm_id;
      $values['parent_phylonode_id'] = $parent_id;

      // Match this leaf node with a feature when this is a sequence-based
      // tree. But we can't do that if we don't have a name.
      if ($has_label and empty($options['taxonomy'])) {

        // Anything other than an explicit 'name' matches on uniquename.
        $by_name = (isset($options['match']) and $options['match'] == "name");
        $match_column = $by_name ? 'name' : 'uniquename';

        // Start from the full node label and narrow it down with the first
        // capture group of name_re when there is one. Checking for the
        // group avoids overwriting the name with NULL when the expression
        // is empty or has no capture group.
        $lookup_name = $tree['name'];
        $re = isset($options['name_re']) ? (string) $options['name_re'] : '';
        $matches = array();
        if ($re !== '' and preg_match("/$re/", $tree['name'], $matches) and isset($matches[1])) {
          $lookup_name = $matches[1];
        }

        $sel_values = array(
          $match_column => $lookup_name,
          'type_id' => array(
            'name' => $options['leaf_type'],
            'cv_id' => array(
              'name' => 'sequence'
            ),
          ),
        );
        $sel_columns = array('feature_id');
        $feature = chado_select_record('feature', $sel_columns, $sel_values);

        // Only an unambiguous, single match is associated. Zero or multiple
        // matches (or a failed query) leave the node without a feature.
        if (is_array($feature) and count($feature) == 1) {
          $values['feature_id'] = $feature[0]->feature_id;
        }
      }
    }

    // Insert the new node and then add its assigned phylonode_id to the node.
    $phylonode = chado_insert_record('phylonode', $values);
    if (!$phylonode) {
      // Without a phylonode_id neither the organism, the properties nor the
      // children of this node can be linked correctly, so stop here.
      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
        'Import phylotree: Unable to create phylonode where values: %values',
        array('%values' => print_r($values, TRUE)));
      return;
    }
    $tree['phylonode_id'] = $phylonode['phylonode_id'];

    // This is a taxonomic tree, so associate this node with an
    // organism if one is provided.
    if (array_key_exists('organism_id', $tree)) {
      $values = array(
        'phylonode_id' => $tree['phylonode_id'],
        'organism_id' => $tree['organism_id']
      );
      chado_insert_record('phylonode_organism', $values);
    }

    // Associate any properties.
    if (array_key_exists('properties', $tree)) {
      foreach ($tree['properties'] as $type_id => $value) {
        $values = array(
          'phylonode_id' => $tree['phylonode_id'],
          'type_id' => $type_id,
          'value' => $value,
        );
        chado_insert_record('phylonodeprop', $values);
      }
    }
  }

  // Descend into the children, passing this node along as their parent. The
  // children are addressed through $tree so their phylonode_id is stored in
  // the caller's array.
  if (array_key_exists('branch_set', $tree)) {
    foreach (array_keys($tree['branch_set']) as $key) {
      chado_phylogeny_import_tree($tree['branch_set'][$key], $phylotree, $options, $vocab, $tree);
    }
  }
}

/**
 * Get the vocabulary terms used to describe nodes in the tree.
*
 * Looks up the 'phylo_leaf', 'phylo_interior' and 'phylo_root' terms of the
 * 'tripal_phylogeny' vocabulary.
 *
 * @return
 *   An array with the keys 'leaf', 'internal' and 'root', each holding the
 *   matching cvterm, or FALSE (after reporting an error) as soon as one of
 *   the terms cannot be found.
 *
 * @ingroup tripal_phylotree_api
 */
function chado_phylogeny_get_node_types_vocab() {
  // Result key => array(term name, label used in the error message). The
  // order matches the order in which the terms are looked up and returned.
  $terms = array(
    'leaf' => array('phylo_leaf', 'leaf'),
    'internal' => array('phylo_interior', 'interior'),
    'root' => array('phylo_root', 'root'),
  );

  $vocab = array();
  foreach ($terms as $node_type => $term) {
    list($term_name, $label) = $term;
    $values = array(
      'name' => $term_name,
      'cv_id' => array(
        'name' => 'tripal_phylogeny',
      ),
    );
    $cvterm = chado_generate_var('cvterm', $values);
    if (!$cvterm) {
      // Name the term that is actually missing; previously every message
      // called it "the leaf vocabulary term".
      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
        "Could not find the $label vocabulary term: '$term_name'. It should " .
        "already be present as part of the tripal_phylogeny vocabulary.");
      return FALSE;
    }
    $vocab[$node_type] = $cvterm;
  }

  return $vocab;
}

/**
 * Imports a tree file.
 *
 * This function is used as a wrapper for loading a phylogenetic tree using
 * any number of file loaders.
 *
 * @param $file_name
 *   The name of the file containing the phylogenetic tree to import.
 * @param $format
 *   The format of the file. Currently only the 'newick' file format is
 *   supported.
 * @param $options
 *   Options if the phylotree record already exists:
 *     'phylotree_id': The imported nodes will be associated with this tree.
 *     'leaf_type':  A sequence ontology term or the word 'organism'. If the
 *                   type is 'organism' then this tree represents a
 *                   taxonomic tree. The default, if not specified, is the
 *                   term 'polypeptide'.
*     'name_re':    If the leaf type is NOT 'taxonomy', then the value of
 *                   this field can be a regular expression to pull out
 *                   the name of the feature from the node label in the
 *                   input tree. If no value (or an empty value) is provided
 *                   the entire label is used.
 *     'match':      Set to 'uniquename' if the leaf nodes should be matched
 *                   with the feature uniquename. Defaults to 'name'.
 * @param $job_id
 *   Optional. Accepted so the function can be given a job ID as its last
 *   argument; it is not used by this function.
 *
 * @return
 *   FALSE if no phylotree_id was given or the phylotree cannot be found.
 *   No value is returned otherwise.
 *
 * @ingroup tripal_phylotree_api
 */
function chado_phylogeny_import_tree_file($file_name, $format, $options = array(), $job_id = NULL) {

  // Set some option details. The array union only fills in keys that are
  // not present at all.
  $options += array(
    'leaf_type' => 'polypeptide',
    'match' => 'name',
  );

  // A missing *or empty* name_re falls back to the match-everything
  // default. An empty pattern would otherwise become the regex "//", which
  // matches every label but captures nothing.
  $name_re = isset($options['name_re']) ? trim($options['name_re']) : '';
  $options['name_re'] = ($name_re === '') ? '^(.*)$' : $name_re;

  // The tree record must already exist; this function only loads nodes into
  // it. Previously a call that supplied a 'name' but no 'phylotree_id'
  // slipped past this check and then read the undefined phylotree_id.
  if (!isset($options['phylotree_id'])) {
    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
      'The phylotree_id is required for importing the tree.');
    return FALSE;
  }

  // Get the phylotree record.
  $values = array('phylotree_id' => $options['phylotree_id']);
  $phylotree = chado_generate_var('phylotree', $values);
  if (!$phylotree) {
    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
      'Could not find the phylotree using the ID provided: %phylotree_id.',
      array('%phylotree_id' => $options['phylotree_id']));
    return FALSE;
  }

  $transaction = db_transaction();
  print "\nNOTE: Loading of this tree file is performed using a database transaction. \n" .
      "If the load fails or is terminated prematurely then the entire set of \n" .
      "insertions/updates is rolled back and will not be found in the database\n\n";
  try {
    // Stays NULL for any format other than 'newick'; the import call below
    // ignores a non-array tree, so such a format results in no nodes being
    // added.
    $tree = NULL;

    // Parse the file according to the format indicated.
    if ($format == 'newick') {
      // Parse the tree into the expected nested node format.
      module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.phylotree_newick');
      $tree = tripal_phylogeny_parse_newick_file($file_name);

      // Assign the right and left indices to the tree nodes.
      chado_assign_phylogeny_tree_indices($tree);
    }

    // Iterate through the tree nodes and add them to Chado in accordance
    // with the details in the $options array.
    chado_phylogeny_import_tree($tree, $phylotree, $options);
  }
  catch (Exception $e) {
    $transaction->rollback();
    watchdog_exception('tripal_phylogeny', $e);
  }
}