Browse Source

Added NCBI Taxonomy importer. Phylotree importer added too but it needs some work

Stephen Ficklin 8 years ago
parent
commit
cac91404aa

+ 0 - 24
legacy/tripal_organism/tripal_organism.install

@@ -27,9 +27,6 @@ function tripal_organism_disable() {
  * @ingroup tripal_organism
  */
 function tripal_organism_install() {
-  // cvs & cvterms
-  tripal_organism_add_cvs();
-  tripal_organism_add_cvterms();
 
   // set the default vocabularies
   tripal_set_default_cv('organismprop', 'type_id', 'organism_property');
@@ -102,24 +99,3 @@ function tripal_organism_requirements($phase) {
   }
   return $requirements;
 }
-
-/**
- * Adds the controlled vocabulary (cv) that holds organism properties.
- *
- * Registers the 'organism_property' vocabulary through tripal_insert_cv().
- *
- * @ingroup tripal_organism
- */
-function tripal_organism_add_cvs() {
-  $cv_name = 'organism_property';
-  $cv_definition = 'Contains properties for organisms';
-  tripal_insert_cv($cv_name, $cv_definition);
-}
-
-/**
- * Placeholder for adding cvterms pertaining to organisms.
- *
- * The body is empty: no terms are added.
- *
- * @ingroup tripal_organism
- */
-function tripal_organism_add_cvterms() {
-  // No organism cvterms are defined here.
-}

+ 0 - 528
legacy/tripal_phylogeny/api/tripal_phylogeny.api.inc

@@ -1,528 +0,0 @@
-<?php
-
-/**
- * Validates an $options array for insert or update of a phylotree record.
- *
- * If validation passes then any values that needed validation lookups
- * (such as the dbxref, analysis, leaf_type, etc) will have their appropriate
- * primary keys added to the $options array, and missing default values
- * will also be added.
- *
- * @param $val_type
- *   The type of validation. Can be either 'insert' or 'update'.
- * @param $options
- *   An array of key/value pairs containing any of the valid keys for
- *   either the tripal_insert_phylotree() or tripal_update_phylotree()
- *   functions. On success the keys 'analysis_id', 'type_id' and 'dbxref_id'
- *   are filled in for the corresponding options that were supplied.
- * @param $errors
- *   An empty array where validation error messages will be set. The keys
- *   of the array will be the name of the field from the options array and the
- *   value is the error message.
- * @param $warnings
- *   An empty array where validation warning messages will be set. The
- *   warnings should not stop an insert or an update but should be provided
- *   to the user as information by a drupal_set_message() if appropriate. The
- *   keys of the array will be the name of the field from the options array
- *   and the value is the warning message. This function does not currently
- *   add any warnings.
- * @return
- *   If validation fails then FALSE is returned.  Any option that does not
- *   pass validation will be added to the $errors array with the key being
- *   the option and the value being the error message.  If validation
- *   is successful then TRUE is returned.
- */
-function tripal_validate_phylotree($val_type, &$options, &$errors, &$warnings) {
-
-  // An unknown validation type cannot be checked against either rule set,
-  // so stop here rather than continuing with the 'update' rules.
-  if ($val_type != 'insert' and $val_type != 'update') {
-    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
-      "The \$val_type argument must be either 'update' or 'insert'.");
-    return FALSE;
-  }
-
-  // The tree file formats accepted by the checks below.
-  $formats = array('newick', 'taxonomy');
-
-  // Set Defaults.
-  if ($val_type == 'insert') {
-    // Match by feature name.
-    if (!array_key_exists('match', $options)) {
-      $options['match'] = 'name';
-    }
-    // The regular expression is to match the entire node name.
-    if (!array_key_exists('name_re', $options)) {
-      $options['name_re'] = '^(.*)$';
-    }
-    // A dbxref is not required by Tripal but is required by the database
-    // field in the phylotree table.  Therefore, if the dbxref is not provided
-    // we can set this to be the null database and null dbxref which
-    // is represented as 'null:local:null'
-    if (!array_key_exists('dbxref', $options)) {
-      $options['dbxref'] = "null:local:null";
-    }
-  }
-
-  // Make sure required values are set.
-  if ($val_type == 'insert') {
-    if (!array_key_exists('name', $options)) {
-      $errors['name'] = t('Please provide the name of the tree.');
-      return FALSE;
-    }
-    if (!array_key_exists('description', $options)) {
-      $errors['description'] = t('Please provide a description for this tree.');
-      return FALSE;
-    }
-    if (!array_key_exists('analysis', $options) and !array_key_exists('analysis_id', $options)) {
-      $errors['analysis'] = t('Please provide an analysis or analysis_id for this tree.');
-      return FALSE;
-    }
-    if (!array_key_exists('tree_file', $options)) {
-      $errors['tree_file'] = t('Please provide either the full path to the tree_file or a Drupal managed file ID number.');
-      return FALSE;
-    }
-    if (!array_key_exists('format', $options) or !$options['format']) {
-      $errors['format'] = t('Please provide a file format for the tree file.');
-      return FALSE;
-    }
-    // Make sure the file format is correct
-    if (!in_array($options['format'], $formats)) {
-      $errors['format'] = t('The file format is not supported. Currently only the "newick" file format is supported.');
-      return FALSE;
-    }
-  }
-  else {
-    // Does the phylotree ID exist and is it valid
-    if (!array_key_exists('phylotree_id', $options)) {
-      $errors['phylotree_id'] = t('Please provide the ID for the tree.');
-      return FALSE;
-    }
-    $exists = chado_select_record('phylotree', array('phylotree_id'),
-        array('phylotree_id' => $options['phylotree_id']), array('has_record' => 1));
-    if (!$exists) {
-      $errors['phylotree_id'] = t('The phylotree_id does not exist.');
-      return FALSE;
-    }
-  }
-
-  // Make sure the file exists if one is specified
-  if (array_key_exists('tree_file', $options) and $options['tree_file']) {
-    // If this is a numeric Drupal file then all is good, no need to check.
-    if (!is_numeric($options['tree_file']) and !file_exists($options['tree_file'])) {
-      $errors['tree_file'] = t('The file provided does not exists.');
-      return FALSE;
-    }
-    // Make sure the file format is correct
-    if (!array_key_exists('format', $options) or !in_array($options['format'], $formats)) {
-      $errors['format'] = t('Please provide a supported file format. Currently only the "newick" file format is supported.');
-      return FALSE;
-    }
-
-    // If no leaf type is provided then use the polypeptide term.
-    if (!array_key_exists('leaf_type', $options) or !$options['leaf_type']) {
-      $options['leaf_type'] = 'polypeptide';
-    }
-  }
-
-  // Make sure the analysis exists. When both an ID and a name are supplied
-  // the name lookup runs last and therefore decides the final analysis_id.
-  $analysis = NULL;
-  if (array_key_exists('analysis_id', $options) and $options['analysis_id']) {
-    $analysis = chado_select_record('analysis', array('analysis_id'), array('analysis_id' => $options['analysis_id']));
-    if (!$analysis) {
-      $errors['analysis_id'] = t('The analysis ID provided does not exist.');
-      return FALSE;
-    }
-    $options['analysis_id'] = $analysis[0]->analysis_id;
-  }
-  if (array_key_exists('analysis', $options) and $options['analysis']) {
-    $analysis = chado_select_record('analysis', array('analysis_id'), array('name' => $options['analysis']));
-    if (!$analysis) {
-      $errors['analysis'] = t('The analysis name provided does not exist.');
-      return FALSE;
-    }
-    $options['analysis_id'] = $analysis[0]->analysis_id;
-  }
-
-  // Make sure the leaf type exists.
-  $type = NULL;
-  if (array_key_exists('leaf_type', $options) and $options['leaf_type']) {
-    // The 'taxonomy' term is looked up in the tripal_phylogeny vocabulary;
-    // every other leaf type must be a term of the sequence vocabulary.
-    $is_taxonomy = ($options['leaf_type'] == 'taxonomy');
-    $values = array(
-      'cv_id' => array(
-        'name' => $is_taxonomy ? 'tripal_phylogeny' : 'sequence',
-      ),
-      'name' => $is_taxonomy ? 'taxonomy' : $options['leaf_type'],
-    );
-    $type = chado_select_record('cvterm', array('cvterm_id'), $values);
-    // Both lookups must succeed before the first record is dereferenced.
-    if (!$type) {
-      if ($is_taxonomy) {
-        $errors['leaf_type'] = t('The "taxonomy" term could not be found in the tripal_phylogeny vocabulary.');
-      }
-      else {
-        $errors['leaf_type'] = t('The leaf_type provided is not a valid Sequence Ontology term: %term.',
-          array('%term' => $options['leaf_type']));
-      }
-      return FALSE;
-    }
-    $options['type_id'] = $type[0]->cvterm_id;
-  }
-
-  // A Dbxref is required by the phylotree module, but if the
-  // tree was generated in-house and the site admin doesn't want to
-  // assign a local dbxref then we will set it to the null db
-  // and the local:null dbxref.
-  if (array_key_exists('dbxref', $options)) {
-    if (!$options['dbxref']) {
-      $options['dbxref'] = 'null:local:null';
-    }
-    // Split on the first colon: everything before it is the database name
-    // and everything after it is the accession.
-    $matches = array();
-    if (!preg_match('/^(.*?):(.*)$/', $options['dbxref'], $matches)) {
-      $errors['dbxref'] = t('The dbxref must be of the form DB:ACCESSION: %dbxref.',
-        array('%dbxref' => $options['dbxref']));
-      return FALSE;
-    }
-    $values = array(
-      'accession' => $matches[2],
-      'db_id' => array(
-        'name' => $matches[1],
-      ),
-    );
-    $dbxref = chado_generate_var('dbxref', $values);
-    if (!$dbxref) {
-      $errors['dbxref'] = t('The dbxref provided does not exist in the database: %dbxref.',
-        array('%dbxref' => $options['dbxref']));
-      return FALSE;
-    }
-    $options['dbxref_id'] = $dbxref->dbxref_id;
-  }
-
-  // Make sure the tree name is unique
-  if (array_key_exists('name', $options) and $options['name']) {
-    $sql = "
-      SELECT *
-      FROM {phylotree} P
-      WHERE
-        P.name = :name
-    ";
-    $args = array(':name' => $options['name']);
-    // When updating, the tree is allowed to keep its own name.
-    if ($val_type == 'update') {
-      $sql .= " AND NOT P.phylotree_id = :phylotree_id";
-      $args[':phylotree_id'] = $options['phylotree_id'];
-    }
-    $result = chado_query($sql, $args)->fetchObject();
-    if ($result) {
-      $errors['name'] = t("The tree name is in use by another tree. Please provide a different unique name for this tree.");
-      return FALSE;
-    }
-  }
-
-  return TRUE;
-}
-/**
- * Inserts a phylotree record into Chado.
- *
- * This function validates the options passed prior to insertion of the record,
- * and if validation passes then any values in the options array that needed
- * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
- * their appropriate primary key values added to the options array. The ID of
- * the new record is added to the options array as 'phylotree_id'.
- *
- * @param $options
- *  An array of key value pairs with the following keys required:
- *     'name':       The name of the tree. This will be displayed to users.
- *     'description': A description about the tree
- *     'analysis_id': The ID of the analysis to which this phylotree should be
- *                   associated.
- *     'analysis':   If the analysis_id key is not used then the analysis name
- *                   may be provided to identify the analysis to which the tree
- *                   should be associated.
- *     'leaf_type':  A sequence ontology term or the word 'taxonomy'. If the
- *                   type is 'taxonomy' then this tree represents a
- *                   taxonomic tree.  The default, if not specified, is the
- *                   term 'polypeptide'.
- *     'tree_file':  The path of the file containing the phylogenetic tree to
- *                   import or a Drupal managed_file numeric ID.
- *     'format':     The file format. Currently only 'newick' is supported.
- *  Optional keys:
- *     'dbxref':     A database cross-reference of the form DB:ACCESSION.
- *                   Where DB is the database name, which is already present
- *                   in Chado, and ACCESSION is the unique identifier for
- *                   this tree in the remote database.
- *     'name_re':    If the leaf type is NOT 'taxonomy', then the value of
- *                   this field can be a regular expression to pull out
- *                   the name of the feature from the node label in the
- *                   input tree. If no value is provided the entire label is
- *                   used.
- *     'match':      Set to 'uniquename' if the leaf nodes should be matched
- *                   with the feature uniquename. The value 'name' (the
- *                   default) matches on the feature name.
- *     'load_now':   If set, the tree will be loaded immediately if a tree_file
- *                   is provided. Otherwise, the tree will be loaded via
- *                   a Tripal jobs call.
- *     'no_load':    If set the tree file will not be loaded.
- * @param $errors
- *   An empty array where validation error messages will be set. The keys
- *   of the array will be the name of the field from the options array and the
- *   value is the error message.
- * @param $warnings
- *   An empty array where validation warning messages will be set. The
- *   warnings should not stop an insert or an update but should be provided
- *   to the user as information by a drupal_set_message() if appropriate. The
- *   keys of the array will be the name of the field from the options array
- *   and the value is the warning message.
- * @return
- *   TRUE for success and FALSE for failure.
- */
-function tripal_insert_phylotree(&$options, &$errors, &$warnings) {
-  global $user;
-
-  // Trim only the values that were actually supplied. Creating empty entries
-  // for missing keys would stop the validator from applying its defaults
-  // (for example the default 'name_re').
-  foreach (array('name_re', 'leaf_type', 'name', 'format', 'tree_file') as $key) {
-    if (array_key_exists($key, $options) and is_string($options[$key])) {
-      $options[$key] = trim($options[$key]);
-    }
-  }
-
-  // Validate the incoming options.
-  $success = tripal_validate_phylotree('insert', $options, $errors, $warnings);
-  if (!$success) {
-    foreach ($errors as $field => $message) {
-      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
-    }
-    return FALSE;
-  }
-
-  // A numeric tree_file is a Drupal managed file ID. Load it before the
-  // record is created so that a bad ID does not leave an orphaned tree.
-  $file = NULL;
-  if (is_numeric($options['tree_file'])) {
-    $file = file_load($options['tree_file']);
-    if (!$file) {
-      $errors['tree_file'] = t('The managed file for the tree could not be loaded.');
-      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $errors['tree_file']);
-      return FALSE;
-    }
-  }
-
-  // If we're here then all is good, so add the phylotree record.
-  $values = array(
-    'analysis_id' => $options['analysis_id'],
-    'name' => $options['name'],
-    'dbxref_id' => $options['dbxref_id'],
-    'comment' => $options['description'],
-    'type_id' => $options['type_id'],
-  );
-  $phylotree = chado_insert_record('phylotree', $values);
-  if (!$phylotree) {
-    drupal_set_message(t('Unable to add phylotree.'), 'warning');
-    tripal_report_error('tripal_phylogeny', TRIPAL_WARNING, 'Insert phylotree: Unable to create phylotree where values: %values',
-        array('%values' => print_r($values, TRUE)));
-    return FALSE;
-  }
-  $phylotree_id = $phylotree['phylotree_id'];
-  $options['phylotree_id'] = $phylotree_id;
-
-  // Make a managed file permanent and associate it with the tree; otherwise
-  // the tree_file option is already a path on disk.
-  if ($file) {
-    $file->status = FILE_STATUS_PERMANENT;
-    $file = file_save($file);
-    file_usage_add($file, 'tripal_phylogeny', $options['format'], $phylotree_id);
-    $real_file_path = drupal_realpath($file->uri);
-    $file_label = $file->filename;
-  }
-  else {
-    $real_file_path = $options['tree_file'];
-    $file_label = basename($real_file_path);
-  }
-
-  // If caller has requested to load the file now then do so, otherwise
-  // submit using a Tripal job.
-  if (!array_key_exists('no_load', $options) or !$options['no_load']) {
-    // 'name' (the default) and empty values match on the feature name. Any
-    // other non-empty value, including 'uniquename' or a boolean flag,
-    // matches on the uniquename.
-    $leaf_match = (!empty($options['match']) and $options['match'] !== 'name') ? 'uniquename' : 'name';
-    $load_args = array(
-      'phylotree_id' => $phylotree_id,
-      'leaf_type' => $options['leaf_type'],
-      'match' => $leaf_match,
-      'name_re' => $options['name_re'],
-    );
-
-    if (array_key_exists('load_now', $options) and $options['load_now']) {
-      tripal_phylogeny_import_tree_file($real_file_path, $options['format'], $load_args);
-    }
-    else {
-      // Pass the validated format along rather than assuming newick.
-      $args = array(
-        $real_file_path,
-        $options['format'],
-        $load_args,
-      );
-      if (tripal_add_job("Import Tree File: " . $file_label, 'tripal_phylogeny',
-          'tripal_phylogeny_import_tree_file', $args, $user->uid)) {
-        drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
-      }
-    }
-  }
-
-  return TRUE;
-}
-
-/**
- * Updates a phylotree record in Chado.
- *
- * This function validates the options passed prior to update of the record
- * and if validation passes then any values in the options array that needed
- * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
- * their appropriate primary key values added to the options array.
- *
- * @param $phylotree_id
- *   The ID of the phylotree to update. It is copied into
- *   $options['phylotree_id'] when that key is missing or empty.
- * @param $options
- *  An array of key value pairs with the following optional keys:
- *     'name':       The name of the tree. This will be displayed to users.
- *     'description': A description about the tree
- *     'analysis_id': The ID of the analysis to which this phylotree should be
- *                   associated.
- *     'analysis':   If the analysis_id key is not used then the analysis name
- *                   may be provided to identify the analysis to which the tree
- *                   should be associated.
- *     'leaf_type':  A sequence ontology term or the word 'taxonomy'. If the
- *                   type is 'taxonomy' then this tree represents a
- *                   taxonomic tree.  The default, if not specified, is the
- *                   term 'polypeptide'.
- *     'tree_file':  The path of the file containing the phylogenetic tree to
- *                   import or a Drupal managed_file numeric ID.
- *     'format':     The file format. Currently only 'newick' is supported.
- *     'dbxref':     A database cross-reference of the form DB:ACCESSION.
- *                   Where DB is the database name, which is already present
- *                   in Chado, and ACCESSION is the unique identifier for
- *                   this tree in the remote database.
- *     'name_re':    If the leaf type is NOT 'taxonomy', then the value of
- *                   this field can be a regular expression to pull out
- *                   the name of the feature from the node label in the
- *                   input tree. If no value is provided the entire label is
- *                   used.
- *     'match':      Set to 'uniquename' if the leaf nodes should be matched
- *                   with the feature uniquename. The value 'name' (the
- *                   default) matches on the feature name.
- *     'load_now':   If set, the tree will be loaded immediately if a tree_file
- *                   is provided. Otherwise, the tree will be loaded via
- *                   a Tripal jobs call.
- * @return
- *   TRUE for success and FALSE for failure.
- */
-function tripal_update_phylotree($phylotree_id, &$options) {
-  global $user;
-
-  // The validator and the file handling below read the tree ID from the
-  // options array, so copy it there when only the argument was given.
-  if (!array_key_exists('phylotree_id', $options) or !$options['phylotree_id']) {
-    $options['phylotree_id'] = $phylotree_id;
-  }
-
-  // Validate the incoming options.
-  $errors = array();
-  $warnings = array();
-  $success = tripal_validate_phylotree('update', $options, $errors, $warnings);
-  if (!$success) {
-    foreach ($errors as $field => $message) {
-      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
-    }
-    return FALSE;
-  }
-
-  // If we're here then all is good, so collect the columns to update. Each
-  // entry maps an option key to its column in the phylotree table.
-  $columns = array(
-    'name' => 'name',
-    'analysis_id' => 'analysis_id',
-    'dbxref_id' => 'dbxref_id',
-    'description' => 'comment',
-    'type_id' => 'type_id',
-  );
-  $values = array();
-  foreach ($columns as $option => $column) {
-    if (array_key_exists($option, $options) and $options[$option]) {
-      $values[$column] = $options[$option];
-    }
-  }
-
-  // Only touch the record when there is something to change.
-  if ($values) {
-    $phylotree = chado_update_record('phylotree', array('phylotree_id' => $phylotree_id),
-        $values, array('return_record' => TRUE));
-    if (!$phylotree) {
-      drupal_set_message(t('Unable to update phylotree.'), 'warning');
-      tripal_report_error('tripal_phylogeny', TRIPAL_WARNING,
-          'Update phylotree: Unable to update phylotree where values: %values',
-          array('%values' => print_r($values, TRUE))
-      );
-      return FALSE;
-    }
-  }
-
-  // If we have a tree file, then import the tree
-  if (array_key_exists('tree_file', $options) and $options['tree_file']) {
-
-    // Remove any existing nodes
-    chado_delete_record('phylonode', array('phylotree_id' => $options['phylotree_id']));
-
-    // Find the managed file, if any, already associated with this tree.
-    $sql = "
-      SELECT FM.fid
-      FROM {file_managed} FM
-        INNER JOIN {file_usage} FU on FM.fid = FU.fid
-      WHERE FU.id = :id and FU.module = 'tripal_phylogeny'
-    ";
-    $fid = db_query($sql, array(':id' => $options['phylotree_id']))->fetchField();
-
-    // Remove the old file, unless the caller re-submitted that very file, in
-    // which case deleting it would destroy the file we are about to load.
-    $same_file = ($fid and is_numeric($options['tree_file']) and $fid == $options['tree_file']);
-    if ($fid and !$same_file) {
-      $old_file = file_load($fid);
-      if ($old_file) {
-        file_delete($old_file, TRUE);
-      }
-    }
-
-    // If the tree_file is numeric then it is a Drupal managed file and
-    // we want to make the file permanent and associated with the tree.
-    if (is_numeric($options['tree_file'])) {
-      $file = file_load($options['tree_file']);
-      if (!$file) {
-        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
-            'Update phylotree: The managed file for the tree could not be loaded.');
-        return FALSE;
-      }
-      $file->status = FILE_STATUS_PERMANENT;
-      $file = file_save($file);
-      // The usage record already exists when the same file was re-submitted.
-      if (!$same_file) {
-        file_usage_add($file, 'tripal_phylogeny', 'newick', $options['phylotree_id']);
-      }
-      $real_file_path = drupal_realpath($file->uri);
-      $file_label = $file->filename;
-    }
-    else {
-      $real_file_path = $options['tree_file'];
-      $file_label = basename($real_file_path);
-    }
-
-    // 'name' (the default) and empty values match on the feature name. Any
-    // other non-empty value, including 'uniquename' or a boolean flag,
-    // matches on the uniquename.
-    $leaf_match = (!empty($options['match']) and $options['match'] !== 'name') ? 'uniquename' : 'name';
-    $load_args = array(
-      'phylotree_id' => $options['phylotree_id'],
-      'leaf_type' => $options['leaf_type'],
-      'match' => $leaf_match,
-      // Fall back to matching the whole label when no expression was given.
-      'name_re' => array_key_exists('name_re', $options) ? $options['name_re'] : '^(.*)$',
-    );
-
-    // If caller has requested to load the file now then do so, otherwise
-    // submit using a Tripal job.
-    if (array_key_exists('load_now', $options) and $options['load_now']) {
-      tripal_phylogeny_import_tree_file($real_file_path, $options['format'], $load_args);
-    }
-    else {
-      // Pass the validated format along rather than assuming newick.
-      $args = array(
-        $real_file_path,
-        $options['format'],
-        $load_args,
-      );
-      if (tripal_add_job("Import Tree File: " . $file_label, 'tripal_phylogeny',
-          'tripal_phylogeny_import_tree_file', $args, $user->uid)) {
-        drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
-      }
-    }
-  }
-
-  return TRUE;
-}
-
-/**
- * Removes a phylotree record from Chado.
- *
- * @param $phylotree_id
- *   The ID of the phylotree record to delete.
- *
- * @return
- *   FALSE when no ID is given; otherwise the result of chado_delete_record().
- */
-function tripal_delete_phylotree($phylotree_id) {
-
-  // An empty ID gives us nothing to match the record on, so report the
-  // problem and bail out before touching the database.
-  if (!$phylotree_id) {
-    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
-        'Please provide a phylotree_id to delete a tree.');
-    return FALSE;
-  }
-
-  // Delete the tree that matches the given ID.
-  return chado_delete_record('phylotree', array('phylotree_id' => $phylotree_id));
-}

+ 0 - 328
legacy/tripal_phylogeny/includes/parsers/tripal_phylogeny.newick_parser.inc

@@ -1,331 +1,3 @@
 <?php
 
-/**
- * This code parses the Newick format according to the following
- * grammar:
- * 
- *  Tree --> Subtree ";" | Branch ";"
- *  Subtree --> Leaf | Internal
- *  Leaf --> Name
- *  Internal --> "(" BranchSet ")" Name
- *  BranchSet --> Branch | BranchSet "," Branch
- *  Branch --> Subtree Length
- *  Name --> empty | string
- *  Length --> empty | ":" number
- * 
- */
 
-/**
- * Parses a Newick formatted file into a nested tree array.
- *
- * @param $file_name
- *   The path of the Newick file to parse.
- *
- * @return
- *   The root node array produced by tripal_phylogeny_parse_newick_tree(), or
- *   an empty array if the file could not be opened.
- */
-function tripal_phylogeny_parse_newick_file($file_name) {
-
-  // Reset the counter that is used to label nodes that have no name.
-  global $tripal_phylogeny_bootstrap;
-  $tripal_phylogeny_bootstrap = 1;
-
-  $fp = fopen($file_name, 'r');
-  if (!$fp) {
-    // The file could not be opened, so report the failure as an empty tree.
-    return array();
-  }
-
-  $tree = tripal_phylogeny_parse_newick_tree($fp);
-
-  // Release the file handle now that parsing has finished.
-  fclose($fp);
-
-  return $tree;
-}
-/**
- * Parses a complete tree: a subtree or a branch followed by a semicolon.
- *
- * Prints a syntax error and exits if the terminating semicolon is missing.
- *
- * @param $fp
- *   The open file handle to read tokens from.
- * @param $depth
- *   The depth assigned to the root node. Defaults to 0.
- * @return
- *   The root node array, flagged with 'is_root' and carrying a 'length' key
- *   when the tree was written as a branch.
- */
-function tripal_phylogeny_parse_newick_tree($fp, $depth = 0) {
-
-  $root = tripal_phylogeny_parse_newick_subtree($fp, $depth);
-  $root['is_root'] = 1;
-
-  // A semicolon directly after the subtree ends the tree. Anything else
-  // means the root is a branch, i.e. a subtree followed by a length.
-  if (tripal_phylogeny_parse_newick_get_token($fp) != ";") {
-    tripal_phylogeny_parse_newick_replace_token($fp);
-    $root['length'] = tripal_phylogeny_parse_newick_length($fp, $depth);
-
-    // After the length only the closing semicolon is acceptable.
-    if (tripal_phylogeny_parse_newick_get_token($fp) != ';') {
-      print "Syntax Error: missing trailing semicolon.\n";
-      exit;
-    }
-  }
-
-  return $root;
-}
-
-/**
- * Parses a subtree, which is either an internal node or a leaf.
- *
- * @param $fp
- *   The open file handle to read tokens from.
- * @param $depth
- *   The depth to record on the resulting node.
- * @return
- *   The node array for the internal node or for the leaf.
- */
-function tripal_phylogeny_parse_newick_subtree($fp, $depth) {
-
-  // Try an internal node first; its children sit one level deeper.
-  $node = tripal_phylogeny_parse_newick_internal($fp, $depth + 1);
-  if (is_array($node)) {
-    $node['depth'] = $depth;
-    return $node;
-  }
-
-  // Not an internal node, so read the subtree as a leaf.
-  $leaf_name = tripal_phylogeny_parse_newick_leaf($fp, $depth);
-  return array(
-    'name' => $leaf_name,
-    'depth' => $depth,
-    'is_leaf' => TRUE,
-    'descendents' => 0,
-  );
-}
-
-/**
- * Parses a branch: a subtree followed by an optional length.
- *
- * @param $fp
- *   The open file handle to read tokens from.
- * @param $depth
- *   The depth passed on to the subtree and length parsers.
- * @return
- *   The subtree node array with the parsed value stored under 'length'.
- */
-function tripal_phylogeny_parse_newick_branch($fp, $depth) {
-
-  $branch = tripal_phylogeny_parse_newick_subtree($fp, $depth);
-  $branch['length'] = tripal_phylogeny_parse_newick_length($fp, $depth);
-
-  return $branch;
-}
-/**
- * Parses an internal node: "(" BranchSet ")" Name.
- *
- * @param $fp
- *   The open file handle to read tokens from.
- * @param $depth
- *   The depth to record on the node and to pass to its branches.
- * @return
- *   The internal node array, or FALSE if the input at the current position
- *   is not an internal node.
- */
-function tripal_phylogeny_parse_newick_internal($fp, $depth) {
-
-  // An internal node always starts with an open paren. If the next token is
-  // anything else, put it back so the caller can parse it differently.
-  $opener = tripal_phylogeny_parse_newick_get_token($fp);
-  if ($opener != '(') {
-    tripal_phylogeny_parse_newick_replace_token($fp);
-    return FALSE;
-  }
-
-  $children = tripal_phylogeny_parse_newick_branchset($fp, $depth);
-  if (!is_array($children)) {
-    return FALSE;
-  }
-
-  // The branch set must be followed by a closing paren.
-  $closer = tripal_phylogeny_parse_newick_get_token($fp);
-  if ($closer != ')') {
-    tripal_phylogeny_parse_newick_replace_token($fp);
-    return FALSE;
-  }
-
-  $node_name = tripal_phylogeny_parse_newick_name($fp, $depth);
-
-  // Point every child at this node and total up the descendents: each child
-  // counts once, plus whatever descendents it has itself.
-  $total = 0;
-  foreach (array_keys($children) as $idx) {
-    $children[$idx]['parent'] = $node_name;
-    $total += 1 + $children[$idx]['descendents'];
-  }
-
-  return array(
-    'name' => $node_name,
-    'depth' => $depth,
-    'branch_set' => $children,
-    'is_internal' => TRUE,
-    'descendents' => $total,
-  );
-}
-
-/**
- * Parses a branch set: one or more branches separated by commas.
- *
- * @param $fp
- *   The open file handle to read tokens from.
- * @param $depth
- *   The depth passed on to each branch.
- * @return
- *   An array of branch node arrays, or FALSE if no branch could be read.
- */
-function tripal_phylogeny_parse_newick_branchset($fp, $depth) {
-
-  // A branch set always begins with a branch.
-  $first = tripal_phylogeny_parse_newick_branch($fp, $depth);
-  if (!is_array($first)) {
-    return FALSE;
-  }
-  $set = array($first);
-
-  // Without a comma there are no further branches: put the token back and
-  // return what we have.
-  if (tripal_phylogeny_parse_newick_get_token($fp) != ',') {
-    tripal_phylogeny_parse_newick_replace_token($fp);
-    return $set;
-  }
-
-  // A comma means more branches follow; recurse to collect them.
-  foreach (tripal_phylogeny_parse_newick_branchset($fp, $depth) as $next) {
-    $set[] = $next;
-  }
-
-  return $set;
-}
-/**
- * Parses a leaf, which is simply a name.
- *
- * @param $fp
- *   The open file handle to read tokens from.
- * @param $depth
- *   The depth passed on to the name parser.
- * @return
- *   Whatever tripal_phylogeny_parse_newick_name() returns.
- */
-function tripal_phylogeny_parse_newick_leaf($fp, $depth) {
-  $label = tripal_phylogeny_parse_newick_name($fp, $depth);
-  return $label;
-}
-/**
- * Parses a node name, which may be empty.
- *
- * @param $fp
- *   The open file handle to read tokens from.
- * @param $depth
- *   Not used by this function.
- * @return
- *   The name token. If the name is empty the current value of the global
- *   bootstrap counter is returned (and the counter incremented). FALSE is
- *   returned if an open paren is found where a name was expected.
- */
-function tripal_phylogeny_parse_newick_name($fp, $depth) {
-  global $tripal_phylogeny_bootstrap;
-
-  $token = tripal_phylogeny_parse_newick_get_token($fp);
-
-  switch ($token) {
-    // A delimiter right away means the name is empty. Put the delimiter
-    // back and hand out the next bootstrap value in place of a name.
-    case ':':
-    case ',':
-    case ';':
-    case ')':
-      tripal_phylogeny_parse_newick_replace_token($fp);
-      return $tripal_phylogeny_bootstrap++;
-
-    // An open paren where a name belongs is a syntax error.
-    case '(':
-      tripal_phylogeny_parse_newick_replace_token($fp);
-      return FALSE;
-  }
-
-  return $token;
-}
-/**
- * Parses an optional branch length of the form ":" number.
- *
- * Exits if an open paren follows the colon.
- *
- * @param $fp
- *   The open file handle to read tokens from.
- * @param $depth
- *   Not used by this function.
- * @return
- *   The length token, an empty string if no length is present, or FALSE if
- *   the next token is neither a delimiter nor a colon.
- */
-function tripal_phylogeny_parse_newick_length($fp, $depth) {
-
-  $token = tripal_phylogeny_parse_newick_get_token($fp);
-
-  switch ($token) {
-    // A delimiter right away means the length is empty.
-    case ',':
-    case ';':
-    case ')':
-      tripal_phylogeny_parse_newick_replace_token($fp);
-      return '';
-
-    // A colon introduces the length, which is read below.
-    case ':':
-      break;
-
-    // Anything else is not a length; leave it for the caller.
-    default:
-      tripal_phylogeny_parse_newick_replace_token($fp);
-      return FALSE;
-  }
-
-  $value = tripal_phylogeny_parse_newick_get_token($fp);
-
-  // An open paren where the number belongs is a syntax error.
-  if ($value == '(') {
-    exit();
-  }
-
-  return $value;
-}
-
-/**
- * Reads the next token from a Newick file.
- *
- * A token is either a single reserved character (one of ; ( ) , :) or a run
- * of other characters up to, but not including, the next reserved character.
- * Quote characters are dropped. Whitespace is kept inside quotes and ignored
- * outside of them.
- *
- * The position of the file pointer on entry is saved in the global
- * $tripal_phylogeny_fp_pos so that
- * tripal_phylogeny_parse_newick_replace_token() can push the token back.
- *
- * @param $fp
- *   The open file handle to read from.
- * @return
- *   The token, or an empty string if the end of the file is reached before
- *   any token character is read.
- */
-function tripal_phylogeny_parse_newick_get_token($fp) {
-
-  // Keep track of the file position that we start with
-  global $tripal_phylogeny_fp_pos;
-  $tripal_phylogeny_fp_pos = ftell($fp);
-
-  $token = '';
-  $in_quote = FALSE;
-  $num_read = 0;
-
-  // fgetc() returns FALSE at the end of the stream. Testing its return value,
-  // rather than feof(), makes sure the last character is always processed.
-  while (($c = fgetc($fp)) !== FALSE) {
-    $num_read++;
-
-    switch ($c) {
-      // If the first character is a reserved character and we
-      // have not encountered any other characters then return
-      // it as the token. Otherwise, return the collected token.
-      case ';':
-      case '(':
-      case ')':
-      case ',':
-      case ':':
-        // Compare with the empty string: the token "0" (for example a branch
-        // length of zero) is falsy in PHP but is still a token.
-        if ($token === '') {
-          return $c;
-        }
-        // Put the reserved character back and return the token.
-        fseek($fp, $tripal_phylogeny_fp_pos + $num_read - 1);
-        return $token;
-
-      // Quotes are allowed around names and if a name is in
-      // quotes then allow spaces. Otherwise, spaces are ignored.
-      case '\'':
-      case '"':
-        $in_quote = !$in_quote;
-        break;
-
-      case " ":
-      case "\t":
-      case "\r":
-      case "\n":
-        if ($in_quote) {
-          $token .= $c;
-        }
-        break;
-
-      // All other characters get saved as the token
-      default:
-        $token .= $c;
-    }
-  }
-
-  return $token;
-}
-/**
- * Pushes the most recently read token back onto the input.
- *
- * Moves the file pointer to the position stored in the global
- * $tripal_phylogeny_fp_pos and then stores the resulting position there.
- *
- * @param $fp
- *   The open file handle to rewind.
- */
-function tripal_phylogeny_parse_newick_replace_token($fp) {
-  global $tripal_phylogeny_fp_pos;
-
-  $saved_pos = $tripal_phylogeny_fp_pos;
-  fseek($fp, $saved_pos);
-  $tripal_phylogeny_fp_pos = ftell($fp);
-}

+ 0 - 323
legacy/tripal_phylogeny/includes/tripal_phylogeny.import_tree.inc

@@ -1,326 +1,3 @@
 <?php
 
 
-/**
- * Imports a tree file.
- *
- * This function is used as a wrapper for loading a phylogenetic tree using
- * any number of file loaders. The nodes are added to an existing phylotree
- * record inside a database transaction that is rolled back if the load fails.
- *
- * @param $file_name
- *   The name of the file containing the phylogenetic tree to import.
- * @param $format
- *   The format of the file. Currently only the 'newick' file format is
- *   supported.
- * @param $options
- *   An array with the following keys:
- *     'phylotree_id': Required. The imported nodes will be associated with
- *                   this tree.
- *     'leaf_type':  A sequence ontology term or the word 'organism'. If the
- *                   type is 'organism' then this tree represents a
- *                   taxonomic tree.  The default, if not specified, is the
- *                   term 'polypeptide'.
- *     'name_re':    If the leaf type is NOT 'organism', then the value of
- *                   this field can be a regular expression to pull out
- *                   the name of the feature from the node label in the
- *                   input tree. If no value is provided the entire label is
- *                   used.
- *     'match':      Set to 'uniquename' if the leaf nodes should be matched
- *                   with the feature uniquename. Defaults to 'name'.
- * @param $job_id
- *   Optional. Not used by this function.
- *
- * @return
- *   FALSE if the options are invalid, the format is not supported or the
- *   import fails. Nothing is returned on success.
- */
-function tripal_phylogeny_import_tree_file($file_name, $format, $options = array(), $job_id = NULL) {
-
-  // Fill in any options that were not provided.
-  $options += array(
-    'leaf_type' => 'polypeptide',
-    'match' => 'name',
-    'name_re' => '^(.*)$',
-  );
-  $options['name_re'] = trim($options['name_re']);
-
-  // The nodes are always attached to an existing phylotree record, so the
-  // ID is required. Without this check the lookup below reads an undefined
-  // array key.
-  if (!array_key_exists('phylotree_id', $options)) {
-    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
-      'The phylotree_id is required for importing the tree.');
-    return FALSE;
-  }
-
-  // Reject unsupported formats up front; otherwise there is no parsed tree
-  // to import.
-  if ($format != 'newick') {
-    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
-      'Unsupported tree file format: %format. Only the newick format is supported.',
-      array('%format' => $format));
-    return FALSE;
-  }
-
-  // Get the phylotree record.
-  $values = array('phylotree_id' => $options['phylotree_id']);
-  $phylotree = chado_generate_var('phylotree', $values);
-
-  if (!$phylotree) {
-    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
-        'Could not find the phylotree using the ID provided: %phylotree_id.',
-        array('%phylotree_id' => $options['phylotree_id']));
-    return FALSE;
-  }
-
-  $transaction = db_transaction();
-  print "\nNOTE: Loading of this tree file is performed using a database transaction. \n" .
-      "If the load fails or is terminated prematurely then the entire set of \n" .
-      "insertions/updates is rolled back and will not be found in the database\n\n";
-  try {
-    // Parse the tree into the expected nested node format.
-    module_load_include('inc', 'tripal_phylogeny', 'includes/parsers/tripal_phylogeny.newick_parser');
-    $tree = tripal_phylogeny_parse_newick_file($file_name);
-
-    // Assign the right and left indices to the tree nodes.
-    tripal_phylogeny_assign_tree_indices($tree);
-
-    // Iterate through the tree nodes and add them to Chado in accordance
-    // with the details in the $options array.
-    tripal_phylogeny_import_tree($tree, $phylotree, $options);
-  }
-  catch (Exception $e) {
-    $transaction->rollback();
-    watchdog_exception('tripal_phylogeny', $e);
-    print "\nFAILED: Rolling back database changes...\n";
-    return FALSE;
-  }
-  print "\nDone Importing Tree.\n";
-}
-
-/**
- * Retrieves the CV terms used to type the nodes of a tree.
- *
- * The terms 'phylo_leaf', 'phylo_interior' and 'phylo_root' are looked up
- * in the 'tripal_phylogeny' vocabulary.
- *
- * @return
- *   An array with the keys 'leaf', 'internal' and 'root' whose values are
- *   the matching cvterm records, or FALSE if any of the terms cannot be
- *   found.
- */
-function tripal_phylogeny_get_node_types_vocab() {
-  // Maps each key of the returned array to the name of its cvterm. The
-  // terms are looked up in this order.
-  $term_names = array(
-    'leaf' => 'phylo_leaf',
-    'internal' => 'phylo_interior',
-    'root' => 'phylo_root',
-  );
-
-  $vocab = array();
-  foreach ($term_names as $node_type => $term_name) {
-    $values = array(
-      'name' => $term_name,
-      'cv_id' => array(
-        'name' => 'tripal_phylogeny',
-      ),
-    );
-    $cvterm = chado_generate_var('cvterm', $values);
-    if (!$cvterm) {
-      // Name the node type that is actually missing rather than always
-      // reporting a missing leaf term.
-      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
-        "Could not find the $node_type vocabulary term: '$term_name'. It should " .
-        "already be present as part of the tripal_phylogeny vocabulary.");
-      return FALSE;
-    }
-    $vocab[$node_type] = $cvterm;
-  }
-  return $vocab;
-}
-
-/**
- * Walks the tree and sets the left and right index of each node.
- *
- * The counter advances by 100 for every index handed out, which leaves
- * room for nodes that might be added later. A node's left index is set
- * before its children are visited and its right index is set again after
- * each child, so the children's indices fall between those of the parent.
- *
- * @param $tree
- *   The tree array. It is updated in place.
- * @param $index
- *   The running index counter. It does not need to be provided on the
- *   first call; it is passed along on recursive calls.
- */
-function tripal_phylogeny_assign_tree_indices(&$tree, &$index = 1) {
-  if (array_key_exists('name', $tree)) {
-    $index += 100;
-    $tree['left_index'] = $index;
-
-    // Only the presence of the 'is_leaf' key is tested here, not its value.
-    if (array_key_exists('is_leaf', $tree)) {
-      $index += 100;
-      $tree['right_index'] = $index;
-    }
-  }
-
-  // Nothing more to do for nodes without children.
-  if (!array_key_exists('branch_set', $tree)) {
-    return;
-  }
-
-  foreach (array_keys($tree['branch_set']) as $child_key) {
-    tripal_phylogeny_assign_tree_indices($tree['branch_set'][$child_key], $index);
-    // Push the parent's right index past the child that was just numbered.
-    $index += 100;
-    $tree['right_index'] = $index;
-  }
-}
-
-/**
- * Iterates through the tree array and creates phylonodes in Chado.
- *
- * The function iterates through the tree in a top-down approach adding
- * parent internal nodes prior to leaf nodes.  Each node of the tree should have
- * the following fields:
- *
- *   -name:         The name (or label) for this node.
- *   -depth:        The depth of the node in the tree.
- *   -is_root:      Set to 1 if this node is a root node.
- *   -is_leaf:      Set to 1 if this node is a leaf node.
- *   -is_internal:  Set to 1 if this node is an internal node.
- *   -left_index:   The index of the node to the left in the tree.
- *   -right_index:  The index of the node to the right in the tree.
- *   -branch_set:   An array containing a list of nodes that are children
- *                  of the node.
- *   -parent:       The name of the parent node.
- *   -organism_id:  The organism_id for associating the node with an organism.
- *   -properties:   An array of key/value pairs where the key is the cvterm_id
- *                  and the value is the property value.  These properties
- *                  will be associated with the phylonode.
- *
- * Prior to importing the tree the indices can be set by using the
- * tripal_phylogeny_assign_tree_indices() function.
- *
- * @param $tree
- *   The tree array. The phylonode_id of each inserted node is added to it.
- * @param $phylotree
- *   The phylotree record the nodes belong to. Only its phylotree_id is used.
- * @param $options
- *   The options provide some direction for how the tree is imported.  The
- *   following keys can be used:
- *   -taxonomy:  Set to 1 if this tree is a taxonomic tree. Set to 0
- *               (or leave unset) otherwise.
- *   -leaf_type: Set to the leaf type name. If this is a non-taxonomic tree
- *               that is associated with features, then this should be the
- *               Sequence Ontology term for the feature (e.g. polypeptide),
- *               which is also the value used when the option is not set.
- *               If this is a taxonomic tree then this option is not needed.
- *   -match:     Set to either 'name' or 'uniquename'.  This is used for
- *               matching the feature name or uniquename with the node name.
- *               Any value other than 'name' matches on the uniquename.
- *               This is not needed for taxonomic trees.
- *   -name_re:   Set to a regular expression that can be used for matching the
- *               node name with the feature name if the node name is not
- *               identical to the feature name. The first capture group is
- *               used as the feature name.
- * @param $vocab
- *   Optional. An array containing a set of key/value pairs that maps node
- *   types to CV terms.  The keys must be 'root', 'internal' or 'leaf'.  If
- *   no vocab is provided then the terms provided by the tripal_phylogeny
- *   CV will be used.
- * @param $parent
- *   This argument is not needed when the function is first called. This
- *   function is recursive and this argument is used on recursive calls.
- *
- * @throws Exception
- *   If the node type terms cannot be loaded or a node cannot be inserted.
- */
-function tripal_phylogeny_import_tree(&$tree, $phylotree, $options, $vocab = array(), $parent = NULL) {
-
-  // Get the vocabulary terms used to describe nodes in the tree if one
-  // wasn't provided.
-  if (count($vocab) == 0) {
-    $vocab = tripal_phylogeny_get_node_types_vocab();
-    if (!$vocab) {
-      // The lookup returns FALSE when a term is missing. Nodes cannot be
-      // typed without the terms so stop here.
-      throw new Exception('Could not load the vocabulary terms used to type the nodes of the tree.');
-    }
-  }
-
-  if (is_array($tree) and array_key_exists('name', $tree)) {
-    $values = array(
-      'phylotree_id' => $phylotree->phylotree_id,
-      'left_idx'  => $tree['left_index'],
-      'right_idx' => $tree['right_index'],
-    );
-
-    // Add in any optional values to the $values array if they are present.
-    // The values are compared with the empty string rather than tested with
-    // empty() so that a label or a branch length of "0" is not discarded.
-    $has_name = ((string) $tree['name'] !== '');
-    if ($has_name) {
-      $values['label'] = $tree['name'];
-    }
-    if (isset($tree['length']) and (string) $tree['length'] !== '') {
-      $values['distance'] = $tree['length'];
-    }
-
-    // Set the type of node.
-    if (!empty($tree['is_root'])) {
-      $values['type_id'] = $vocab['root']->cvterm_id;
-    }
-    else if (!empty($tree['is_internal'])) {
-      $values['type_id'] = $vocab['internal']->cvterm_id;
-      $values['parent_phylonode_id'] = $parent['phylonode_id'];
-      // TODO: a feature may be associated here but it is recommended that it
-      // be a feature of type SO:match and should represent the alignment of
-      // all features beneath it.
-    }
-    else if (!empty($tree['is_leaf'])) {
-      $values['type_id'] = $vocab['leaf']->cvterm_id;
-      $values['parent_phylonode_id'] = $parent['phylonode_id'];
-
-      // Match this leaf node with a feature if this is a sequence-based
-      // tree. But we can't do that if we don't have a name.
-      if ($has_name and empty($options['taxonomy'])) {
-
-        // Decide which feature column the node name is compared with.
-        $match_column = 'uniquename';
-        if (isset($options['match']) and $options['match'] == 'name') {
-          $match_column = 'name';
-        }
-
-        // Use the whole node name unless the regular expression captures
-        // part of it.
-        $match_value = $tree['name'];
-        if (isset($options['name_re'])) {
-          $re = $options['name_re'];
-          $matches = array();
-          if (preg_match("/$re/", $tree['name'], $matches) and isset($matches[1])) {
-            $match_value = $matches[1];
-          }
-        }
-
-        $sel_values = array(
-          $match_column => $match_value,
-          'type_id' => array(
-            'name' => isset($options['leaf_type']) ? $options['leaf_type'] : 'polypeptide',
-            'cv_id' => array(
-              'name' => 'sequence'
-            ),
-          ),
-        );
-        $sel_columns = array('feature_id');
-        $feature = chado_select_record('feature', $sel_columns, $sel_values);
-
-        // Only make the association when exactly one feature matches. No
-        // association is made if none or several features are found.
-        if (count($feature) == 1) {
-          $values['feature_id'] = $feature[0]->feature_id;
-        }
-      }
-    }
-
-    // Insert the new node and then add its assigned phylonode_id to the node.
-    $phylonode = chado_insert_record('phylonode', $values);
-    if (!$phylonode) {
-      // Child nodes need the ID of this node, so do not continue without it.
-      throw new Exception("Could not insert the tree node: '" . $tree['name'] . "'.");
-    }
-    $tree['phylonode_id'] = $phylonode['phylonode_id'];
-
-    // Associate this node with an organism if one is provided.
-    if (array_key_exists('organism_id', $tree)) {
-      $values = array(
-        'phylonode_id' => $tree['phylonode_id'],
-        'organism_id' => $tree['organism_id']
-      );
-      chado_insert_record('phylonode_organism', $values);
-    }
-
-    // Associate any properties.
-    if (array_key_exists('properties', $tree)) {
-      foreach ($tree['properties'] as $type_id => $value) {
-        $values = array(
-          'phylonode_id' => $tree['phylonode_id'],
-          'type_id' => $type_id,
-          'value' => $value,
-        );
-        chado_insert_record('phylonodeprop', $values);
-      }
-    }
-  }
-
-  // Import the children, passing this node along as their parent.
-  if (is_array($tree) and array_key_exists('branch_set', $tree)) {
-    foreach ($tree['branch_set'] as $key => $node) {
-      tripal_phylogeny_import_tree($tree['branch_set'][$key], $phylotree, $options, $vocab, $tree);
-    }
-  }
-}

+ 0 - 368
legacy/tripal_phylogeny/includes/tripal_phylogeny.taxonomy.inc

@@ -40,372 +40,4 @@ function tripal_phylogeny_taxonomy_view() {
     '#markup' => t('This site has not yet prepared the taxonomy for viewing.') . $admin_message,
   );
 }
-/**
- * Builds the form used to start an import of NCBI taxonomy data.
- *
- * The form has an (empty) instructions item, a checkbox that confirms the
- * import and a submit button.
- *
- * @param $form
- *   The form array the elements are added to.
- * @param $form_state
- *   The form state. It is not used by this function.
- *
- * @return
- *   The form array.
- */
-function tripal_phylogeny_taxonomy_load_form($form, &$form_state) {
-
-  $instructions = array(
-    '#type' => 'item',
-    '#markup' => '',
-  );
-
-  // The validate and submit handlers both read this checkbox.
-  $import_existing = array(
-    '#type' => 'checkbox',
-    '#title' => 'Import taxonomy for existing species.',
-    '#description' =>  t('The NCBI Taxonmic Importer examines the organisms
-      currently present in the database and queries NCBI for the
-      taxonomic details.  If the importer is able to match the
-      genus and species with NCBI the species details will be imported,
-      and a page containing the taxonomic tree will be created.'),
-  );
-
-  $submit = array(
-    '#type' => 'submit',
-    '#name' => 'import',
-    '#value' => 'Submit',
-  );
-
-  $form['instructions'] = $instructions;
-  $form['import_existing'] = $import_existing;
-  $form['submit'] = $submit;
-
-  return $form;
-}
-
-/**
- * Validates the NCBI taxonomy import form.
- *
- * The import must be confirmed by ticking the 'import_existing' checkbox.
- *
- * @param $form
- *   The form array.
- * @param $form_state
- *   The form state holding the submitted values.
- */
-function tripal_phylogeny_taxonomy_load_form_validate($form, &$form_state) {
-  if (empty($form_state['values']['import_existing'])) {
-    // The error is set on 'import_existing', which is the name the checkbox
-    // has in the form.
-    form_set_error('import_existing', 'Please confirm the import by clicking the checkbox.');
-  }
-}
-
-/**
- * Submit handler for the NCBI taxonomy import form.
- *
- * Adds a job that calls tripal_phylogeny_ncbi_taxonomy_import() on behalf
- * of the current user when the 'import_existing' checkbox was ticked.
- *
- * @param $form
- *   The form array.
- * @param $form_state
- *   The form state holding the submitted values.
- */
-function tripal_phylogeny_taxonomy_load_form_submit($form, &$form_state) {
-  global $user;
-
-  // Nothing to do unless the import was confirmed.
-  if (!$form_state['values']['import_existing']) {
-    return;
-  }
-
-  // The import callback is given no arguments.
-  $job_args = array();
-  tripal_add_job("Import NCBI Taxonomy", 'tripal_phylogeny',
-    'tripal_phylogeny_ncbi_taxonomy_import', $job_args, $user->uid);
-}
-
-/**
- * Imports NCBI taxonomy details for every organism in the database.
- *
- * For each organism the genus and species are searched for at NCBI. When a
- * taxonomy ID is found the lineage, genetic codes, division and alternate
- * names are stored as organism properties and the lineage is added to a
- * taxonomic tree. The tree is then written to Chado and synced.
- *
- * The whole import runs in a database transaction which is rolled back if
- * an exception is thrown.
- *
- * @param $job_id
- *   The ID of the job running this import. It is not used by this function.
- */
-function tripal_phylogeny_ncbi_taxonomy_import($job_id) {
-
-  print "\nNOTE: Importing of NCBI taxonomy data is performed using a database transaction. \n" .
-    "If the load fails or is terminated prematurely then the entire set of \n" .
-    "insertions/updates is rolled back and will not be found in the database\n\n";
-
-  $transaction = db_transaction();
-  try {
-    // TODO: there should be an API function named tripal_insert_analysis().
-    // But until then we have to insert the analysis manually.
-    // Get the version of this module for the analysis record:
-    $info = system_get_info('module', 'tripal_phylogeny');
-    $version = $info['version'];
-    $analysis_name = 'NCBI Taxonomy Tree Import';
-
-    // If the analysis record already exists then don't add it again.
-    $analysis = chado_select_record('analysis', array('*'), array('name' => $analysis_name));
-    if (count($analysis) == 0) {
-      $values = array(
-        'name' => 'NCBI Taxonomy Tree Import',
-        'description' => 'Used to import NCBI taxonomy details for organisms in this database.',
-        'program' => 'Tripal Phylogeny Module NCBI Taxonomy Importer',
-        'programversion' => $version,
-        'sourcename' => 'NCBI Taxonomy',
-        'sourceuri' => 'http://www.ncbi.nlm.nih.gov/taxonomy',
-      );
-      $analysis = chado_insert_record('analysis', $values);
-      if (!$analysis) {
-        throw new Exception("Cannot add NCBI Taxonomy Tree Import Analysis.");
-      }
-      // The inserted record is used as an array elsewhere in this module,
-      // while the selected record below is an object. Convert it so that
-      // $analysis->analysis_id works for both.
-      $analysis = (object) $analysis;
-    }
-    else {
-      $analysis = $analysis[0];
-    }
-
-    // If the tree already exists then don't insert it again.
-    // NOTE(review): nothing visible here sets a global $site_name, and no
-    // space separates it from 'Taxonomy Tree'. Confirm before changing,
-    // because existing trees are looked up by this exact name.
-    global $site_name;
-    $tree_name = $site_name . 'Taxonomy Tree';
-    $phylotree = chado_select_record('phylotree', array('*'), array('name' => $tree_name));
-    if (count($phylotree) == 0) {
-      // Add the taxonomic tree.
-      $options = array(
-        'name' =>  $site_name . 'Taxonomy Tree',
-        'description' => 'The taxonomic tree of species present on this site. Click a species name for more details.',
-        'leaf_type' => 'taxonomy',
-        'analysis_id' => $analysis->analysis_id,
-        'tree_file' => '/dev/null',
-        'format' => 'taxonomy',
-        'no_load' => TRUE,
-      );
-      $errors = array();
-      $warnings = array();
-      $success = tripal_insert_phylotree($options, $errors, $warnings);
-      if (!$success) {
-        throw new Exception("Cannot add the Taxonomy Tree record.");
-      }
-      // NOTE(review): presumably tripal_insert_phylotree() adds the new
-      // phylotree_id to $options; verify, since the ID is needed below.
-      $phylotree = (object) $options;
-    }
-    else {
-      $phylotree = $phylotree[0];
-    }
-
-    // Clean out the phylotree in the event this is a reload.
-    chado_delete_record('phylonode', array('phylotree_id' => $phylotree->phylotree_id));
-
-    // The taxonomic tree must have a root, so create that first.
-    $tree = array(
-      'name' => 'root',
-      'depth' => 0,
-      'is_root' => 1,
-      'is_leaf' => 0,
-      'is_internal' => 0,
-      'left_index' => 0,
-      'right_index' => 0,
-      'branch_set' => array(),
-    );
-
-    // Get the "rank" cvterm from the tripal_phylogeny vocabulary.
-    $rank_cvterm = tripal_get_cvterm(array(
-      'name' => 'rank',
-      'cv_id' => array('name' => 'tripal_phylogeny')
-    ));
-    if (!$rank_cvterm) {
-      throw new Exception("Cannot find the 'rank' term in the tripal_phylogeny vocabulary.");
-    }
-
-    // Get the list of organisms.
-    $sql = "SELECT O.* FROM {organism} O";
-    $organisms = chado_query($sql);
-    while ($organism = $organisms->fetchObject()) {
-      // Build the query string to get the information about this species.
-      $term = $organism->genus . ' ' . $organism->species;
-      $term = urlencode($term);
-      $search_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?".
-        "db=taxonomy" .
-        "&term=$term";
 
-      // Get the search response from NCBI and parse the XML to get the
-      // taxonomy ID.
-      $xml = _tripal_phylogeny_ncbi_fetch_xml($search_url);
-      if ($xml) {
-        $taxid = (string) $xml->IdList->Id;
-        if ($taxid) {
-          print "$taxid\t$organism->genus $organism->species\n";
-          // If we have a taxonomy ID we can now get the details.
-          $fetch_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?".
-            "db=taxonomy" .
-            "&id=$taxid";
-          $xml = _tripal_phylogeny_ncbi_fetch_xml($fetch_url);
-          if ($xml) {
-            $taxon = $xml->Taxon;
-
-            // Add in the organism properties.
-            $lineage = (string) $taxon->Lineage;
-            tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'lineage', $lineage);
-
-            $genetic_code = (string) $taxon->GeneticCode->GCId;
-            tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'genetic_code', $genetic_code);
-
-            $genetic_code_name = (string) $taxon->GeneticCode->GCName;
-            tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'genetic_code_name', $genetic_code_name);
-
-            $mito_genetic_code = (string) $taxon->MitoGeneticCode->MGCId;
-            tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'mitochondrial_genetic_code', $mito_genetic_code);
-
-            $mito_genetic_code_name = (string) $taxon->MitoGeneticCode->MGCName;
-            tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'mitochondrial_genetic_code_name', $mito_genetic_code_name);
-
-            $division = (string) $taxon->Division;
-            tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'division', $division);
-
-            // Store the alternate names. The rank of each property is the
-            // number of names of the same type that were seen before it.
-            // Not every taxon has an OtherNames stanza.
-            $name_ranks = array();
-            if (isset($taxon->OtherNames)) {
-              foreach ($taxon->OtherNames->children() as $child) {
-                $type = $child->getName();
-                $name = (string) $child;
-                if (!array_key_exists($type, $name_ranks)) {
-                  $name_ranks[$type] = 0;
-                }
-                switch ($type) {
-                  case 'GenbankCommonName':
-                    tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'genbank_common_name', $name, $name_ranks[$type]);
-                    break;
-                  case 'Synonym':
-                    tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'synonym', $name, $name_ranks[$type]);
-                    break;
-                  case 'CommonName':
-                  case 'Includes':
-                    tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'other_name', $name, $name_ranks[$type]);
-                    break;
-                  case 'EquivalentName':
-                    tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'equivalent_name', $name, $name_ranks[$type]);
-                    break;
-                  case 'Anamorph':
-                    tripal_phylogeny_taxonomy_add_organism_property($organism->organism_id, 'anamorph', $name, $name_ranks[$type]);
-                    break;
-                  case 'Name':
-                    // skip the Name stanza
-                    break;
-                  default:
-                    print "NOTICE: Skipping unrecognzed name type: $type\n";
-                    // do nothing for unrecognized types
-                }
-                $name_ranks[$type]++;
-              }
-            }
-
-            // Generate a nested array structure that can be used for importing the tree.
-            $rank = (string) $taxon->Rank;
-            $sci_name = (string) $taxon->ScientificName;
-            $lineage_depth = preg_split('/;\s*/', $lineage);
-            $parent = $tree;
-            $i = 1;
-            foreach ($taxon->LineageEx->children() as $child) {
-              $tid = (string) $child->TaxID;
-              $name = (string) $child->ScientificName;
-              $node_rank = (string) $child->Rank;
-              $node = array(
-                'name' => $name,
-                'depth' => $i,
-                'is_root' => 0,
-                'is_leaf' => 0,
-                'is_internal' => 1,
-                'left_index' => 0,
-                'right_index' => 0,
-                'parent' => $parent['name'],
-                'branch_set' => array(),
-                'properties' => array(
-                  $rank_cvterm->cvterm_id => $node_rank,
-                ),
-              );
-              $parent = $node;
-              tripal_phylogeny_taxonomy_import_add_node($tree, $node, $lineage_depth);
-              $i++;
-            }
-            // Now add in the leaf node.
-            $node = array(
-              'name' => $sci_name,
-              'depth' => $i,
-              'is_root' => 0,
-              'is_leaf' => 1,
-              'is_internal' => 0,
-              'left_index' => 0,
-              'right_index' => 0,
-              'parent' => $parent['name'],
-              'organism_id' => $organism->organism_id,
-              'properties' => array(
-                $rank_cvterm->cvterm_id => $rank,
-              ),
-            );
-            tripal_phylogeny_taxonomy_import_add_node($tree, $node, $lineage_depth);
-
-            // Set the indices for the tree.
-            tripal_phylogeny_assign_tree_indices($tree);
-          } // end: if ($xml) { ...
-        } // end: if ($taxid) { ...
-      } // end: if ($xml) { ...
-    } // end: while ($organism = $organisms->fetchObject()) { ...
-
-    // Now add the tree.
-    $options = array('taxonomy' => 1);
-    tripal_phylogeny_import_tree($tree, $phylotree, $options);
-
-    // The tree is always synced; the check for a user request is disabled.
-    $ids = array($phylotree->phylotree_id);
-    chado_node_sync_records('phylotree', FALSE, FALSE, array(), $ids);
-  }
-  catch (Exception $e) {
-    $transaction->rollback();
-    print "\n"; // make sure we start errors on new line
-    watchdog_exception('tripal_phylogeny', $e);
-    print "FAILED: Rolling back database changes...\n";
-  }
-}
-
-/**
- * Retrieves and parses an XML response from NCBI.
- *
- * @param $url
- *   The full URL of the request.
- *
- * @return SimpleXMLElement
- *   The parsed response.
- *
- * @throws Exception
- *   If the URL cannot be opened or the response cannot be parsed as XML.
- */
-function _tripal_phylogeny_ncbi_fetch_xml($url) {
-  $rfh = fopen($url, "r");
-  if (!$rfh) {
-    // A failed fopen() returns FALSE, which is not a handle that the read
-    // loop below can use.
-    throw new Exception("Could not retrieve the NCBI response from: $url");
-  }
-
-  $xml_text = '';
-  while (!feof($rfh)) {
-    $chunk = fread($rfh, 255);
-    if ($chunk === FALSE) {
-      break;
-    }
-    $xml_text .= $chunk;
-  }
-  fclose($rfh);
-
-  return new SimpleXMLElement($xml_text);
-}
-
-/**
- * Adds a node to the nested taxonomy tree array.
- *
- * Starting at the root's branch set, the function descends one level for
- * each lineage name that matches an existing node and appends the node to
- * the branch set it ends up in. Nothing is added if a node with the same
- * name and the same depth is found along the way.
- *
- * @param $tree
- *   The tree array. It is updated in place.
- * @param $node
- *   The node to add. It must have the keys 'name' and 'depth'.
- * @param $lineage_depth
- *   A zero-indexed array of the lineage names of the node, ordered from
- *   the top of the tree down.
- */
-function tripal_phylogeny_taxonomy_import_add_node(&$tree, $node, $lineage_depth) {
-
-   // Get the branch set for the tree root.
-   $branch_set = &$tree['branch_set'];
-
-   // Iterate through the tree up until the depth where this node will
-   // be placed.
-   $node_depth = $node['depth'];
-   for ($i = 1; $i <= $node_depth; $i++) {
-     // The lineage may have fewer names than the depth of the node, in
-     // which case there is no name to descend into at this depth.
-     $lineage_name = isset($lineage_depth[$i - 1]) ? $lineage_depth[$i - 1] : NULL;
-
-     // Iterate through any existing nodes in the branch set to see if
-     // the node name matches the correct name for the lineage at this
-     // depth. If it matches then it is inside of this branch set that
-     // we will place the node.
-     $num_branches = is_array($branch_set) ? count($branch_set) : 0;
-     for ($j = 0; $j < $num_branches; $j++) {
-       // If this node already exists in the tree then return. The depth must
-       // be compared, not assigned: two levels of a lineage can share a name
-       // and an assignment would also overwrite the depth of the stored node.
-       if ($branch_set[$j]['name'] == $node['name'] and
-           $branch_set[$j]['depth'] == $node['depth']) {
-         return;
-       }
-       // Otherwise, set the branch to be the current branch and continue.
-       if ($lineage_name !== NULL and $branch_set[$j]['name'] == $lineage_name) {
-         $branch_set = &$branch_set[$j]['branch_set'];
-         break;
-       }
-     }
-   }
-   // Add the node to the last branch set.  This should be where this node goes.
-   $branch_set[] = $node;
-}
-
-/**
- * Adds a property from the organism_property vocabulary to an organism.
- *
- * Nothing is done if the value is empty.
- *
- * @param $organism_id
- *   The ID of the organism the property belongs to.
- * @param $term_name
- *   The name of the property term in the 'organism_property' vocabulary.
- * @param $value
- *   The value of the property.
- * @param $rank
- *   Optional. When zero (the default) the existing property is deleted
- *   before the new value is inserted.
- */
-function tripal_phylogeny_taxonomy_add_organism_property($organism_id, $term_name, $value, $rank = 0) {
-  if (!$value) {
-    return;
-  }
-
-  $record = array(
-    'table' => 'organism',
-    'id' => $organism_id
-  );
-  $property = array(
-    'type_name' => $term_name,
-    // The vocabulary name must be a quoted string; unquoted it is read as
-    // an undefined constant.
-    'cv_name' => 'organism_property',
-    'value' => $value
-  );
-  // Delete all properties of this type if the rank is zero.
-  if ($rank == 0) {
-    chado_delete_property($record, $property);
-  }
-  chado_insert_property($record, $property);
-}

+ 0 - 114
legacy/tripal_phylogeny/tripal_phylogeny.install

@@ -129,120 +129,6 @@ function tripal_phylogeny_schema() {
  */
 function tripal_phylogeny_add_cvterms() {
 
-  tripal_insert_cv(
-    'tripal_phylogeny',
-    'Terms used by the Tripal phylotree module for phylogenetic and taxonomic trees.'
-  );
-
-  // Existing terms are updated rather than duplicated.
-  $options = array('update_existing' => TRUE);
-
-  // Terms of the tripal_phylogeny vocabulary, keyed by name with the
-  // definition as the value. They are inserted in this order.
-  $phylogeny_terms = array(
-    'phylo_leaf' => 'A leaf node in a phylogenetic tree.',
-    'phylo_root' => 'The root node of a phylogenetic tree.',
-    'phylo_interior' => 'An interior node in a phylogenetic tree.',
-    'taxonomy' => 'A term used to indicate if a phylotree is a taxonomic tree',
-    'rank' => 'A taxonmic rank',
-  );
-  foreach ($phylogeny_terms as $term_name => $definition) {
-    tripal_insert_cvterm(
-      array(
-        'name' => $term_name,
-        'definition' => $definition,
-        'cv_name' => 'tripal_phylogeny',
-        'is_relationship' => 0,
-        'db_name' => 'tripal'
-      ),
-      $options
-    );
-  }
-
-  // Terms of the organism_property vocabulary that hold the details
-  // imported from NCBI. They have no definition.
-  $organism_terms = array(
-    'lineage',
-    'genetic_code',
-    'genetic_code_name',
-    'mitochondrial_genetic_code',
-    'mitochondrial_genetic_code_name',
-    'division',
-    'genbank_common_name',
-    'synonym',
-    'other_name',
-    'equivalent_name',
-    'anamorph'
-  );
-  foreach ($organism_terms as $term_name) {
-    tripal_insert_cvterm(
-      array(
-        'name' => $term_name,
-        'definition' => '',
-        'cv_name' => 'organism_property',
-        'is_relationship' => 0,
-        'db_name' => 'tripal'
-      ),
-      $options
-    );
-  }
 }
 
 

+ 0 - 19
legacy/tripal_phylogeny/tripal_phylogeny.module

@@ -114,25 +114,6 @@ function tripal_phylogeny_menu() {
     'access arguments' => array('administer tripal phylotree'),
     'type' => MENU_CALLBACK,
   );
-  // Data Loaders
-   $items['admin/tripal/loaders/newic_phylotree_loader'] = array(
-     'title' => 'Phylogenetic Trees (Newic format)',
-     'description' => 'Loads phylogenetic trees in Newic format. (Redirects to create a phylogenetic tree content type)',
-     'page callback' => 'drupal_goto',
-     'page arguments' => array('node/add/chado-phylotree'),
-     'access arguments' => array('administer tripal phylotree'),
-     'type' => MENU_NORMAL_ITEM,
-   );
-
-   $items['admin/tripal/loaders/ncbi_taxonomy_loader'] = array(
-     'title' => 'NCBI Taxonomy Loader',
-     'description' => 'Loads taxonomic details about installed organisms.',
-     'page callback' => 'drupal_get_form',
-     'page arguments' => array('tripal_phylogeny_taxonomy_load_form'),
-     'access arguments' => array('administer tripal phylotree'),
-     'file' => '/includes/tripal_phylogeny.taxonomy.inc',
-     'type' => MENU_NORMAL_ITEM,
-   );
 
    $items['taxonomy_view'] = array(
      'title' => 'Taxonomy',

+ 2 - 1
tripal/includes/TripalEntityUIController.inc

@@ -410,7 +410,8 @@ function tripal_entity_form_ajax_callback($form, $form_state) {
  function tripal_entity_form_validate($form, &$form_state) {
 
    // If the user is cancelling or deleting the entity then don't validate.
-   if ($form_state['clicked_button']['#name'] =='cancel_data' or
+   if (array_key_exists('clicked_button', $form_state) and
+       $form_state['clicked_button']['#name'] =='cancel_data' or
        $form_state['clicked_button']['#name'] =='delete_data') {
      return;
    }

+ 853 - 0
tripal_chado/api/modules/tripal_chado.phylotree.api.inc

@@ -0,0 +1,853 @@
+<?php
+
+/**
+ * Validates an $options array for insert or update of a phylotree record.
+ *
+ * If validation passes then any values that needed validation lookups
+ * (such as the dbxref, analysis, leaf_type, etc) will have their appropriate
+ * primary keys added to the $options array (analysis_id, type_id, dbxref_id),
+ * and missing default values will also be added.
+ *
+ * @param $val_type
+ *   The type of validation. Can be either 'insert' or 'update'.
+ * @param $options
+ *   An array of key/value pairs containing any of the valid keys for
+ *   either the tripal_insert_phylotree() or tripal_update_phylotree()
+ *   functions. For an 'update' the 'phylotree_id' key is required.
+ * @param $errors
+ *   An empty array where validation error messages will be set. The keys
+ *   of the array will be name of the field from the options array and the
+ *   value is the error message.
+ * @param $warnings
+ *   An empty array where validation warning messages will be set. The
+ *   warnings should not stop an insert or an update but should be provided
+ *   to the user as information by a drupal_set_message() if appropriate. The
+ *   keys of the array will be name of the field from the options array and the
+ *   value is the warning message. This function does not currently add any
+ *   warnings.
+ * @return
+ *   If validation fails then FALSE is returned.  The option that did not pass
+ *   validation will be added to the $errors array with the key being
+ *   the option and the value being the error message.  If validation
+ *   is successful then TRUE is returned.
+ *
+ */
+function tripal_validate_phylotree($val_type, &$options, &$errors, &$warnings) {
+
+  // An unknown validation type cannot be validated at all, so stop here
+  // rather than falling through into the 'update' branch below.
+  if ($val_type != 'insert' and $val_type != 'update') {
+    $message = "The val_type argument must be either 'update' or 'insert'.";
+    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
+    $errors['val_type'] = $message;
+    return FALSE;
+  }
+
+  // Set Defaults.
+  if ($val_type == 'insert') {
+    // Match by feature name.
+    if (!array_key_exists('match', $options)) {
+      $options['match'] = 'name';
+    }
+    // The regular expression is to match the entire node name.
+    if (!array_key_exists('name_re', $options)) {
+      $options['name_re'] = '^(.*)$';
+    }
+    // A dbxref is not required by Tripal but is required by the database
+    // field in the phylotree table.  Therefore, if the dbxref is not provided
+    // we can set this to be the null database and null dbxref which
+    // is represented as 'null:local:null'
+    if (!array_key_exists('dbxref', $options)) {
+      $options['dbxref'] = "null:local:null";
+    }
+  }
+
+  // Make sure required values are set.
+  if ($val_type == 'insert') {
+    if (!array_key_exists('name', $options)) {
+      $errors['name'] = t('Please provide the name of the tree.');
+      return FALSE;
+    }
+    if (!array_key_exists('description', $options)) {
+      $errors['description'] = t('Please provide a description for this tree.');
+      return FALSE;
+    }
+    if (!array_key_exists('analysis', $options) and !array_key_exists('analysis_id', $options)) {
+      $errors['analysis'] = t('Please provide an analysis or analysis_id for this tree.');
+      return FALSE;
+    }
+    if (!array_key_exists('tree_file', $options)) {
+      $errors['tree_file'] = t('Please provide either the full path to the tree_file or a Drupal managed file ID number.');
+      return FALSE;
+    }
+    if (!array_key_exists('format', $options) or !$options['format']) {
+      $errors['format'] = t('Please provide a file format for the tree file.');
+      return FALSE;
+    }
+    // Make sure the file format is correct
+    if ($options['format'] != 'newick' and $options['format'] != 'taxonomy') {
+      $errors['format'] = t('The file format is not supported. Currently only the "newick" file format is supported.');
+      return FALSE;
+    }
+  }
+  else {
+    // Does the phylotree ID exist and is it valid
+    if (!array_key_exists('phylotree_id', $options)) {
+      $errors['phylotree_id'] = t('Please provide the ID for the tree.');
+      return FALSE;
+    }
+    $exists = chado_select_record('phylotree', array('phylotree_id'),
+        array('phylotree_id' => $options['phylotree_id']), array('has_record' => 1));
+    if (!$exists) {
+      $errors['phylotree_id'] = t('The phylotree_id does not exist.');
+      return FALSE;
+    }
+
+  }
+
+  // Make sure the file exists if one is specified
+  if (array_key_exists('tree_file', $options) and $options['tree_file']) {
+    // If this is a numeric Drupal file then all is good, no need to check.
+    if (!is_numeric($options['tree_file'])) {
+      if (!file_exists($options['tree_file'])) {
+        $errors['tree_file'] = t('The file provided does not exists.');
+        return FALSE;
+      }
+    }
+    // Make sure the file format is correct
+    if (!array_key_exists('format', $options) or
+        ($options['format'] != 'newick' and $options['format'] != 'taxonomy')) {
+      $errors['format'] = t('Please provide a supported file format. Currently only the "newick" file format is supported.');
+      return FALSE;
+    }
+
+    // If no leaf type is provided then use the polypeptide term.
+    if (!array_key_exists('leaf_type', $options) or !$options['leaf_type']) {
+      $options['leaf_type'] = 'polypeptide';
+    }
+  }
+
+  // Make sure the analysis exists.
+  $analysis = NULL;
+  if (array_key_exists('analysis_id', $options) and $options['analysis_id']) {
+    $analysis = chado_select_record('analysis', array('analysis_id'), array('analysis_id' => $options['analysis_id']));
+    if (!$analysis) {
+      $errors['analysis_id'] = t('The analysis ID provided does not exist.');
+      return FALSE;
+    }
+    $options['analysis_id'] = $analysis[0]->analysis_id;
+  }
+  if (array_key_exists('analysis', $options) and $options['analysis']) {
+    $analysis = chado_select_record('analysis', array('analysis_id'), array('name' => $options['analysis']));
+    if (!$analysis) {
+      $errors['analysis'] = t('The analysis name provided does not exist.');
+      return FALSE;
+    }
+    $options['analysis_id'] = $analysis[0]->analysis_id;
+  }
+
+  // Make sure the leaf type exists.
+  $type = NULL;
+  if (array_key_exists('leaf_type', $options) and $options['leaf_type']) {
+    if ($options['leaf_type'] == 'taxonomy') {
+      $values = array(
+        'cv_id' => array(
+           'name' => 'tripal_phylogeny'
+        ),
+        'name' => 'taxonomy'
+      );
+      $type = chado_select_record('cvterm', array('cvterm_id'), $values);
+      // Without this check a missing term would be dereferenced below.
+      if (!$type) {
+        $errors['leaf_type'] = t('The "taxonomy" term could not be found in the tripal_phylogeny vocabulary.');
+        return FALSE;
+      }
+    }
+    else {
+      $values = array(
+        'cv_id' => array(
+           'name' => 'sequence'
+        ),
+        'name' => $options['leaf_type']
+      );
+      $type = chado_select_record('cvterm', array('cvterm_id'), $values);
+      if (!$type) {
+        $errors['leaf_type'] = t('The leaf_type provided is not a valid Sequence Ontology term: %term.',
+            array('%term' => $options['leaf_type']));
+        return FALSE;
+      }
+    }
+    $options['type_id'] = $type[0]->cvterm_id;
+  }
+
+  // A Dbxref is required by the phylotree module, but if the
+  // tree was generated in-house and the site admin doesn't want to
+  // assign a local dbxref then we will set it to the null db
+  // and the local:null dbxref.
+  if (array_key_exists('dbxref', $options)) {
+    if (!$options['dbxref']) {
+        $options['dbxref'] = 'null:local:null';
+    }
+    // Split on the first colon only: everything before it is the database
+    // name and everything after it (colons included) is the accession.
+    $matches = array();
+    if (!preg_match('/^(.*?):(.*)$/', $options['dbxref'], $matches)) {
+      $errors['dbxref'] = t('The dbxref must be of the form DB:ACCESSION: %dbxref.',
+          array('%dbxref' => $options['dbxref']));
+      return FALSE;
+    }
+    $db_name = $matches[1];
+    $accession = $matches[2];
+    $values = array(
+      'accession' => $accession,
+      'db_id' => array(
+        'name' => $db_name
+      ),
+    );
+    $dbxref = chado_generate_var('dbxref', $values);
+    if (!$dbxref) {
+      // Report the value the caller supplied; $dbxref itself is empty here.
+      $errors['dbxref'] = t('The dbxref provided does not exist in the database: %dbxref.',
+          array('%dbxref' => $options['dbxref']));
+      return FALSE;
+    }
+    $options['dbxref_id'] = $dbxref->dbxref_id;
+  }
+
+  // Make sure the tree name is unique
+  if (array_key_exists('name', $options) and $options['name']) {
+    $sql = "
+      SELECT *
+      FROM {phylotree} P
+      WHERE
+        P.name = :name
+    ";
+    $args = array(':name' => $options['name']);
+    // When updating, the tree is allowed to keep its own name.
+    if ($val_type == 'update') {
+      $sql .= " AND NOT P.phylotree_id = :phylotree_id";
+      $args[':phylotree_id'] = $options['phylotree_id'];
+    }
+    $result = chado_query($sql, $args)->fetchObject();
+    if ($result) {
+      $errors['name'] = t("The tree name is in use by another tree. Please provide a different unique name for this tree.");
+      return FALSE;
+    }
+  }
+
+  return TRUE;
+}
+/**
+ * Inserts a phylotree record into Chado.
+ *
+ * This function validates the options passed prior to insertion of the record,
+ * and if validation passes then any values in the options array that needed
+ * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
+ * their appropriate primary key values added to the options array. On success
+ * the new 'phylotree_id' is also added to the options array.
+ *
+ * @param $options
+ *  An array of key value pairs with the following keys required:
+ *     'name':       The name of the tree. This will be displayed to users.
+ *     'description': A description about the tree
+ *     'analysis_id': The ID of the analysis to which this phylotree should be
+ *                   associated.
+ *     'analysis':   If the analysis_id key is not used then the analysis name
+ *                   may be provided to identify the analysis to which the tree
+ *                   should be associated.
+ *     'leaf_type':  A sequence ontology term or the word 'taxonomy'. If the
+ *                   type is 'taxonomy' then this tree represents a
+ *                   taxonomic tree.  The default, if not specified, is the
+ *                   term 'polypeptide'.
+ *     'tree_file':  The path of the file containing the phylogenetic tree to
+ *                   import or a Drupal managed_file numeric ID.
+ *     'format':     The file format. Currently only 'newick' is supported.
+ *  Optional keys:
+ *     'dbxref':     A database cross-reference of the form DB:ACCESSION.
+ *                   Where DB is the database name, which is already present
+ *                   in Chado, and ACCESSION is the unique identifier for
+ *                   this tree in the remote database.
+ *     'name_re':    If the leaf type is NOT 'taxonomy', then the value of
+ *                   this field can be a regular expression to pull out
+ *                   the name of the feature from the node label in the
+ *                   input tree. If no value is provided the entire label is
+ *                   used.
+ *     'match':      Set to 'uniquename' if the leaf nodes should be matched
+ *                   with the feature uniquename. Any other value matches on
+ *                   the feature name.
+ *     'load_now':   If set, the tree will be loaded immediately if a tree_file
+ *                   is provided. Otherwise, the tree will be loaded via
+ *                   a Tripal jobs call.
+ *     'no_load':    If set the tree file will not be loaded.
+ * @param $errors
+ *   An empty array where validation error messages will be set. The keys
+ *   of the array will be name of the field from the options array and the
+ *   value is the error message.
+ * @param $warnings
+ *   An empty array where validation warning messages will be set. The
+ *   warnings should not stop an insert or an update but should be provided
+ *   to the user as information by a drupal_set_message() if appropriate. The
+ *   keys of the array will be name of the field from the options array and the
+ *   value is the warning message.
+ * @return
+ *   TRUE for success and FALSE for failure.
+ */
+function tripal_insert_phylotree(&$options, &$errors, &$warnings) {
+  global $user;
+
+  // Trim only the string options the caller actually supplied.  Trimming a
+  // missing key would create it with an empty value, which in turn stops the
+  // validator from filling in its default.
+  foreach (array('name_re', 'leaf_type', 'name', 'format', 'tree_file') as $key) {
+    if (array_key_exists($key, $options) and is_string($options[$key])) {
+      $options[$key] = trim($options[$key]);
+    }
+  }
+  // An empty regular expression matches without capturing anything, so drop
+  // it and let validation supply the "entire label" default instead.
+  if (array_key_exists('name_re', $options) and
+      ($options['name_re'] === '' or $options['name_re'] === NULL)) {
+    unset($options['name_re']);
+  }
+
+  // Validate the incoming options.
+  $success = tripal_validate_phylotree('insert', $options, $errors, $warnings);
+  if (!$success) {
+    foreach ($errors as $field => $message) {
+      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
+    }
+    return FALSE;
+  }
+
+  // If the tree_file is numeric then it is a Drupal managed file. Load it
+  // before touching Chado so a bad file ID does not leave a tree record
+  // behind.
+  $file = NULL;
+  if (is_numeric($options['tree_file'])) {
+    $file = file_load($options['tree_file']);
+    if (!$file) {
+      $errors['tree_file'] = t('The managed file provided for the tree could not be loaded.');
+      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $errors['tree_file']);
+      return FALSE;
+    }
+  }
+
+  // If we're here then all is good, so add the phylotree record.
+  $values = array(
+    'analysis_id' => $options['analysis_id'],
+    'name' => $options['name'],
+    'dbxref_id' => $options['dbxref_id'],
+    'comment' => $options['description'],
+    'type_id' => $options['type_id'],
+  );
+  $phylotree = chado_insert_record('phylotree', $values);
+  if (!$phylotree) {
+    drupal_set_message(t('Unable to add phylotree.'), 'warning');
+    tripal_report_error('tripal_phylogeny', TRIPAL_WARNING, 'Insert phylotree: Unable to create phylotree where values: %values',
+        array('%values' => print_r($values, TRUE)));
+    return FALSE;
+  }
+  $phylotree_id = $phylotree['phylotree_id'];
+  $options['phylotree_id'] = $phylotree_id;
+
+  // Make a managed file permanent and associate it with the tree.  The job
+  // label is taken from the managed file name or, for a plain path, from the
+  // last path component.
+  if ($file) {
+    $file->status = FILE_STATUS_PERMANENT;
+    $file = file_save($file);
+    file_usage_add($file, 'tripal_phylogeny', $options['format'], $phylotree_id);
+    $real_file_path = drupal_realpath($file->uri);
+    $job_label = $file->filename;
+  }
+  else {
+    $real_file_path = $options['tree_file'];
+    $job_label = basename($real_file_path);
+  }
+
+  // If caller has requested to load the file now then do so, otherwise
+  // submit using a Tripal job.
+  if (!array_key_exists('no_load', $options) or !$options['no_load']) {
+    // The validator defaults 'match' to 'name', so compare against the
+    // literal value rather than testing for truthiness.
+    $load_options = array(
+      'phylotree_id' => $phylotree_id,
+      'leaf_type' => $options['leaf_type'],
+      'match' => $options['match'] == 'uniquename' ? 'uniquename' : 'name',
+      'name_re' => $options['name_re'],
+    );
+    if (array_key_exists('load_now', $options) and $options['load_now']) {
+      tripal_phylogeny_import_tree_file($real_file_path, $options['format'], $load_options);
+    }
+    else {
+      $args = array(
+        $real_file_path,
+        $options['format'],
+        $load_options,
+      );
+      if (tripal_add_job("Import Tree File: " . $job_label, 'tripal_phylogeny',
+          'tripal_phylogeny_import_tree_file', $args, $user->uid)) {
+        drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
+      }
+    }
+  }
+
+  return TRUE;
+}
+
+/**
+ * Updates a phylotree record in Chado.
+ *
+ * This function validates the options passed prior to update of the record
+ * and if validation passes then any values in the options array that needed
+ * validation lookups (such as the dbxref, analysis, leaf_type, etc) will have
+ * their appropriate primary key values added to the options array. The
+ * 'phylotree_id' key of the options array is set from $phylotree_id.
+ *
+ * If a 'tree_file' is provided, the existing phylonodes for the tree are
+ * removed and the new file is imported.
+ *
+ * @param $phylotree_id
+ *   The ID of the phylotree to update.
+ * @param $options
+ *  An array of key value pairs with the following optional keys:
+ *     'name':       The name of the tree. This will be displayed to users.
+ *     'description': A description about the tree
+ *     'analysis_id': The ID of the analysis to which this phylotree should be
+ *                   associated.
+ *     'analysis':   If the analysis_id key is not used then the analysis name
+ *                   may be provided to identify the analysis to which the tree
+ *                   should be associated.
+ *     'leaf_type':  A sequence ontology term or the word 'taxonomy'. If the
+ *                   type is 'taxonomy' then this tree represents a
+ *                   taxonomic tree.  The default, if not specified, is the
+ *                   term 'polypeptide'.
+ *     'tree_file':  The path of the file containing the phylogenetic tree to
+ *                   import or a Drupal managed_file numeric ID.
+ *     'format':     The file format. Currently only 'newick' is supported.
+ *     'dbxref':     A database cross-reference of the form DB:ACCESSION.
+ *                   Where DB is the database name, which is already present
+ *                   in Chado, and ACCESSION is the unique identifier for
+ *                   this tree in the remote database.
+ *     'name_re':    If the leaf type is NOT 'taxonomy', then the value of
+ *                   this field can be a regular expression to pull out
+ *                   the name of the feature from the node label in the
+ *                   input tree. If no value is provided the entire label is
+ *                   used.
+ *     'match':      Set to 'uniquename' if the leaf nodes should be matched
+ *                   with the feature uniquename. Any other value matches on
+ *                   the feature name.
+ *     'load_now':   If set, the tree will be loaded immediately if a tree_file
+ *                   is provided. Otherwise, the tree will be loaded via
+ *                   a Tripal jobs call.
+ * @return
+ *   TRUE for success and FALSE for failure.
+ */
+function tripal_update_phylotree($phylotree_id, &$options) {
+  global $user;
+
+  // The validator and the import code below read the tree ID from the
+  // options array, so make sure it agrees with the function argument.
+  if (!$phylotree_id and array_key_exists('phylotree_id', $options)) {
+    $phylotree_id = $options['phylotree_id'];
+  }
+  $options['phylotree_id'] = $phylotree_id;
+
+  // Validate the incoming options.
+  $errors = array();
+  $warnings = array();
+  $success = tripal_validate_phylotree('update', $options, $errors, $warnings);
+  if (!$success) {
+    foreach ($errors as $field => $message) {
+      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR, $message);
+    }
+    return FALSE;
+  }
+
+  // If we're here then all is good, so update the phylotree record with
+  // whichever fields the caller supplied.
+  $match = array(
+      'phylotree_id' => $phylotree_id,
+  );
+  $values = array();
+  $field_map = array(
+    'name' => 'name',
+    'analysis_id' => 'analysis_id',
+    'dbxref_id' => 'dbxref_id',
+    'description' => 'comment',
+    'type_id' => 'type_id',
+  );
+  foreach ($field_map as $option_key => $column) {
+    if (array_key_exists($option_key, $options) and $options[$option_key]) {
+      $values[$column] = $options[$option_key];
+    }
+  }
+
+  // Only touch the record when there is something to change; a call that
+  // supplies just a new tree_file has no column values to write.
+  if (count($values) > 0) {
+    $phylotree = chado_update_record('phylotree', $match, $values, array('return_record' => TRUE));
+    if (!$phylotree) {
+      drupal_set_message(t('Unable to update phylotree.'), 'warning');
+      tripal_report_error('tripal_phylogeny', TRIPAL_WARNING,
+          'Update phylotree: Unable to update phylotree where values: %values',
+          array('%values' => print_r($values, TRUE))
+      );
+      return FALSE;
+    }
+  }
+
+  // If we have a tree file, then import the tree
+  if (array_key_exists('tree_file', $options) and $options['tree_file']) {
+
+    // Load the replacement file first so that a bad file ID does not wipe
+    // out the nodes of the existing tree.
+    $new_file = NULL;
+    if (is_numeric($options['tree_file'])) {
+      $new_file = file_load($options['tree_file']);
+      if (!$new_file) {
+        tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
+            'Update phylotree: The managed file provided for the tree could not be loaded.');
+        return FALSE;
+      }
+    }
+
+    // Remove any existing nodes
+    chado_delete_record('phylonode', array('phylotree_id' => $phylotree_id));
+
+    // Make sure if we already have a file that we remove the old one.
+    $sql = "
+      SELECT FM.fid
+      FROM {file_managed} FM
+        INNER JOIN {file_usage} FU on FM.fid = FU.fid
+      WHERE FU.id = :id and FU.module = 'tripal_phylogeny'
+    ";
+    $old_fid = db_query($sql, array(':id' => $phylotree_id))->fetchField();
+    // Never delete the file that is about to be imported.
+    if ($old_fid and (!$new_file or $old_fid != $new_file->fid)) {
+      $old_file = file_load($old_fid);
+      if ($old_file) {
+        file_delete($old_file, TRUE);
+      }
+    }
+
+    // If the tree_file is numeric then it is a Drupal managed file and
+    // we want to make the file permanent and associated with the tree.
+    if ($new_file) {
+      $new_file->status = FILE_STATUS_PERMANENT;
+      $new_file = file_save($new_file);
+      file_usage_add($new_file, 'tripal_phylogeny', 'newick', $phylotree_id);
+      $real_file_path = drupal_realpath($new_file->uri);
+      $job_label = $new_file->filename;
+    }
+    else {
+      $real_file_path = $options['tree_file'];
+      $job_label = basename($real_file_path);
+    }
+
+    // The validator only supplies 'match' and 'name_re' defaults on insert,
+    // so fall back to matching the entire label against the feature name.
+    $name_re = '^(.*)$';
+    if (array_key_exists('name_re', $options) and trim($options['name_re']) != '') {
+      $name_re = trim($options['name_re']);
+    }
+    $load_options = array(
+      'phylotree_id' => $phylotree_id,
+      'leaf_type' => $options['leaf_type'],
+      'match' => (array_key_exists('match', $options) and $options['match'] == 'uniquename') ? 'uniquename' : 'name',
+      'name_re' => $name_re,
+    );
+
+    // If caller has requested to load the file now then do so, otherwise
+    // submit using a Tripal job.
+    if (array_key_exists('load_now', $options) and $options['load_now']) {
+      tripal_phylogeny_import_tree_file($real_file_path, $options['format'], $load_options);
+    }
+    else {
+      $args = array(
+        $real_file_path,
+        $options['format'],
+        $load_options,
+      );
+      if (tripal_add_job("Import Tree File: " . $job_label, 'tripal_phylogeny',
+          'tripal_phylogeny_import_tree_file', $args, $user->uid)) {
+        drupal_set_message(t('The tree visualizations will appear once the tree is fully imported.'));
+      }
+    }
+  }
+
+  return TRUE;
+}
+
+/**
+ * Removes a phylotree record from Chado.
+ *
+ * @param $phylotree_id
+ *   The ID of the phylotree record to remove.
+ *
+ * @return
+ *   FALSE when no phylotree_id is given; otherwise the result of the
+ *   chado_delete_record() call on the phylotree table.
+ */
+function tripal_delete_phylotree($phylotree_id) {
+
+  // With an ID in hand the record can be removed straight away.
+  if ($phylotree_id) {
+    return chado_delete_record('phylotree', array('phylotree_id' => $phylotree_id));
+  }
+
+  // No ID was supplied, so there is nothing that can be deleted.
+  tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
+      'Please provide a phylotree_id to delete a tree.');
+  return FALSE;
+}
+
+/**
+ * Walks the tree depth-first and assigns left and right indices to nodes.
+ *
+ * A node that has a 'name' key receives a 'left_index'. It also receives a
+ * 'right_index' when it has an 'is_leaf' key, and again after each of its
+ * children in 'branch_set' has been processed. The running index advances by
+ * 100 for every assignment, which leaves room for nodes added later.
+ *
+ * @param $tree
+ *   The tree array. It is modified in place.
+ * @param $index
+ *   The running index. Callers normally omit it; it is passed by reference
+ *   between the recursive calls.
+ */
+function tripal_assign_phylogeny_tree_indices(&$tree, &$index = 1) {
+  $step = 100;
+
+  if (array_key_exists('name', $tree)) {
+    $index += $step;
+    $tree['left_index'] = $index;
+    if (array_key_exists('is_leaf', $tree)) {
+      $index += $step;
+      $tree['right_index'] = $index;
+    }
+  }
+
+  if (!array_key_exists('branch_set', $tree)) {
+    return;
+  }
+
+  // Descend into every child; once a child is done, this node's right index
+  // is moved past everything assigned so far.
+  foreach (array_keys($tree['branch_set']) as $child_key) {
+    tripal_assign_phylogeny_tree_indices($tree['branch_set'][$child_key], $index);
+    $index += $step;
+    $tree['right_index'] = $index;
+  }
+}
+
+/**
+ * Iterates through the tree array and creates phylonodes in Chado.
+ *
+ * The function iterates through the tree in a top-down approach adding
+ * parent internal nodes prior to leaf nodes.  Each node of the tree should have
+ * the following fields:
+ *
+ *   -name:         The name (or label) for this node.
+ *   -depth:        The depth of the node in the tree.
+ *   -is_root:      Set to 1 if this node is a root node.
+ *   -is_leaf:      Set to 1 if this node is a leaf node.
+ *   -is_internal:  Set to 1 if this node is an internal node.
+ *   -left_index:   The index of the node to the left in the tree.
+ *   -right_index:  The index of the node to the right in the tree.
+ *   -branch_set:   An array containing a list of nodes of that are children
+ *                  of the node.
+ *   -parent:       The name of the parent node.
+ *   -length:       Optional. Stored as the distance of the phylonode.
+ *   -organism_id:  The organism_id for associating the node with an organism.
+ *   -properties:   An array of key/value pairs where the key is the cvterm_id
+ *                  and the value is the property value.  These properties
+ *                  will be associated with the phylonode.
+ *
+ * Prior to importing the tree the indices can be set by using the
+ * tripal_assign_phylogeny_tree_indices() function.
+ *
+ * @param $tree
+ *   The tree array. The 'phylonode_id' of each inserted node is added to it.
+ * @param $phylotree
+ *   The phylotree record object; its phylotree_id is stored on every node.
+ * @param $options
+ *   The options provide some direction for how the tree is imported.  The
+ *   following keys can be used:
+ *   -taxonomy:  Set to 1 if this tree is a taxonomic tree. Set to 0
+ *               otherwise. A missing key is treated as 0.
+ *   -leaf_type: Set to the leaf type name. If this is a non-taxonomic tree
+ *               that is associated with features, then this should be the
+ *               Sequence Ontology term for the feature (e.g. polypeptide).
+ *               If this is a taxonomic tree then this option is not needed.
+ *   -match:     Set to either 'name' or 'uniquename'.  This is used for
+ *               matching the feature name or uniquename with the node name.
+ *               This is not needed for taxonomic trees.
+ *   -name_re:   Set to a regular expression that can be used for matching the
+ *               node name with the feature name if the node name is not
+ *               identical to the feature name. The first capture group is
+ *               used as the feature name.
+ * @param $vocab
+ *   Optional. An array containing a set of key/value pairs that maps node
+ *   types to CV terms.  The keys must be 'root', 'internal' or 'leaf'.  If
+ *   no vocab is provided then the terms provided by the tripal_phylogeny
+ *   CV will be used.
+ * @param $parent
+ *   This argument is not needed when the function is first called. This
+ *   function is recursive and this argument is used on recursive calls.
+ *
+ * @throws Exception
+ *   If a phylonode record cannot be inserted. Child nodes need the parent's
+ *   phylonode_id, so the import cannot continue past a failed insert.
+ */
+function tripal_phylogeny_import_tree(&$tree, $phylotree, $options, $vocab = array(), $parent = NULL) {
+
+  // Get the vocabulary terms used to describe nodes in the tree if one
+  // wasn't provided.  The lookup reports its own errors and returns FALSE
+  // when a term is missing, in which case no node can be typed.
+  if (!is_array($vocab) or count($vocab) == 0) {
+    $vocab = tripal_phylogeny_get_node_types_vocab();
+    if (!$vocab) {
+      return;
+    }
+  }
+
+  if (is_array($tree) and array_key_exists('name', $tree)) {
+    $values = array(
+      'phylotree_id' => $phylotree->phylotree_id,
+      'left_idx'  => $tree['left_index'],
+      'right_idx' => $tree['right_index'],
+    );
+    // Add in any optional values to the $values array if they are present
+    $has_name = !empty($tree['name']);
+    if ($has_name) {
+      $values['label'] = $tree['name'];
+    }
+    if (!empty($tree['length'])) {
+      $values['distance'] = $tree['length'];
+    }
+    // Set the type of node.  The flags are optional keys, so test them
+    // with empty() rather than reading them directly.
+    if (!empty($tree['is_root'])) {
+      $values['type_id'] = $vocab['root']->cvterm_id;
+    }
+    else if (!empty($tree['is_internal'])) {
+      $values['type_id'] = $vocab['internal']->cvterm_id;
+      $values['parent_phylonode_id'] = $parent['phylonode_id'];
+      // TODO: a feature may be associated here but it is recommended that it
+      // be a feature of type SO:match and should represent the alignment of
+      // all features beneath it.
+    }
+    else if (!empty($tree['is_leaf'])) {
+      $values['type_id'] = $vocab['leaf']->cvterm_id;
+      $values['parent_phylonode_id'] = $parent['phylonode_id'];
+
+      // This is a sequence-based tree. Try to match leaf nodes with
+      // features, which is only possible when the node has a name.
+      if ($has_name and empty($options['taxonomy'])) {
+
+        // Anything other than an explicit 'name' matches on the uniquename.
+        $match_column = 'uniquename';
+        if (array_key_exists('match', $options) and $options['match'] == 'name') {
+          $match_column = 'name';
+        }
+
+        // Default to the whole label and narrow it to the first capture
+        // group only when the expression matches and actually has one.
+        $feature_name = $tree['name'];
+        if (!empty($options['name_re'])) {
+          $matches = array();
+          $re = $options['name_re'];
+          if (preg_match("/$re/", $tree['name'], $matches) and isset($matches[1])) {
+            $feature_name = $matches[1];
+          }
+        }
+
+        $sel_values = array(
+          $match_column => $feature_name,
+          'type_id' => array(
+            'name' => $options['leaf_type'],
+            'cv_id' => array(
+              'name' => 'sequence'
+            ),
+          ),
+        );
+        $feature = chado_select_record('feature', array('feature_id'), $sel_values);
+        // Only an unambiguous match can be associated: zero or multiple
+        // matching features leave the node without a feature.
+        if (is_array($feature) and count($feature) == 1) {
+          $values['feature_id'] = $feature[0]->feature_id;
+        }
+      }
+    }
+
+    // Insert the new node and then add it's assigned phylonode_id to the node
+    $phylonode = chado_insert_record('phylonode', $values);
+    if (!$phylonode) {
+      throw new Exception('Could not insert the phylonode with values: ' . print_r($values, TRUE));
+    }
+    $tree['phylonode_id'] = $phylonode['phylonode_id'];
+
+    // This is a taxonomic tree, so associate this node with an
+    // organism if one is provided.
+    if (array_key_exists('organism_id', $tree)) {
+      $values = array(
+        'phylonode_id' => $tree['phylonode_id'],
+        'organism_id' => $tree['organism_id']
+      );
+      chado_insert_record('phylonode_organism', $values);
+    }
+
+    // Associate any properties
+    if (array_key_exists('properties', $tree)) {
+      foreach ($tree['properties'] as $type_id => $value) {
+        $values = array(
+          'phylonode_id' => $tree['phylonode_id'],
+          'type_id' => $type_id,
+          'value' => $value,
+        );
+        chado_insert_record('phylonodeprop', $values);
+      }
+    }
+  }
+  if (is_array($tree) and array_key_exists('branch_set', $tree)) {
+    foreach ($tree['branch_set'] as $key => $node) {
+      tripal_phylogeny_import_tree($tree['branch_set'][$key], $phylotree, $options, $vocab, $tree);
+    }
+  }
+}
+
+/**
+ * Retrieves the CV terms used to type the nodes of a phylogenetic tree.
+ *
+ * The terms 'phylo_leaf', 'phylo_interior' and 'phylo_root' are looked up in
+ * the 'tripal_phylogeny' vocabulary.
+ *
+ * @return
+ *   An array with the keys 'leaf', 'internal' and 'root', each holding the
+ *   matching cvterm record, or FALSE (after reporting an error) if any of
+ *   the three terms cannot be found.
+ */
+function tripal_phylogeny_get_node_types_vocab() {
+  // Maps the key used in the returned array to the cvterm name.
+  $term_names = array(
+    'leaf' => 'phylo_leaf',
+    'internal' => 'phylo_interior',
+    'root' => 'phylo_root',
+  );
+
+  $vocab = array();
+  foreach ($term_names as $node_type => $term_name) {
+    $values = array(
+      'name' => $term_name,
+      'cv_id' => array(
+        'name' => 'tripal_phylogeny',
+      ),
+    );
+    $term = chado_generate_var('cvterm', $values);
+    if (!$term) {
+      // Name the node type that is actually missing in the message.
+      tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
+          "Could not find the $node_type vocabulary term: '$term_name'. It should " .
+          "already be present as part of the tripal_phylogeny vocabulary.");
+      return FALSE;
+    }
+    $vocab[$node_type] = $term;
+  }
+  return $vocab;
+}
+
+
+/**
+ * Imports a tree file.
+ *
+ * This function is used as a wrapper for loading a phylogenetic tree using
+ * any number of file loaders. The import runs inside a database transaction
+ * that is rolled back if an exception is thrown.
+ *
+ * @param $file_name
+ *   The name of the file containing the phylogenetic tree to import.
+ * @param $format
+ *   The format of the file. Currently only the 'newick' file format is
+ *   supported.
+ * @param $options
+ *   An array with the following keys:
+ *     'phylotree_id': Required. The imported nodes will be associated with
+ *                   this existing tree.
+ *     'leaf_type':  A sequence ontology term or the word 'taxonomy'. If the
+ *                   type is 'taxonomy' then this tree represents a
+ *                   taxonomic tree.  The default, if not specified, is the
+ *                   term 'polypeptide'.
+ *     'name_re':    If the leaf type is NOT 'taxonomy', then the value of
+ *                   this field can be a regular expression to pull out
+ *                   the name of the feature from the node label in the
+ *                   input tree. If no value is provided the entire label is
+ *                   used.
+ *     'match':      Set to 'uniquename' if the leaf nodes should be matched
+ *                   with the feature uniquename.
+ * @param $job_id
+ *   Optional. Not used by this function; accepted so that the function can
+ *   be called with a trailing job ID argument.
+ *
+ * @return
+ *   TRUE if the tree was imported, FALSE otherwise.
+ */
+function tripal_phylogeny_import_tree_file($file_name, $format, $options = array(), $job_id = NULL) {
+
+  // Set some option details.
+  if (!array_key_exists('leaf_type', $options)) {
+    $options['leaf_type'] = 'polypeptide';
+  }
+  if (!array_key_exists('match', $options)) {
+    $options['match'] = 'name';
+  }
+  if (!array_key_exists('name_re', $options) or trim($options['name_re']) == '') {
+    $options['name_re'] = '^(.*)$';
+  }
+  $options['name_re'] = trim($options['name_re']);
+  // The node importer reads a 'taxonomy' flag; derive it from the leaf type
+  // when the caller did not set it.
+  if (!array_key_exists('taxonomy', $options)) {
+    $options['taxonomy'] = ($options['leaf_type'] == 'taxonomy') ? 1 : 0;
+  }
+
+  // The tree record must already exist: everything below looks it up by ID.
+  if (!array_key_exists('phylotree_id', $options)) {
+    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
+        'The phylotree_id is required for importing the tree.');
+    return FALSE;
+  }
+
+  // Only one parser is available, so reject anything else up front instead
+  // of reaching the import step with no tree.
+  if ($format != 'newick') {
+    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
+        'The tree file format is not supported for import: %format.',
+        array('%format' => $format));
+    return FALSE;
+  }
+
+  // get the phylotree record.
+  $values = array('phylotree_id' => $options['phylotree_id']);
+  $phylotree = chado_generate_var('phylotree', $values);
+
+  if (!$phylotree) {
+    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
+        'Could not find the phylotree using the ID provided: %phylotree_id.',
+        array('%phylotree_id' => $options['phylotree_id']));
+    return FALSE;
+  }
+
+  $transaction = db_transaction();
+  print "\nNOTE: Loading of this tree file is performed using a database transaction. \n" .
+      "If the load fails or is terminated prematurely then the entire set of \n" .
+      "insertions/updates is rolled back and will not be found in the database\n\n";
+  try {
+
+    // Load the Newick parser.  It now lives in the tripal_chado module; the
+    // legacy tripal_phylogeny location is tried only if the function is
+    // still undefined, so the parser is never included twice.
+    if (!function_exists('tripal_phylogeny_parse_newick_file')) {
+      module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.phylotree_newick');
+    }
+    if (!function_exists('tripal_phylogeny_parse_newick_file')) {
+      module_load_include('inc', 'tripal_phylogeny', 'includes/parsers/tripal_phylogeny.newick_parser');
+    }
+    if (!function_exists('tripal_phylogeny_parse_newick_file')) {
+      throw new Exception('The Newick parser could not be loaded.');
+    }
+
+    // Parse the tree into the expected nested node format.
+    $tree = tripal_phylogeny_parse_newick_file($file_name);
+    if (!$tree) {
+      throw new Exception("The tree file could not be parsed: $file_name");
+    }
+
+    // Assign the right and left indices to the tree nodes
+    tripal_assign_phylogeny_tree_indices($tree);
+
+    // Iterate through the tree nodes and add them to Chado in accordance
+    // with the details in the $options array.
+    tripal_phylogeny_import_tree($tree, $phylotree, $options);
+  }
+  catch (Exception $e) {
+    $transaction->rollback();
+    watchdog_exception('tripal_phylogeny', $e);
+    print "\nFAILED: Rolling back database changes...\n";
+    return FALSE;
+  }
+  print "\nDone Importing Tree.\n";
+  return TRUE;
+}

+ 3 - 2
tripal_chado/includes/TripalFields/sbo__database_cross_reference/sbo__database_cross_reference_widget.inc

@@ -222,12 +222,13 @@ class sbo__database_cross_reference_widget extends ChadoFieldWidget {
  */
 function sbo__database_cross_reference_widget_form_ajax_callback($form, $form_state) {
 
+  $instance = $form['#instance'];
   $field_name = $form_state['triggering_element']['#parents'][0];
   $delta = $form_state['triggering_element']['#parents'][2];
   $field = field_info_field($field_name);
   $field_type = $field['type'];
-  $field_table = $this->instance['settings']['chado_table'];
-  $field_column = $this->instance['settings']['chado_column'];
+  $field_table = $instance['settings']['chado_table'];
+  $field_column = $instance['settings']['chado_column'];
   $field_prefix = 'chado-' . $field_table . '__dbxref_id';
 
   // Check to see if this dbxref already exists. If not then

+ 331 - 0
tripal_chado/includes/loaders/tripal_chado.phylotree_newick.inc

@@ -0,0 +1,331 @@
+<?php
+
+/**
+ * This code parses the Newick format according to the following
+ * grammar:
+ *
+ *  Tree --> Subtree ";" | Branch ";"
+ *  Subtree --> Leaf | Internal
+ *  Leaf --> Name
+ *  Internal --> "(" BranchSet ")" Name
+ *  BranchSet --> Branch | BranchSet "," Branch
+ *  Branch --> Subtree Length
+ *  Name --> empty | string
+ *  Length --> empty | ":" number
+ *
+ */
+
+/**
+ * Parses a Newick-formatted file into a nested array of tree nodes.
+ *
+ * Resets the global counter that is used to name unnamed nodes, opens the
+ * file and passes the handle to tripal_phylogeny_parse_newick_tree().
+ *
+ * @param $file_name
+ *   The path of the Newick file to read.
+ *
+ * @return
+ *   The node array returned by tripal_phylogeny_parse_newick_tree(), or an
+ *   empty array if the file could not be opened.
+ */
+function tripal_phylogeny_parse_newick_file($file_name) {
+
+  // Unnamed nodes receive sequential numeric names, starting again at 1 for
+  // every file that is parsed.
+  global $tripal_phylogeny_bootstrap;
+  $tripal_phylogeny_bootstrap = 1;
+
+  $fp = fopen($file_name, 'r');
+  if (!$fp) {
+    // Report the problem rather than silently handing back an empty tree.
+    tripal_report_error('tripal_phylogeny', TRIPAL_ERROR,
+      'Could not open the Newick file for reading: %file_name.',
+      array('%file_name' => $file_name));
+    return array();
+  }
+
+  $tree = tripal_phylogeny_parse_newick_tree($fp);
+
+  // The handle is only needed while parsing, so release it here.
+  fclose($fp);
+
+  return $tree;
+}
+/**
+ * Parses the top-level "Tree" rule of the Newick grammar.
+ *
+ * Reads a subtree, flags it as the root, reads an optional branch length
+ * and then expects the terminating semicolon.
+ *
+ * @param $fp
+ *   An open file handle positioned at the start of the tree.
+ * @param $depth
+ *   The depth to assign to the root node. Defaults to 0.
+ *
+ * @return
+ *   The root node array. If the terminating semicolon is missing an error
+ *   is printed and the script exits.
+ */
+function tripal_phylogeny_parse_newick_tree($fp, $depth = 0) {
+
+  $root = tripal_phylogeny_parse_newick_subtree($fp, $depth);
+  $root['is_root'] = 1;
+
+  // A tree is either a bare subtree or a branch (a subtree followed by a
+  // length). If the semicolon comes next there is no length to read.
+  if (tripal_phylogeny_parse_newick_get_token($fp) != ";") {
+    tripal_phylogeny_parse_newick_replace_token($fp);
+
+    // Read the length of the root branch.
+    $root['length'] = tripal_phylogeny_parse_newick_length($fp, $depth);
+
+    // Anything other than a semicolon at this point is a syntax error.
+    if (tripal_phylogeny_parse_newick_get_token($fp) != ';') {
+      print "Syntax Error: missing trailing semicolon.\n";
+      exit;
+    }
+  }
+
+  return $root;
+}
+
+/**
+ * Parses the "Subtree" rule: either an internal node or a leaf.
+ *
+ * @param $fp
+ *   An open file handle positioned at the start of the subtree.
+ * @param $depth
+ *   The depth to assign to the node that is read.
+ *
+ * @return
+ *   The node array of the internal node if one could be read, otherwise a
+ *   leaf node array.
+ */
+function tripal_phylogeny_parse_newick_subtree($fp, $depth) {
+
+  // Try the internal-node rule first. Its branches are one level deeper.
+  $node = tripal_phylogeny_parse_newick_internal($fp, $depth + 1);
+  if (is_array($node)) {
+    // The internal node itself sits at the current depth.
+    $node['depth'] = $depth;
+    return $node;
+  }
+
+  // It was not an internal node so read it as a leaf.
+  $leaf_name = tripal_phylogeny_parse_newick_leaf($fp, $depth);
+  return array(
+    'name' => $leaf_name,
+    'depth' => $depth,
+    'is_leaf' => TRUE,
+    'descendents' => 0,
+  );
+}
+
+/**
+ * Parses the "Branch" rule: a subtree followed by an optional length.
+ *
+ * @param $fp
+ *   An open file handle positioned at the start of the branch.
+ * @param $depth
+ *   The depth to assign to the node at the head of the branch.
+ *
+ * @return
+ *   The node array of the subtree with a 'length' element added.
+ */
+function tripal_phylogeny_parse_newick_branch($fp, $depth) {
+
+  // The subtree must be consumed before the length that follows it.
+  $branch = tripal_phylogeny_parse_newick_subtree($fp, $depth);
+  $branch['length'] = tripal_phylogeny_parse_newick_length($fp, $depth);
+
+  return $branch;
+}
+/**
+ * Parses the "Internal" rule: "(" BranchSet ")" Name.
+ *
+ * @param $fp
+ *   An open file handle positioned at the start of the candidate node.
+ * @param $depth
+ *   The depth to assign to the branches of the node.
+ *
+ * @return
+ *   The node array of the internal node, or FALSE if the input at the
+ *   current position is not an internal node.
+ */
+function tripal_phylogeny_parse_newick_internal($fp, $depth) {
+
+  // An internal node always begins with an open paren. If the next token is
+  // something else, put it back so it can be read as another rule.
+  $token = tripal_phylogeny_parse_newick_get_token($fp);
+  if ($token != '(') {
+    tripal_phylogeny_parse_newick_replace_token($fp);
+    return FALSE;
+  }
+
+  $children = tripal_phylogeny_parse_newick_branchset($fp, $depth);
+  if (!is_array($children)) {
+    return FALSE;
+  }
+
+  // The branch set must be followed by a closing paren.
+  $token = tripal_phylogeny_parse_newick_get_token($fp);
+  if ($token != ')') {
+    tripal_phylogeny_parse_newick_replace_token($fp);
+    return FALSE;
+  }
+
+  $node_name = tripal_phylogeny_parse_newick_name($fp, $depth);
+
+  // Tell each branch who its parent is, and count every node below this one.
+  $total = 0;
+  foreach (array_keys($children) as $index) {
+    $children[$index]['parent'] = $node_name;
+    $total += 1 + $children[$index]['descendents'];
+  }
+
+  return array(
+    'name' => $node_name,
+    'depth' => $depth,
+    'branch_set' => $children,
+    'is_internal' => TRUE,
+    'descendents' => $total,
+  );
+}
+
+/**
+ * Parses the "BranchSet" rule: one or more branches separated by commas.
+ *
+ * @param $fp
+ *   An open file handle positioned at the start of the branch set.
+ * @param $depth
+ *   The depth to assign to each branch in the set.
+ *
+ * @return
+ *   An array of branch node arrays, or FALSE if the first branch could not
+ *   be read.
+ */
+function tripal_phylogeny_parse_newick_branchset($fp, $depth) {
+
+  // A branch set always starts with a branch.
+  $first = tripal_phylogeny_parse_newick_branch($fp, $depth);
+  if (!is_array($first)) {
+    return FALSE;
+  }
+  $branches = array($first);
+
+  // Without a comma the set ends here, so put the token back for the caller.
+  if (tripal_phylogeny_parse_newick_get_token($fp) != ',') {
+    tripal_phylogeny_parse_newick_replace_token($fp);
+    return $branches;
+  }
+
+  // A comma means more branches follow: recurse and append what is found.
+  $remaining = tripal_phylogeny_parse_newick_branchset($fp, $depth);
+  foreach ($remaining as $next) {
+    $branches[] = $next;
+  }
+
+  return $branches;
+}
+/**
+ * Parses the "Leaf" rule, which is simply a name.
+ *
+ * @param $fp
+ *   An open file handle positioned at the start of the leaf.
+ * @param $depth
+ *   The depth of the leaf. It is passed through to the name parser.
+ *
+ * @return
+ *   Whatever tripal_phylogeny_parse_newick_name() returns for the leaf.
+ */
+function tripal_phylogeny_parse_newick_leaf($fp, $depth) {
+  $leaf_name = tripal_phylogeny_parse_newick_name($fp, $depth);
+  return $leaf_name;
+}
+/**
+ * Parses the "Name" rule: an optional string.
+ *
+ * @param $fp
+ *   An open file handle positioned where a name may appear.
+ * @param $depth
+ *   The depth of the node being named. It is not used by this function.
+ *
+ * @return
+ *   The name that was read. If the name is empty the next value of the
+ *   global $tripal_phylogeny_bootstrap counter is returned instead. FALSE is
+ *   returned if an open paren is found where the name should be.
+ */
+function tripal_phylogeny_parse_newick_name($fp, $depth) {
+  global $tripal_phylogeny_bootstrap;
+
+  $token = tripal_phylogeny_parse_newick_get_token($fp);
+
+  switch ($token) {
+    // A delimiter in place of the name means the name is empty. Put the
+    // delimiter back and hand out the next number from the counter.
+    case ':':
+    case ',':
+    case ';':
+    case ')':
+      tripal_phylogeny_parse_newick_replace_token($fp);
+      return $tripal_phylogeny_bootstrap++;
+
+    // An open paren is never valid where a name is expected.
+    case '(':
+      tripal_phylogeny_parse_newick_replace_token($fp);
+      return FALSE;
+  }
+
+  return $token;
+}
+/**
+ * Parses the "Length" rule: an optional colon followed by a number.
+ *
+ * @param $fp
+ *   An open file handle positioned where a length may appear.
+ * @param $depth
+ *   The depth of the node the length belongs to. It is not used by this
+ *   function.
+ *
+ * @return
+ *   The token following the colon. An empty string is returned if a comma,
+ *   semicolon or close paren is found instead of a colon, and FALSE if any
+ *   other token is found. The script exits if the colon is followed by an
+ *   open paren.
+ */
+function tripal_phylogeny_parse_newick_length($fp, $depth) {
+
+  $token = tripal_phylogeny_parse_newick_get_token($fp);
+
+  // Without a leading colon there is no length. Put the token back and
+  // report whether the length was legitimately empty or something
+  // unexpected was found.
+  if ($token != ':') {
+    tripal_phylogeny_parse_newick_replace_token($fp);
+    $is_delimiter = ($token == ',' or $token == ';' or $token == ')');
+    return $is_delimiter ? '' : FALSE;
+  }
+
+  // The token after the colon is the length.
+  $value = tripal_phylogeny_parse_newick_get_token($fp);
+
+  // An open paren can never follow the colon: this is a syntax error.
+  if ($value == '(') {
+    exit();
+  }
+
+  return $value;
+}
+
+/**
+ * Reads the next token from a Newick file.
+ *
+ * A token is either a single reserved character (one of ; ( ) , :) or the
+ * run of characters that precedes the next reserved character. Quote
+ * characters are dropped from the token. Whitespace is dropped too unless
+ * it appears between quotes.
+ *
+ * The file position at which the token starts is stored in the global
+ * $tripal_phylogeny_fp_pos so that
+ * tripal_phylogeny_parse_newick_replace_token() can rewind to it.
+ *
+ * NOTE(review): reserved characters still end a token when they appear
+ * between quotes, and a single or double quote closes either kind of quote.
+ *
+ * @param $fp
+ *   An open file handle.
+ *
+ * @return
+ *   The token as a string. An empty string is returned when the end of the
+ *   file is reached before any token character is read.
+ */
+function tripal_phylogeny_parse_newick_get_token($fp) {
+
+  // Keep track of the file position that we start with.
+  global $tripal_phylogeny_fp_pos;
+  $tripal_phylogeny_fp_pos = ftell($fp);
+
+  $token = '';
+  $in_quote = FALSE;
+
+  // fgetc() returns FALSE once there is nothing left to read. Testing its
+  // return value means every character that is read is also processed.
+  while (($c = fgetc($fp)) !== FALSE) {
+
+    switch ($c) {
+      // If a reserved character is the first thing we see then it is the
+      // token. Otherwise it ends the token that has been collected.
+      case ';':
+      case '(':
+      case ')':
+      case ',':
+      case ':':
+        // Compare against the empty string rather than testing truthiness:
+        // "0" is a valid token (e.g. a branch length of zero).
+        if ($token === '') {
+          return $c;
+        }
+        // Put the reserved character back and return the token.
+        fseek($fp, -1, SEEK_CUR);
+        return $token;
+
+      // Quotes are allowed around names and if a name is in quotes then
+      // spaces are kept. The quote characters themselves are dropped.
+      case '\'':
+      case '"':
+        $in_quote = !$in_quote;
+        break;
+
+      // Whitespace is ignored unless it is quoted.
+      case " ":
+      case "\t":
+      case "\r":
+      case "\n":
+        if ($in_quote) {
+          $token .= $c;
+        }
+        break;
+
+      // All other characters get saved as the token.
+      default:
+        $token .= $c;
+    }
+  }
+  return $token;
+}
+/**
+ * Puts the most recently read token back.
+ *
+ * Moves the file pointer to the position stored in the global
+ * $tripal_phylogeny_fp_pos and then stores the resulting position in that
+ * same global.
+ *
+ * @param $fp
+ *   An open file handle.
+ */
+function tripal_phylogeny_parse_newick_replace_token($fp) {
+  global $tripal_phylogeny_fp_pos;
+
+  $token_start = $tripal_phylogeny_fp_pos;
+  fseek($fp, $token_start);
+
+  // Record where the pointer actually ended up.
+  $tripal_phylogeny_fp_pos = ftell($fp);
+}

+ 367 - 0
tripal_chado/includes/loaders/tripal_chado.taxonomy_importer.inc

@@ -0,0 +1,367 @@
+<?php
+
+/**
+ * Form constructor for the NCBI taxonomy import form.
+ *
+ * The form holds an (empty) instructions item, a checkbox that confirms
+ * the import and a submit button.
+ *
+ * @param $form
+ *   The form array to add the elements to.
+ * @param $form_state
+ *   The current state of the form. It is not used by this function.
+ *
+ * @return
+ *   The form array with the elements added.
+ */
+function tripal_chado_taxonomy_load_form($form, &$form_state) {
+
+  // The instructions item is in place but has no text.
+  $instructions = array(
+    '#type' => 'item',
+    '#markup' => '',
+  );
+  $form['instructions'] = $instructions;
+
+  // The checkbox serves as the confirmation for running the import.
+  $confirm = array(
+    '#type' => 'checkbox',
+    '#title' => 'Import taxonomy for existing species.',
+    '#description' =>  t('The NCBI Taxonmic Importer examines the organisms
+      currently present in the database and queries NCBI for the
+      taxonomic details.  If the importer is able to match the
+      genus and species with NCBI the species details will be imported,
+      and a page containing the taxonomic tree will be created.'),
+  );
+  $form['import_existing'] = $confirm;
+
+  $button = array(
+    '#type' => 'submit',
+    '#name' => 'import',
+    '#value' => 'Submit',
+  );
+  $form['submit'] = $button;
+
+  return $form;
+}
+
+/**
+ * Form validation handler for tripal_chado_taxonomy_load_form().
+ *
+ * Requires that the confirmation checkbox is ticked before the form can be
+ * submitted.
+ *
+ * @param $form
+ *   The form array.
+ * @param $form_state
+ *   The current state of the form, including the submitted values.
+ */
+function tripal_chado_taxonomy_load_form_validate($form, &$form_state) {
+
+  // The error must be set on the name of the checkbox element,
+  // 'import_existing', so that it is attached to that element.
+  if (empty($form_state['values']['import_existing'])) {
+    form_set_error('import_existing', 'Please confirm the import by clicking the checkbox.');
+  }
+}
+
+/**
+ * Form submission handler for tripal_chado_taxonomy_load_form().
+ *
+ * Adds a job that runs tripal_chado_ncbi_taxonomy_import() when the
+ * confirmation checkbox is ticked.
+ *
+ * @param $form
+ *   The form array.
+ * @param $form_state
+ *   The current state of the form, including the submitted values.
+ */
+function tripal_chado_taxonomy_load_form_submit($form, &$form_state) {
+  global $user;
+
+  // Nothing to do unless the import was confirmed.
+  if (!$form_state['values']['import_existing']) {
+    return;
+  }
+
+  // The importer lives in this include file, so pass it along with the job.
+  $include = module_load_include('inc', 'tripal_chado', 'includes/loaders/tripal_chado.taxonomy_importer');
+
+  tripal_add_job("Import NCBI Taxonomy", 'tripal_chado',
+    'tripal_chado_ncbi_taxonomy_import', array(), $user->uid, 10, array($include));
+}
+
+/**
+ * Imports NCBI taxonomy details for every organism in Chado.
+ *
+ * For each organism the genus and species are searched for in the NCBI
+ * Taxonomy database. When a match is found the lineage, genetic codes,
+ * division and alternate names are stored as organism properties and the
+ * lineage of the organism is added to a taxonomic tree. Once all organisms
+ * are processed the tree is passed to tripal_phylogeny_import_tree().
+ *
+ * All of the work is done inside a single database transaction which is
+ * rolled back if an exception is thrown.
+ *
+ * @param $job_id
+ *   Presumably the ID of the job that is running the import. It is not used
+ *   by this function.
+ */
+function tripal_chado_ncbi_taxonomy_import($job_id) {
+
+  print "\nNOTE: Importing of NCBI taxonomy data is performed using a database transaction. \n" .
+    "If the load fails or is terminated prematurely then the entire set of \n" .
+    "insertions/updates is rolled back and will not be found in the database\n\n";
+
+  $transaction = db_transaction();
+  try {
+    // TODO: there should be an API function named tripal_insert_analysis().
+    // But until then we have to insert the analysis manually.
+    // Get the version of this module for the analysis record:
+    $info = system_get_info('module', 'tripal_chado');
+    $version = $info['version'];
+    $analysis_name = 'NCBI Taxonomy Tree Import';
+
+    // If the analysis record already exists then don't add it again.
+    $analysis = chado_select_record('analysis', array('*'), array('name' => $analysis_name));
+    if (count($analysis) == 0) {
+      $values = array(
+        'name' => $analysis_name,
+        'description' => 'Used to import NCBI taxonomy details for organisms in this database.',
+        'program' => 'Tripal Phylogeny Module NCBI Taxonomy Importer',
+        'programversion' => $version,
+        'sourcename' => 'NCBI Taxonomy',
+        'sourceuri' => 'http://www.ncbi.nlm.nih.gov/taxonomy',
+      );
+      $analysis = chado_insert_record('analysis', $values);
+      if (!$analysis) {
+        throw new Exception("Cannot add NCBI Taxonomy Tree Import Analysis.");
+      }
+      // The analysis is read as an object below, which is what
+      // chado_select_record() provides. Cast the inserted record so that
+      // both paths agree.
+      $analysis = (object) $analysis;
+    }
+    else {
+      $analysis = $analysis[0];
+    }
+
+    // If the tree already exists then don't insert it again.
+    // NOTE(review): nothing visible here sets the $site_name global. If it
+    // is empty the tree is simply named 'Taxonomy Tree' -- confirm whether
+    // variable_get('site_name') was intended.
+    global $site_name;
+    $tree_name = $site_name . 'Taxonomy Tree';
+    $phylotree = chado_select_record('phylotree', array('*'), array('name' => $tree_name));
+    if (count($phylotree) == 0) {
+      // Add the taxonomic tree.
+      $options = array(
+        'name' =>  $tree_name,
+        'description' => 'The taxonomic tree of species present on this site. Click a species name for more details.',
+        'leaf_type' => 'taxonomy',
+        'analysis_id' => $analysis->analysis_id,
+        'tree_file' => '/dev/null',
+        'format' => 'taxonomy',
+        'no_load' => TRUE,
+      );
+      $errors = array();
+      $warnings = array();
+      $success = tripal_insert_phylotree($options, $errors, $warnings);
+      if (!$success) {
+        throw new Exception("Cannot add the Taxonomy Tree record.");
+      }
+      $phylotree = (object) $options;
+    }
+    else {
+      $phylotree = $phylotree[0];
+    }
+
+    // Clean out the phylotree in the event this is a reload
+    chado_delete_record('phylonode', array('phylotree_id' => $phylotree->phylotree_id));
+
+    // The taxonomic tree must have a root, so create that first.
+    $tree = array(
+      'name' => 'root',
+      'depth' => 0,
+      'is_root' => 1,
+      'is_leaf' => 0,
+      'is_internal' => 0,
+      'left_index' => 0,
+      'right_index' => 0,
+      'branch_set' => array(),
+    );
+
+    // Get the "rank" cvterm from the 'local' vocabulary. Its ID is the key
+    // under which the taxonomic rank of each node is stored.
+    $rank_cvterm = tripal_get_cvterm(array(
+      'name' => 'rank',
+      'cv_id' => array('name' => 'local')
+    ));
+    if (!$rank_cvterm) {
+      throw new Exception("Cannot find the 'rank' term in the 'local' vocabulary.");
+    }
+
+    // Maps the alternate name types reported by NCBI to the organism
+    // property used to store them.
+    $name_terms = array(
+      'GenbankCommonName' => 'genbank_common_name',
+      'Synonym' => 'synonym',
+      'CommonName' => 'other_name',
+      'Includes' => 'other_name',
+      'EquivalentName' => 'equivalent_name',
+      'Anamorph' => 'anamorph',
+    );
+
+    // Get the list of organisms
+    $sql = "SELECT O.* FROM {organism} O";
+    $organisms = chado_query($sql);
+    while ($organism = $organisms->fetchObject()) {
+      // Build the query string to get the information about this species.
+      $term = $organism->genus . ' ' . $organism->species;
+      $term = urlencode($term);
+      $search_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?".
+        "db=taxonomy" .
+        "&term=$term";
+
+      // Get the search response from NCBI and parse out the taxonomy ID.
+      $xml = new SimpleXMLElement(_tripal_chado_taxonomy_import_fetch_url($search_url));
+      if (!$xml) {
+        continue;
+      }
+      $taxid = (string) $xml->IdList->Id;
+      if (!$taxid) {
+        continue;
+      }
+      print "$taxid\t$organism->genus $organism->species\n";
+
+      // If we have a taxonomy ID we can now get the details.
+      $fetch_url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?".
+        "db=taxonomy" .
+        "&id=$taxid";
+      $xml = new SimpleXMLElement(_tripal_chado_taxonomy_import_fetch_url($fetch_url));
+      if (!$xml) {
+        continue;
+      }
+      $taxon = $xml->Taxon;
+
+      // Add in the organism properties
+      $lineage = (string) $taxon->Lineage;
+      tripal_chado_taxonomy_add_organism_property($organism->organism_id, 'lineage', $lineage);
+
+      $genetic_code = (string) $taxon->GeneticCode->GCId;
+      tripal_chado_taxonomy_add_organism_property($organism->organism_id, 'genetic_code', $genetic_code);
+
+      $genetic_code_name = (string) $taxon->GeneticCode->GCName;
+      tripal_chado_taxonomy_add_organism_property($organism->organism_id, 'genetic_code_name', $genetic_code_name);
+
+      $mito_genetic_code = (string) $taxon->MitoGeneticCode->MGCId;
+      tripal_chado_taxonomy_add_organism_property($organism->organism_id, 'mitochondrial_genetic_code', $mito_genetic_code);
+
+      $mito_genetic_code_name = (string) $taxon->MitoGeneticCode->MGCName;
+      tripal_chado_taxonomy_add_organism_property($organism->organism_id, 'mitochondrial_genetic_code_name', $mito_genetic_code_name);
+
+      $division = (string) $taxon->Division;
+      tripal_chado_taxonomy_add_organism_property($organism->organism_id, 'division', $division);
+
+      // Add the alternate names. The rank is counted per property rather
+      // than per name type: a rank of zero removes the values already
+      // stored for the property, and two name types share 'other_name'.
+      $name_ranks = array();
+      foreach ($taxon->OtherNames->children() as $child) {
+        $type = $child->getName();
+        $name = (string) $child;
+        if ($type == 'Name') {
+          // skip the Name stanza
+          continue;
+        }
+        if (!array_key_exists($type, $name_terms)) {
+          print "NOTICE: Skipping unrecognzed name type: $type\n";
+          continue;
+        }
+        $term_name = $name_terms[$type];
+        if (!array_key_exists($term_name, $name_ranks)) {
+          $name_ranks[$term_name] = 0;
+        }
+        tripal_chado_taxonomy_add_organism_property($organism->organism_id, $term_name, $name, $name_ranks[$term_name]);
+        $name_ranks[$term_name]++;
+      }
+
+      // Generate a nested array structure that can be used for importing the tree.
+      $rank = (string) $taxon->Rank;
+      $sci_name = (string) $taxon->ScientificName;
+      $lineage_depth = preg_split('/;\s*/', $lineage);
+      $parent_name = $tree['name'];
+      $i = 1;
+      foreach ($taxon->LineageEx->children() as $child) {
+        $name = (string) $child->ScientificName;
+        $node_rank = (string) $child->Rank;
+        $node = array(
+          'name' => $name,
+          'depth' => $i,
+          'is_root' => 0,
+          'is_leaf' => 0,
+          'is_internal' => 1,
+          'left_index' => 0,
+          'right_index' => 0,
+          'parent' => $parent_name,
+          'branch_set' => array(),
+          'properties' => array(
+            $rank_cvterm->cvterm_id => $node_rank,
+          ),
+        );
+        // This node is the parent of the next node in the lineage.
+        $parent_name = $name;
+        tripal_chado_taxonomy_import_add_node($tree, $node, $lineage_depth);
+        $i++;
+      }
+      // Now add in the leaf node
+      $node = array(
+        'name' => $sci_name,
+        'depth' => $i,
+        'is_root' => 0,
+        'is_leaf' => 1,
+        'is_internal' => 0,
+        'left_index' => 0,
+        'right_index' => 0,
+        'parent' => $parent_name,
+        'organism_id' => $organism->organism_id,
+        'properties' => array(
+          $rank_cvterm->cvterm_id => $rank,
+        ),
+      );
+      tripal_chado_taxonomy_import_add_node($tree, $node, $lineage_depth);
+
+      // Set the indices for the tree.
+      tripal_assign_phylogeny_tree_indices($tree);
+    } // end: while ($organism = $organisms->fetchObject()) { ...
+
+    // Now add the tree
+    $options = array('taxonomy' => 1);
+    tripal_phylogeny_import_tree($tree, $phylotree, $options);
+  }
+  catch (Exception $e) {
+    $transaction->rollback();
+    print "\n"; // make sure we start errors on new line
+    watchdog_exception('tripal_chado', $e);
+    print "FAILED: Rolling back database changes...\n";
+  }
+}
+
+/**
+ * Retrieves the body of the response for a URL.
+ *
+ * @param $url
+ *   The URL to read.
+ *
+ * @return
+ *   The response body as a string.
+ *
+ * @throws Exception
+ *   If the URL cannot be read.
+ */
+function _tripal_chado_taxonomy_import_fetch_url($url) {
+  $contents = file_get_contents($url);
+  if ($contents === FALSE) {
+    throw new Exception("Could not retrieve data from NCBI using the URL: $url");
+  }
+  return $contents;
+}
+
+/**
+ * Adds a node to the nested tree array at the place given by its lineage.
+ *
+ * Starting with the branches of the root, the tree is descended one level
+ * at a time by following the branch whose name matches the lineage at that
+ * level. The node is appended to the branch set at which the descent ends.
+ * Nothing is added if a node with the same name and depth is found on the
+ * way down.
+ *
+ * @param $tree
+ *   The root node of the tree, passed by reference. It must have a
+ *   'branch_set' element.
+ * @param $node
+ *   The node array to add. It must have 'name' and 'depth' elements.
+ * @param $lineage_depth
+ *   An array of ancestor names where the first element is the name of the
+ *   ancestor at depth 1.
+ */
+function tripal_chado_taxonomy_import_add_node(&$tree, $node, $lineage_depth) {
+
+  // Get the branch set for the tree root.
+  $branch_set = &$tree['branch_set'];
+
+  // Iterate through the tree up until the depth where this node will
+  // be placed.
+  $node_depth = $node['depth'];
+  for ($i = 1; $i <= $node_depth; $i++) {
+    // The name of the ancestor expected at this depth. The lineage may be
+    // shorter than the depth of the node, in which case there is none.
+    $ancestor = isset($lineage_depth[$i - 1]) ? $lineage_depth[$i - 1] : NULL;
+
+    for ($j = 0; $j < count($branch_set); $j++) {
+      // If this node already exists in the tree then return. Both checks
+      // are comparisons: an assignment here would overwrite the depth of
+      // the existing node.
+      if ($branch_set[$j]['name'] == $node['name'] and
+          $branch_set[$j]['depth'] == $node['depth']) {
+        return;
+      }
+      // Otherwise, if this branch is the ancestor for this depth then
+      // descend into it and continue with the next depth.
+      if ($ancestor !== NULL and $branch_set[$j]['name'] == $ancestor) {
+        $branch_set = &$branch_set[$j]['branch_set'];
+        break;
+      }
+    }
+  }
+  // Add the node to the last branch set.  This should be where this node goes.
+  $branch_set[] = $node;
+}
+
+/**
+ * Stores a property from the 'organism_property' vocabulary for an organism.
+ *
+ * @param $organism_id
+ *   The ID of the organism the property belongs to.
+ * @param $term_name
+ *   The name of the property term.
+ * @param $value
+ *   The value of the property. Nothing is stored if the value is empty.
+ * @param $rank
+ *   The rank of the value. When it is zero all existing properties of this
+ *   type are deleted before the value is inserted. Defaults to 0.
+ */
+function tripal_chado_taxonomy_add_organism_property($organism_id, $term_name, $value, $rank = 0) {
+  if (!$value) {
+    return;
+  }
+
+  $record = array(
+    'table' => 'organism',
+    'id' => $organism_id
+  );
+  $property = array(
+    'type_name' => $term_name,
+    // The vocabulary name must be a string, not a bare constant.
+    'cv_name' => 'organism_property',
+    'value' => $value
+  );
+  // Delete all properties of this type if the rank is zero.
+  if ($rank == 0) {
+    chado_delete_property($record, $property);
+  }
+  chado_insert_property($record, $property);
+}

+ 4 - 0
tripal_chado/includes/tripal_chado.fields.inc

@@ -82,6 +82,7 @@ function tripal_chado_bundle_create_fields_base(&$info, $details, $entity_type,
       continue;
     }
     $field_name = strtolower($cvterm->dbxref_id->db_id->name . '__' . preg_replace('/ /', '_', $cvterm->name));
+    $field_name = substr($field_name, 0, 32);
 
     // Skip the primary key field.
     if ($column_name == $schema['primary key'][0]) {
@@ -441,6 +442,7 @@ function tripal_chado_bundle_create_fields_linker(&$info, $details, $entity_type
     while ($prop = $props->fetchObject()) {
       $term = chado_generate_var('cvterm', array('cvterm_id' => $prop->type_id));
       $field_name = strtolower(preg_replace('/[^\w]/','_', $term->dbxref_id->db_id->name . '__' . $term->name));
+      $field_name = substr($field_name, 0, 32);
       $field_type = 'chado_linker__prop';
       $info[$field_name] = array(
         'field_name' => $field_name,
@@ -579,6 +581,7 @@ function tripal_chado_bundle_create_instances_base(&$info, $entity_type, $bundle
     }
 
     $field_name = strtolower($cvterm->dbxref_id->db_id->name . '__' . preg_replace('/ /', '_', $cvterm->name));
+    $field_name = substr($field_name, 0, 32);
 
     // Skip the primary key field.
     if ($column_name == $schema['primary key'][0]) {
@@ -1301,6 +1304,7 @@ function tripal_chado_bundle_create_instances_linker(&$info, $entity_type, $bund
      while ($prop = $props->fetchObject()) {
        $term = chado_generate_var('cvterm', array('cvterm_id' => $prop->type_id));
        $field_name = strtolower(preg_replace('/[^\w]/','_', $term->dbxref_id->db_id->name . '__' . $term->name));
+       $field_name = substr($field_name, 0, 32);
        $info[$field_name] = array(
          'field_name' => $field_name,
          'entity_type' => $entity_type,

+ 88 - 0
tripal_chado/includes/tripal_chado.semweb.inc

@@ -489,6 +489,94 @@ function tripal_chado_populate_vocab_LOCAL() {
   tripal_associate_chado_semweb_term('pub_relationship', 'type_id', $term);
   tripal_associate_chado_semweb_term('quantification_relationship', 'type_id', $term);
   tripal_associate_chado_semweb_term('stock_relationship', 'type_id', $term);
+
+  $term = tripal_insert_cvterm(array(
+    'id' => 'locak:rank',
+    'name' => 'rank',
+    'definition' => 'A taxonmic rank',
+    'cv_name' => 'local',
+  ));
+
+
+  tripal_insert_cv(
+    'organism_property',
+    'Contains locally defined properties for organisms'
+  );
+  $terms = array(
+    'lineage',
+    'genetic_code',
+    'genetic_code_name',
+    'mitochondrial_genetic_code',
+    'mitochondrial_genetic_code_name',
+    'division',
+    'genbank_common_name',
+    'synonym',
+    'other_name',
+    'equivalent_name',
+    'anamorph'
+  );
+  $options = array('update_existing' => TRUE);
+  foreach ($terms as $term) {
+    $value = array(
+      'name' => $term,
+      'definition' => '',
+      'cv_name' => 'organism_property',
+      'is_relationship' => 0,
+      'db_name' => 'local'
+    );
+    tripal_insert_cvterm($value, $options);
+  }
+
+
+  tripal_insert_cv(
+      'tripal_phylogeny',
+      'Terms used by the Tripal phylotree module for phylogenetic and taxonomic trees.'
+  );
+
+  // Add the terms used to identify nodes in the tree.
+  tripal_insert_cvterm(
+      array(
+        'name' => 'phylo_leaf',
+        'definition' => 'A leaf node in a phylogenetic tree.',
+        'cv_name' => 'tripal_phylogeny',
+        'is_relationship' => 0,
+        'db_name' => 'local'
+      ),
+      array('update_existing' => TRUE)
+  );
+  // Add the terms used to identify nodes in the tree.
+  tripal_insert_cvterm(
+      array(
+        'name' => 'phylo_root',
+        'definition' => 'The root node of a phylogenetic tree.',
+        'cv_name' => 'tripal_phylogeny',
+        'is_relationship' => 0,
+        'db_name' => 'local'
+      ),
+      array('update_existing' => TRUE)
+  );
+  // Add the terms used to identify nodes in the tree.
+  tripal_insert_cvterm(
+      array(
+        'name' => 'phylo_interior',
+        'definition' => 'An interior node in a phylogenetic tree.',
+        'cv_name' => 'tripal_phylogeny',
+        'is_relationship' => 0,
+        'db_name' => 'local'
+      ),
+      array('update_existing' => TRUE)
+  );
+  // Add the terms used to identify nodes in the tree.
+  tripal_insert_cvterm(
+      array(
+        'name' => 'taxonomy',
+        'definition' => 'A term used to indicate if a phylotree is a taxonomic tree',
+        'cv_name' => 'tripal_phylogeny',
+        'is_relationship' => 0,
+        'db_name' => 'local'
+      ),
+      array('update_existing' => TRUE)
+  );
 }
 /**
  * Adds the Systems Biology Ontology database and terms.

+ 23 - 0
tripal_chado/tripal_chado.module

@@ -27,6 +27,7 @@ require_once 'api/modules/tripal_chado.feature.api.inc';
 require_once 'api/modules/tripal_chado.organism.api.inc';
 require_once 'api/modules/tripal_chado.pub.api.inc';
 require_once 'api/modules/tripal_chado.stock.api.inc';
+require_once 'api/modules/tripal_chado.phylotree.api.inc';
 
 
 //
@@ -377,6 +378,27 @@ function tripal_chado_menu() {
     'file path' => drupal_get_path('module', 'tripal_chado'),
     'type' => MENU_NORMAL_ITEM,
   );
+  $items['admin/tripal/storage/chado/loaders/ncbi_taxonomy_loader'] = array(
+    'title' => 'NCBI Taxonomy Loader',
+    'description' => 'Loads taxonomic details about installed organisms.',
+    'page callback' => 'drupal_get_form',
+    'page arguments' => array('tripal_chado_taxonomy_load_form'),
+    'access arguments' => array('administer tripal phylotree'),
+    'file' => 'includes/loaders/tripal_chado.taxonomy_importer.inc',
+    'file path' => drupal_get_path('module', 'tripal_chado'),
+    'type' => MENU_NORMAL_ITEM,
+  );
+
+  // Data Loaders
+  $items['admin/tripal/storage/chado/loaders/newic_phylotree_loader'] = array(
+    'title' => 'Phylogenetic Trees (Newic format)',
+    'description' => 'Loads phylogenetic trees in Newic format.',
+    'page callback' => 'drupal_goto',
+    'page arguments' => array('node/add/chado-phylotree'),
+    'access arguments' => array('administer tripal phylotree'),
+    'type' => MENU_NORMAL_ITEM,
+  );
+
   $items['admin/tripal/storage/chado/loaders/pub'] = array(
     'title' => t('Publication Importers'),
     'description' => t('Create and modify importers that can connect to and retreive publications from remote databases.'),
@@ -460,6 +482,7 @@ function tripal_chado_menu() {
     'file path' => drupal_get_path('module', 'tripal_chado'),
   );
 
+
   //////////////////////////////////////////////////////////////////////////////
   //                           Migrate Content
   //////////////////////////////////////////////////////////////////////////////