Browse Source

Merge branch '7.x-2.x' of git.drupal.org:project/tripal into 7.x-2.x

Lacey Sanderson 9 years ago
parent
commit
0d02183154

+ 43 - 1
tripal_analysis/api/tripal_analysis.api.inc

@@ -108,7 +108,7 @@ function tripal_get_analysis($identifier, $options) {
     $property = $identifiers['property'];
     unset($identifiers['property']);
     $analysis = chado_get_record_with_property(
-      array('table' => 'analysis', 'base_records' => $identifiers), 
+      array('table' => 'analysis', 'base_records' => $identifiers),
       array('type_name' => $property)
     );
   }
@@ -154,4 +154,46 @@ function tripal_get_analysis($identifier, $options) {
   else {
     return $analysis;
   }
+}
+/**
+ * Returns a list of analyses, keyed by analysis_id, for use in select lists.
+ *
+ * @param $syncd_only
+ *   Whether to return only the analyses sync'd with Drupal (TRUE) or all chado
+ *   analyses (FALSE). Defaults to TRUE (only sync'd analyses).
+ * @return
+ *   An array where each key is an analysis_id and each value is the analysis
+ *   name. The first element (key 0) is the prompt 'Select an analysis'.
+ *
+ * @ingroup tripal_analysis_api
+ */
+function tripal_get_analysis_select_options($syncd_only = TRUE) {
+  $analysis_list = array();
+  $analysis_list[] = 'Select an analysis';
+
+  if ($syncd_only) {
+    $sql = "
+      SELECT *
+      FROM public.chado_analysis CA
+        INNER JOIN {analysis} A ON A.analysis_id = CA.analysis_id
+      ORDER BY A.name
+    ";
+    $analyses = chado_query($sql);
+
+    // iterate through the analyses and build an array of those that are synced
+    foreach ($analyses as $analysis) {
+      $analysis_list[$analysis->analysis_id] = $analysis->name;
+    }
+  }
+  else {
+    // use this SQL statement for getting all of the analyses
+    $csql =  "SELECT * FROM {analysis} ORDER BY name";
+    $analyses = chado_query($csql);
+
+    // iterate through the analyses and build an array of all of them
+    foreach ($analyses as $analysis) {
+      $analysis_list[$analysis->analysis_id] = $analysis->name;
+    }
+  }
+  return $analysis_list;
 }

+ 159 - 0
tripal_bulk_loader/api/tripal_bulk_loader.api.templates.inc

@@ -14,6 +14,165 @@
  * @}
  */
 
+/**
+ * Validates an $options array for insert or update of a bulk loader record.
+ *
+ * @param $val_type
+ *   The type of validation. Can be either 'insert' or 'update'.
+ * @param $options
+ *   An array of key/value pairs containing the following keys:
+ *     'template_name':   The name of the template.
+ *     'template_array':  The JSON array representing the template.
+ *   Optional:
+ *     'strict':          If set then only JSON formatted templates are allowed.
+ * @param $errors
+ *   An empty array where validation error messages will be set. The keys
+ *   of the array will be name of the field from the options array and the
+ *   value is the error message.
+ * @param $warnings
+ *   An empty array where validation warning messages will be set. The
+ *   warnings should not stop an insert or an update but should be provided
+ *   to the user as information by a drupal_set_message() if appropriate. The
+ *   keys of the array will be name of the field from the options array and the
+ *   value is the error message.
+ *
+ * @return
+ *   If validation fails then FALSE is returned.  Any options that do not pass
+ *   validation checks will be added in the $errors array with the key being
+ *   the option and the value being the error message.  If validation
+ *   is successful then TRUE is returned.
+ *
+ */
+function tripal_validate_bulk_loader_template($val_type, &$options, &$errors, &$warnings = array()) {
+  $template_array = trim($options['template_array']);
+  $template_name = trim($options['template_name']);
+  $strict = array_key_exists('strict', $options)  ? $options['strict'] : FALSE;
+
+  // Make sure the template array is one of the supported types
+  // DEPRECATED: A stringified version of the array (causes security issues)
+  if (preg_match('/^array/', $template_array)) {
+    if ($strict) {
+      $errors['template_array'] = t('Invalid template array. Please provide
+        a JSON formatted array');
+      return FALSE;
+    }
+    else {
+      $warnings['template_array'] = t('Please note that import of
+        bulk loader templates as PHP arrays as a stringified array is deprecated
+        and will be removed in future versions of Tripal. Export and import
+        format will be JSON.');
+    }
+  }
+  // DEPRECATED: A serialized PHP array
+  elseif (preg_match('/^a:/', $template_array)) {
+    if ($strict) {
+      $errors['template_array'] = t('Invalid template array. Please provide
+        a JSON formatted array');
+      return FALSE;
+    }
+    else {
+      $warnings['template_array'] = t('Please note that import of
+        bulk loader templates as PHP serialized arrays is deprecated and will
+        be removed in future versions of Tripal. Export and import format will
+        be JSON.');
+    }
+  }
+  // JSON FORMAT
+  elseif (json_decode($template_array)) {
+    // This is correct!
+  }
+  else {
+    $errors['template_array'] = t('The template array must be in
+      JSON format (although PHP strigified arrays and PHP serialized
+      arrays are temporarily supported for backwards compatibility).');
+    return FALSE;
+  }
+
+  // Make sure the template name is unique
+  $name_exists = db_select('tripal_bulk_loader_template', 'tblt')
+    ->fields('tblt', array('template_id'))
+    ->condition('name', $template_name)
+    ->execute()
+    ->fetchField();
+  if ($name_exists) {
+    $errors['template_name'] = t('The template name already exists. Please
+      choose another name.');
+    return FALSE;
+  }
+  return TRUE;
+}
+
+/**
+ * Inserts a bulk loader template record.
+ *
+ * This function validates the options passed prior to insertion of the record.
+ *
+ * @param $options
+ *   An array of key/value pairs containing the following keys:
+ *     'template_name':   The name of the template.
+ *     'template_array':  The JSON array representing the template.
+ *   Optional:
+ *     'strict':          If set then only JSON formatted templates are allowed.
+ * @param $errors
+ *   An empty array where validation error messages will be set. The keys
+ *   of the array will be name of the field from the options array and the
+ *   value is the error message.
+ * @param $warnings
+ *   An empty array where validation warning messages will be set. The
+ *   warnings should not stop an insert or an update but should be provided
+ *   to the user as information by a drupal_set_message() if appropriate. The
+ *   keys of the array will be name of the field from the options array and the
+ *   value is the error message.
+ * @return
+ *   TRUE for success and FALSE for failure.
+ */
+function tripal_insert_bulk_loader_template($options, &$errors, &$warnings) {
+
+  $success = tripal_validate_bulk_loader_template('insert', $options, $errors, $warnings);
+  if (!$success) {
+    foreach ($errors as $field => $message) {
+      tripal_report_error('tripal_bulkldr', TRIPAL_ERROR, $message);
+    }
+    return FALSE;
+  }
+
+  // Insert the bulk loader template.
+  $template_array = trim($options['template_array']);
+  $template_name = trim($options['template_name']);
+
+  // DEPRECATED: previous versions of Tripal exported the template as PHP
+  // array source code. SECURITY: eval() runs whatever code the caller
+  // supplies, so this path must only be reachable by trusted users. It is
+  // kept solely to allow template transfers between v1.1 and v2.x sites.
+  if (preg_match('/^array/', $template_array)) {
+    $tarray = array();
+    eval("\$tarray = $template_array;");
+    $template_array = serialize($tarray);
+  }
+  // For a brief period, the bulk loader templates were exported as a PHP
+  // serialized array. We have moved to exporting in JSON as JSON is more
+  // user friendly. But we must support the serialized PHP array for
+  // backwards compatibility of v2.0-rc1 sites and v2.x sites.
+  elseif (preg_match('/^a:/', $template_array)) {
+    // do nothing it's in PHP serialized format
+  }
+  // Anything else passed validation as JSON (an object or an array), so
+  else {
+    $template_array = serialize(json_decode($template_array, TRUE));
+  }
+
+  $record = array(
+    'name' => $template_name,
+    'template_array' => $template_array,
+    'created' => time(),
+    'changed' => time()
+  );
+  if (!drupal_write_record('tripal_bulk_loader_template', $record)) {
+    return FALSE;
+  }
+  return TRUE;
+}
+
 /**
  * Meant to be called from a form_validate function to ensure a newly added bulk loader record
  * name is unique and not empty.

+ 46 - 16
tripal_bulk_loader/includes/tripal_bulk_loader.admin.templates.inc

@@ -692,7 +692,7 @@ function tripal_bulk_loader_import_template_form($form, &$form_state) {
   $breadcrumb[] = l('Templates', 'admin/tripal/loaders/bulk/templates');
   drupal_set_breadcrumb($breadcrumb);
 
-  $form['new_template_name'] = array(
+  $form['template_name'] = array(
     '#type' => 'textfield',
     '#title' => 'Template Name',
     '#weight' => 1,
@@ -716,6 +716,35 @@ function tripal_bulk_loader_import_template_form($form, &$form_state) {
   return $form;
 }
 
+/**
+ * Validates the import template form
+ */
+function tripal_bulk_loader_import_template_form_validate($form, &$form_state) {
+
+  $options = array(
+    'template_name' => trim($form_state['values']['template_name']),
+    'template_array' => trim($form_state['values']['template_array'])
+  );
+  $errors = array();
+  $warnings = array();
+
+  // Use the API function to Validate the submission.
+  tripal_validate_bulk_loader_template('insert', $options, $errors, $warnings);
+
+  // Now set form errors if any errors were detected.
+  if (count($errors) > 0) {
+    foreach($errors as $field => $message) {
+      form_set_error($field, $message);
+    }
+  }
+  // Add any warnings if any were detected
+  if (count($warnings) > 0) {
+    foreach($warnings as $field => $message) {
+      drupal_set_message($message, 'warning');
+    }
+  }
+}
+
 /**
  * Import Template Form Submit
  *
@@ -727,21 +756,22 @@ function tripal_bulk_loader_import_template_form($form, &$form_state) {
  * @ingroup tripal_bulk_loader
  */
 function tripal_bulk_loader_import_template_form_submit($form, &$form_state) {
-
-  $record = array(
-    'name' => $form_state['values']['new_template_name'],
-    'template_array' => $form_state['values']['template_array'],
-    'created' => time(),
-    'changed' => time()
+  $options = array(
+    'template_name' => trim($form_state['values']['template_name']),
+    'template_array' => trim($form_state['values']['template_array'])
   );
-  $result = drupal_write_record('tripal_bulk_loader_template', $record);
-  if ($result) {
-    drupal_set_message(t('Successfully imported Tripal Bulk Loader Template.'));
-  }
-
-  $form_state['rebuild'] = FALSE;
-  $form_state['redirect'] = 'admin/tripal/loaders/bulk/templates';
+  $errors = array();
+  $warnings = array();
 
+  // Use the API function to insert a bulk loader.
+  if (tripal_insert_bulk_loader_template($options, $errors, $warnings)) {
+    drupal_set_message(t('Successfully imported Tripal bulk loader template.'));
+    $form_state['rebuild'] = FALSE;
+    $form_state['redirect'] = 'admin/tripal/loaders/bulk/templates';
+  }
+  else {
+    drupal_set_message(t('Unable to import Tripal bulk loader template.'), 'error');
+  }
 }
 
 /**
@@ -792,8 +822,8 @@ function tripal_bulk_loader_export_template_form($form, &$form_state) {
   $form['template_array'] = array(
     '#type' => 'textarea',
     '#title' => 'Export',
-    '#default_value' => $form_state['storage']['template_array'],
-    '#description' => t('Use this serialized array for import.'),
+    '#default_value' => json_encode(unserialize($form_state['storage']['template_array'])),
+    '#description' => t('Use this JSON array for import of this bulk loader template into another Tripal site.'),
     '#rows' => 30,
     '#weight' => 5,
 

+ 1 - 1
tripal_core/api/tripal_core.chado_query.api.inc

@@ -1499,7 +1499,7 @@ function chado_query($sql, $args = array()) {
     // the featureloc table has some indexes that use function that call other functions
     // and those calls do not reference a schema, therefore, any tables with featureloc
     // must automaticaly have the chado schema set as active to find
-    if (preg_match('/chado.featureloc/i', $sql)) {
+    if (preg_match('/chado.featureloc/i', $sql) or preg_match('/chado.feature/i', $sql)) {
       $previous_db = chado_set_active('chado') ;
       $results = db_query($sql, $args);
       chado_set_active($previous_db);

+ 19 - 13
tripal_core/includes/tripal_core.extensions.inc

@@ -319,6 +319,8 @@ function tripal_core_extension_form_add_extensions(&$form, $form_state, $extensi
         continue;
       }
 
+      $extension['title'] = trim($extension['title']);
+
       // If this is an extension module then there will be a home page for it
       $home_page = '';
       if (array_key_exists('home_page', $extension[$namespace])) {
@@ -347,7 +349,7 @@ function tripal_core_extension_form_add_extensions(&$form, $form_state, $extensi
             ->execute()
             ->fetchField();
           if ($blk_id) {
-            $is_installed = '<li>A bulk loader tempalte with this name is already installed.</li>';
+            $is_installed = '<li>A bulk loader template with this name is already installed.</li>';
           }
           break;
         case 'Materialized View':
@@ -524,20 +526,24 @@ function tripal_core_extensions_form_submit($form, &$form_state) {
     $type = $extension['category'];
     switch ($type) {
       case 'Bulk Loader Template':
-        // TODO: we need an API function for adding a bulk loader
-        $id = db_insert('tripal_bulk_loader_template')
-          ->fields(array(
-            'name' => $extension['title'],
-            'template_array' => $extension[$namespace]['bulkldr_export'],
-            'created' => time(),
-            'changed' => time()
-          ))
-          ->execute();
-        if (!$id) {
-          drupal_set_message("Cannot import this bulk loader. Please view the 'Recent log messages' report for more details.",  'error');
+        $options = array(
+          'template_name' => $extension['title'],
+          'template_array' => $extension[$namespace]['bulkldr_export'],
+          'strict' => TRUE,
+        );
+        $errors = array();
+        $warnings = array();
+        $success = tripal_insert_bulk_loader_template($options, $errors, $warnings);
+        if ($success) {
+          drupal_set_message("Bulk loader succesfully added.");
         }
         else {
-          drupal_set_message("Bulk loader succesfully added.");
+          drupal_set_message("Error importing this bulk loader.",  'error');
+          if (count($errors) > 0) {
+            foreach($errors as $field => $message) {
+              drupal_set_message($message, 'error');
+            }
+          }
         }
         break;
       case 'Materialized View':

+ 10 - 3
tripal_core/includes/tripal_core.jobs.inc

@@ -252,10 +252,17 @@ function tripal_jobs_view($job_id) {
 
   // We do not know what the arguments are for and we want to provide a
   // meaningful description to the end-user. So we use a callback function
-  // deinfed in the module that created the job to describe in an array
+  // defined in the module that created the job to describe in an array
   // the arguments provided.  If the callback fails then just use the
-  // arguments as they are
-  $args = preg_split("/::/", $job->arguments);
+  // arguments as they are.  Historically, job arguments were separated with
+  // two colons. We now store them as a serialized array. So, we need to handle
+  // both cases.
+  if (preg_match("/::/", $job->arguments)) {
+    $args = preg_split("/::/", $job->arguments);
+  }
+  else {
+    $args = unserialize($job->arguments);
+  }
   $arg_hook = $job->modulename . "_job_describe_args";
   if (is_callable($arg_hook)) {
     $new_args = call_user_func_array($arg_hook, array($job->callback, $args));

+ 0 - 53
tripal_feature/api/tripal_feature.schema.api.inc

@@ -1,53 +0,0 @@
-<?php
-/**
- * @file
- * Further describe some of the feature tables to the tripal schema
- */
-
-/**
- * Implements hook_chado_schema_v1_2_tripal_gff_temp()
- * Purpose: To describe the structure of 'tripal_gff_temp' to tripal
- * @see chado_insert_record()
- * @see chado_update_record()
- * @see chado_select_record()
- *
- * @return
- *    An array describing the 'tripal_gff_temp' table
- *
- * @ingroup tripal_chado_v1.2_schema_api
- *
- */
-function tripal_feature_chado_schema_v1_2_tripal_gff_temp() {
-  $schema = array(
-    'table' => 'tripal_gff_temp',
-    'fields' => array(
-      'feature_id' => array(
-        'type' => 'int',
-        'not null' => TRUE,
-      ),
-      'organism_id' => array(
-        'type' => 'int',
-        'not null' => TRUE,
-      ),
-      'uniquename' => array(
-        'type' => 'text',
-        'not null' => TRUE,
-      ),
-      'type_name' => array(
-        'type' => 'varchar',
-        'length' => '1024',
-        'not null' => TRUE,
-      ),
-    ),
-    'indexes' => array(
-      'tripal_gff_temp_idx0' => array('feature_id'),
-      'tripal_gff_temp_idx0' => array('organism_id'),
-      'tripal_gff_temp_idx1' => array('uniquename'),
-    ),
-    'unique keys' => array(
-      'tripal_gff_temp_uq0' => array('feature_id'),
-      'tripal_gff_temp_uq1' => array('uniquename', 'organism_id', 'type_name'),
-    ),
-  );
-  return $schema;
-}

+ 67 - 109
tripal_feature/includes/tripal_feature.delete.inc

@@ -10,14 +10,6 @@
  * @ingroup tripal_feature
  */
 function tripal_feature_delete_form() {
-  // get the list of organisms
-  $sql = "SELECT * FROM {organism} ORDER BY genus, species";
-  $org_rset = chado_query($sql);
-  $organisms = array();
-  $organisms[''] = '';
-  while ($organism = $org_rset->fetchObject()) {
-    $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
-  }
   $form['desc'] = array(
     '#markup' => t("Use one or more of the following fields to identify sets of features to be deleted."),
   );
@@ -35,12 +27,15 @@ function tripal_feature_delete_form() {
     '#description' => t('Select this checbox if the names listed in the feature
       names box above are the unique name of the feature rather than the human readable names.'),
   );
+  $cv = tripal_get_cv(array('name' => 'sequence'));
   $form['seq_type']= array(
-    '#type' => 'textfield',
-    '#title' => t('Sequence Type'),
-    '#description' => t('Please enter the Sequence Ontology term that describes the features to be deleted. Use in conjunction with an organism or anaylysis.'),
+   '#title'       => t('Feature Type'),
+   '#type'        => 'textfield',
+   '#description' => t("Choose the feature type."),
+   '#autocomplete_path' => "admin/tripal/chado/tripal_cv/cvterm/auto_name/$cv->cv_id",
   );
 
+  $organisms = tripal_get_organism_select_options(FALSE);
   $form['organism_id'] = array(
    '#title'       => t('Organism'),
    '#type'        => 'select',
@@ -48,23 +43,13 @@ function tripal_feature_delete_form() {
    '#options'     => $organisms,
   );
 
-
-  // get the list of analyses
-  $sql = "SELECT * FROM {analysis} ORDER BY name";
-  $org_rset = chado_query($sql);
-  $analyses = array();
-  $analyses[''] = '';
-  while ($analysis = $org_rset->fetchObject()) {
-    $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
-  }
-  //  TODO: ADD THIS BACK IN LATER
-  //
-  //   $form['analysis']['analysis_id'] = array (
-  //     '#title'       => t('Analysis'),
-  //     '#type'        => t('select'),
-  //     '#description' => t("Choose the analysis for which associated features will be deleted."),
-  //     '#options'     => $analyses,
-  //   );
+  $analyses = tripal_get_analysis_select_options(FALSE);
+  $form['analysis_id'] = array (
+    '#title'       => t('Analysis'),
+    '#type'        => t('select'),
+    '#description' => t("Choose the analysis for which associated features will be deleted."),
+    '#options'     => $analyses,
+  );
 
   $form['button'] = array(
     '#type' => 'submit',
@@ -81,30 +66,13 @@ function tripal_feature_delete_form() {
 function tripal_feature_delete_form_validate($form, &$form_state) {
   $organism_id   = $form_state['values']['organism_id'];
   $seq_type      = trim($form_state['values']['seq_type']);
-  //$analysis_id   = $form_state['values']['analysis_id'];
+  $analysis_id   = $form_state['values']['analysis_id'];
   $is_unique     = $form_state['values']['is_unique'];
   $feature_names = $form_state['values']['feature_names'];
 
-  if (!$organism_id and !$seq_type and !$feature_names) { // !$anaysis_id and
+  if (!$analysis_id and !$organism_id and !$seq_type and !$feature_names) {
     form_set_error('feature_names', t("Please select at least one option"));
   }
-
-  // check to make sure the types exists
-  if ($seq_type) {
-    $cvtermsql = "
-      SELECT CVT.cvterm_id
-      FROM {cvterm} CVT
-        INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
-        LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
-      WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)
-    ";
-    $cvterm = chado_query($cvtermsql,
-      array(':cvname' => 'sequence', ':name' => $seq_type, ':synonym' => $seq_type))->fetchObject();
-    if (!$cvterm) {
-      form_set_error('seq_type', t("The Sequence Ontology (SO) term selected for the " .
-      "sequence type is not available in the database. Please check spelling or select another."));
-    }
-  }
 }
 
 /**
@@ -117,13 +85,11 @@ function tripal_feature_delete_form_submit($form, &$form_state) {
 
   $organism_id   = $form_state['values']['organism_id'];
   $seq_type      = trim($form_state['values']['seq_type']);
-  //$analysis_id   = $form_state['values']['analysis_id'];
+  $analysis_id   = $form_state['values']['analysis_id'];
   $is_unique     = $form_state['values']['is_unique'];
   $feature_names = $form_state['values']['feature_names'];
-  $analysis_id = NULL;
 
   $args = array($organism_id, $analysis_id, $seq_type, $is_unique, $feature_names);
-
   tripal_add_job("Delete features", 'tripal_feature',
     'tripal_feature_delete_features', $args, $user->uid);
 }
@@ -153,21 +119,14 @@ function tripal_feature_delete_features($organism_id, $analysis_id, $seq_type,
   global $user;
   $match = array();
 
-  // Deleting of features will cause a cascade delete on the
-  // featureloc table which in turn will wind up calling create_point
-  // function which is not prefix with the schema, and an error occurs.
-  // Therefore, we set the active database to chado to get around that
-  // problem.
-  $previous_db = chado_set_active('chado');
-  
-  // begin the transaction
+  // Begin the transaction.
   $transaction = db_transaction();
   print "\nNOTE: Deleting features is performed using a database transaction. \n" .
       "If the load fails or is terminated prematurely then the entire set of \n" .
       "deletions is rolled back and will not be found in the database\n\n";
   try {
 
-    // if feature names have been provided then handle that separately
+    // If feature names have been provided then handle those
     if ($feature_names) {
       $names = preg_split('/\s+/', $feature_names);
       if (sizeof($names) == 1) {
@@ -181,33 +140,59 @@ function tripal_feature_delete_features($organism_id, $analysis_id, $seq_type,
       }
       $num_deletes = chado_select_record('feature', array('count(*) as cnt'), $match);
       print "Deleting " . $num_deletes[0]->cnt . " features\n";
-      
+
       chado_delete_record('feature', $match);
+      return;
+    }
+
+    // Now handle the combination of all other inputs.
+    $args = array();
+    $sql = "";
+    $join = '';
+    $where = '';
+    if ($analysis_id) {
+      $join .= 'INNER JOIN {analysisfeature} AF on F.feature_id = AF.feature_id ';
+      $join .= 'INNER JOIN {analysis} A on A.analysis_id = AF.analysis_id ';
+      $where .= 'AND A.analysis_id = :analysis_id ';
+      $args[':analysis_id'] = $analysis_id;
     }
-  
-    // if the user has provided an analysis_id then handle that separately
-    elseif ($analysis_id) {
-      tripal_feature_delete_by_analysis();
+    if ($organism_id) {
+      $where .= 'AND F.organism_id = :organism_id ';
+      $args[':organism_id'] = $organism_id;
     }
-    else {
-  
-      if ($organism_id) {
-        $match['organism_id'] = $organism_id;
-      }
-      if ($seq_type) {
-        $match['type_id'] = array(
-          'name' => $seq_type,
-          'cv_id' => array(
-            'name' => 'sequence'
-          ),
-        );
-      }
-      $num_deletes = chado_select_record('feature', array('count(*) as cnt'), $match);
-      print "Deleting " . $num_deletes[0]->cnt . " features\n";
-      chado_delete_record('feature', $match);
+    if ($seq_type) {
+      $join .= 'INNER JOIN {cvterm} CVT ON CVT.cvterm_id = F.type_id';
+      $where .= 'AND CVT.name = :type_name';
+      $args[':type_name'] = $seq_type;
     }
-  
-    print "Removing orphaned feature pages\n";
+
+    // Do not perform a delete if we have no additions to the where clause
+    // otherwise all features will be deleted and this is probably not what
+    // is wanted.
+    if (!$where) {
+      throw new Exception('Cannot delete features as no filters are available');
+    }
+    // First, count the number of records to be deleted
+    $sql = "
+      SELECT count(F.feature_id)
+      FROM {feature} F
+        $join
+      WHERE 1=1 $where
+    ";
+    $num_deleted = chado_query($sql, $args)->fetchField();
+    // Second, delete the records.
+    $sql = "
+      DELETE FROM {feature} WHERE feature_id IN (
+        SELECT F.feature_id
+        FROM {feature} F
+          $join
+        WHERE 1=1 $where
+      )
+    ";
+    chado_query($sql, $args);
+    print "Deletiong completed successfully. Deleted $num_deleted feature(s).\n";
+
+    print "Now removing orphaned feature pages\n";
     chado_cleanup_orphaned_nodes('feature');
   }
   catch (Exception $e) {
@@ -217,33 +202,6 @@ function tripal_feature_delete_features($organism_id, $analysis_id, $seq_type,
     watchdog_exception('tripal_feature', $e);
     return 0;
   }
-  chado_set_active($previous_db);
   print "\nDone\n";
 }
 
-/**
- * Function to delete features based on an analysis passed in. This has not yet been
- * implemented in the form
- *
- * @todo: Implement this functionality and then add back in the form field
- *
- * @param $organism_id
- *   (Optional) The organism_id of the features to delete
- * @param $analysis_id
- *   (Optional) The analysis_id of the features to delete
- * @param $seq_type
- *   (Optional) The cvterm.name of the feature types to delete
- * @param $is_unique
- *   (Optional) A Boolean stating whether the names are unique (ie: feature.uniquename)
- *   or not (ie: feature.name)
- * @param $feature_names
- *   (Optional) A space separated list of the names of features to delete
- * @param $job
- *   The tripal_job id
- *
- * @ingroup tripal_feature
- */
-function tripal_feature_delete_by_analysis($organism_id, $analysis_id, $seq_type,
-  $is_unique, $feature_names, $job = NULL) {
-
-}

+ 250 - 126
tripal_feature/includes/tripal_feature.gff_loader.inc

@@ -125,20 +125,26 @@ function tripal_feature_gff3_load_form() {
                          database will not be altered.'),
     '#default_value' => 1,
   );
-  $form['import_options']['refresh']= array(
-    '#type' => 'checkbox',
-    '#title' => t('Import all and replace'),
-    '#required' => FALSE,
-    '#description' => t('Existing features will be updated and feature properties not
-                         present in the GFF file will be removed.'),
-  );
-  $form['import_options']['remove']= array(
-    '#type' => 'checkbox',
-    '#title' => t('Delete features'),
-    '#required' => FALSE,
-    '#description' => t('Features present in the GFF file that exist in the database
-                         will be removed rather than imported'),
-  );
+// SPF: there are bugs in refreshing and removing features.  The bugs arise
+//      if a feature in the GFF does not have a uniquename. GenSAS will auto
+//      generate this uniquename and it will not be the same as a previous
+//      load because it uses the date.  This causes orphaned CDS/exons, UTRs
+//      to be left behind during a delete or refresh.  So, the short term
+//      fix is to remove these options.
+//   $form['import_options']['refresh']= array(
+//     '#type' => 'checkbox',
+//     '#title' => t('Import all and replace'),
+//     '#required' => FALSE,
+//     '#description' => t('Existing features will be updated and feature properties not
+//                          present in the GFF file will be removed.'),
+//   );
+//   $form['import_options']['remove']= array(
+//     '#type' => 'checkbox',
+//     '#title' => t('Delete features'),
+//     '#required' => FALSE,
+//     '#description' => t('Features present in the GFF file that exist in the database
+//                          will be removed rather than imported'),
+//   );
   $form['import_options']['create_organism']= array(
     '#type' => 'checkbox',
     '#title' => t('Create organism'),
@@ -218,8 +224,8 @@ function tripal_feature_gff3_load_form_validate($form, &$form_state) {
   $create_organism = $form_state['values']['create_organism'];
   $add_only = $form_state['values']['add_only'];
   $update   = $form_state['values']['update'];
-  $refresh  = $form_state['values']['refresh'];
-  $remove   = $form_state['values']['remove'];
+  $refresh  = 0; //$form_state['values']['refresh'];
+  $remove   = 0; //$form_state['values']['remove'];
   $use_transaction   = $form_state['values']['use_transaction'];
   $line_number   = trim($form_state['values']['line_number']);
   $landmark_type   = trim($form_state['values']['landmark_type']);
@@ -264,8 +270,8 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
   $organism_id = $form_state['values']['organism_id'];
   $add_only = $form_state['values']['add_only'];
   $update   = $form_state['values']['update'];
-  $refresh  = $form_state['values']['refresh'];
-  $remove   = $form_state['values']['remove'];
+  $refresh  = 0; //$form_state['values']['refresh'];
+  $remove   = 0; //$form_state['values']['remove'];
   $analysis_id = $form_state['values']['analysis_id'];
   $use_transaction   = $form_state['values']['use_transaction'];
   $target_organism_id = $form_state['values']['target_organism_id'];
@@ -381,10 +387,19 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
   $job = NULL) {
 
   $ret = array();
+  $date = getdate();
 
-  // empty the temp table
+  // An array that stores CVterms that have been looked up so we don't have
+  // to do the database query every time.
+  $cvterm_lookup = array();
+
+  // empty the temp tables
   $sql = "DELETE FROM {tripal_gff_temp}";
   chado_query($sql);
+  $sql = "DELETE FROM {tripal_gffcds_temp}";
+  chado_query($sql);
+  $sql = "DELETE FROM {tripal_gffprotein_temp}";
+  chado_query($sql);
 
   // begin the transaction
   $transaction = null;
@@ -429,7 +444,6 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
         "Cannot find the 'sequence' ontology", array());
       return '';
     }
-
     // get the organism for which this GFF3 file belongs
     $sql = "SELECT * FROM {organism} WHERE organism_id = :organism_id";
     $organism = chado_query($sql, array(':organism_id' => $organism_id))->fetchObject();
@@ -454,6 +468,23 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
        (lower(CVT.name) = lower(:name) or lower(CVTS.synonym) = lower(:synonym))
      ";
 
+    // If a landmark type was provided then pre-retrieve that.
+    if ($landmark_type) {
+      $query = array(
+        ':cv_id' => $cv->cv_id,
+        ':name' => $landmark_type,
+        ':synonym' => $landmark_type
+      );
+      $result = chado_query($sel_cvterm_sql, $query);
+      $landmark_cvterm = $result->fetchObject();
+      if (!$landmark_cvterm) {
+        tripal_report_error('tripal_feature', TRIPAL_ERROR,
+          'cannot find landmark feature type \'%landmark_type\'.',
+          array('%landmark_type' => $landmark_type));
+        return '';
+      }
+    }
+
     // iterate through each line of the GFF file
     print "Parsing Line $line_num (0.00%). Memory: " . number_format(memory_get_usage()) . " bytes\r";
     while ($line = fgets($fh)) {
@@ -491,20 +522,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
         $rstart = $region_matches[2];
         $rend = $region_matches[3];
         if ($landmark_type) {
-          $query = array(
-            ':cv_id' => $cv->cv_id,
-            ':name' => $landmark_type,
-            ':synonym' => $landmark_type
-          );
-          $result = chado_query($sel_cvterm_sql, $query);
-          $cvterm = $result->fetchObject();
-          if (!$cvterm) {
-            tripal_report_error('tripal_feature', TRIPAL_ERROR,
-             'cannot find feature type \'%landmark_type\' on line %line_num of the GFF file',
-              array('%landmark_type' => $landmark_type, '%line_num' => $line_num));
-            return '';
-          }
-          tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $rid,
+          tripal_feature_load_gff3_feature($organism, $analysis_id, $landmark_cvterm, $rid,
             $rid, '', 'f', 'f', 1, 0);
         }
         continue;
@@ -561,13 +579,18 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
       if (strcmp($phase, '.') == 0) {
         $phase = '';
       }
-      $result = chado_query($sel_cvterm_sql, array(':cv_id' => $cv->cv_id, ':name' => $type, ':synonym' => $type));
-
-      $cvterm = $result->fetchObject();
-      if (!$cvterm) {
-        tripal_report_error('tripal_feature', TRIPAL_ERROR, 'cannot find feature term \'%type\' on line %line_num of the GFF file',
-          array('%type' => $type, '%line_num' => $line_num));
-        return '';
+      if (array_key_exists($type, $cvterm_lookup)) {
+        $cvterm = $cvterm_lookup[$type];
+      }
+      else {
+        $result = chado_query($sel_cvterm_sql, array(':cv_id' => $cv->cv_id, ':name' => $type, ':synonym' => $type));
+        $cvterm = $result->fetchObject();
+        $cvterm_lookup[$type] = $cvterm;
+        if (!$cvterm) {
+          tripal_report_error('tripal_feature', TRIPAL_ERROR, 'cannot find feature term \'%type\' on line %line_num of the GFF file',
+            array('%type' => $type, '%line_num' => $line_num));
+          return '';
+        }
       }
 
       // break apart each of the attributes
@@ -650,7 +673,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
               }
             }
             else {
-              // we found the organism in the database so use it
+              // We found the organism in the database so use it.
               $feature_organism = $org[0];
             }
           }
@@ -661,7 +684,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
             $skip_feature = 1;
           }
         }
-        // get the list of non-reserved attributes
+        // Get the list of non-reserved attributes.
         elseif (strcmp($tag_name, 'Alias') != 0        and strcmp($tag_name, 'Parent') != 0 and
                 strcmp($tag_name, 'Target') != 0       and strcmp($tag_name, 'Gap') != 0 and
                 strcmp($tag_name, 'Derives_from') != 0 and strcmp($tag_name, 'Note') != 0 and
@@ -673,49 +696,46 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
           }
         }
       }
-      // if neither name nor uniquename are provided then generate one
+      // If neither name nor uniquename are provided then generate one.
       if (!$attr_uniquename and !$attr_name) {
-        // check if an alternate ID field is suggested, if so, then use
-        // that for the name
+        // Check if an alternate ID field is suggested, if so, then use
+        // that for the name.
         if (array_key_exists($alt_id_attr, $tags)) {
           $attr_uniquename = $tags[$alt_id_attr][0];
           $attr_name = $attr_uniquename;
         }
-        // if the row has a parent then generate a uniquename using the parent name
+        // If the row has a parent then generate a uniquename using the parent name
         // add the date to the name in the event there are more than one child with
         // the same parent.
         elseif (array_key_exists('Parent', $tags)) {
-          $date = getdate();
           $attr_uniquename = $tags['Parent'][0] . "-$type-$landmark-" . $date[0] . ":" . ($fmin + 1) . ".." . $fmax;
           $attr_name = $attr_uniquename;
         }
-        // generate a unique name based on the date, type and location
-        // and set the name to simply be the type
+        // Generate a unique name based on the date, type and location
+        // and set the name to simply be the type.
         else {
-          $date = getdate();
           $attr_uniquename = $date[0] . "-$type-$landmark:" . ($fmin + 1) . ".." . $fmax;
           $attr_name = $type;
         }
       }
 
-      // if a name is not specified then use the unique name as the name
+      // If a name is not specified then use the unique name as the name
       if (strcmp($attr_name, '') == 0) {
         $attr_name = $attr_uniquename;
       }
 
-      // if an ID attribute is not specified then we must generate a
+      // If an ID attribute is not specified then we must generate a
       // unique ID. Do this by combining the attribute name with the date
       // and line number.
       if (!$attr_uniquename) {
-        $date = getdate();
         $attr_uniquename = $attr_name . '-' . $date[0] . '-' . $line_num;
       }
 
-      // make sure the landmark sequence exists in the database.  If the user
-      // has not specified a landmark type (and it's not requiredin the GFF foramt)
-      // then We don't know the type of the landmark so we'll hope that it's unique across
-      // all types for the orgnaism. Only do this test if the landmark and the feature are
-      // different.
+      // Make sure the landmark sequence exists in the database.  If the user
+      // has not specified a landmark type (and it's not required in the GFF
+      // format) then we don't know the type of the landmark so we'll hope
+      // that it's unique across all types for the organism. Only do this
+      // test if the landmark and the feature are different.
       if (!$remove and !(strcmp($landmark, $attr_uniquename) == 0 or strcmp($landmark, $attr_name) == 0)) {
         $select = array(
           'organism_id' => $organism->organism_id,
@@ -760,16 +780,17 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
           return '';
         }
       }
-
-      // if the option is to remove or refresh then we want to remove
+/*
+      // If the option is to remove or refresh then we want to remove
       // the feature from the database.
       if ($remove or $refresh) {
+        // Next remove the feature itself.
         $sql = "DELETE FROM {feature}
                 WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
         $match = array(
-           'organism_id' => $feature_organism->organism_id,
-           'uniquename'  => $attr_uniquename,
-           'type_id'     => $cvterm->cvterm_id
+          'organism_id' => $feature_organism->organism_id,
+          'uniquename'  => $attr_uniquename,
+          'type_id'     => $cvterm->cvterm_id
         );
         $result = chado_delete_record('feature', $match);
         if (!$result) {
@@ -779,19 +800,19 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
         $feature = 0;
         unset($result);
       }
-
-      // add or update the feature and all properties
+ */
+      // Add or update the feature and all properties.
       if ($update or $refresh or $add_only) {
 
-        // add/update the feature
+        // Add/update the feature.
         $feature = tripal_feature_load_gff3_feature($feature_organism, $analysis_id, $cvterm,
           $attr_uniquename, $attr_name, $residues, $attr_is_analysis,
           $attr_is_obsolete, $add_only, $score);
 
         if ($feature) {
 
-          // add a record for this feature to the tripal_gff_temp table for
-          // later lookup
+          // Add a record for this feature to the tripal_gff_temp table for
+          // later lookup.
           $values = array(
             'feature_id' => $feature->feature_id,
             'organism_id' => $feature->organism_id,
@@ -830,7 +851,8 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
           }
           // add parent relationships
           if (array_key_exists('Parent', $tags)) {
-            tripal_feature_load_gff3_parents($feature, $cvterm, $tags['Parent'], $feature_organism->organism_id, $fmin);
+            tripal_feature_load_gff3_parents($feature, $cvterm, $tags['Parent'],
+              $feature_organism->organism_id, $strand, $phase, $fmin, $fmax);
           }
 
           // add target relationships
@@ -851,7 +873,8 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
           }
           // add the Derives_from relationship (e.g. polycistronic genes).
           if (array_key_exists('Derives_from', $tags)) {
-            tripal_feature_load_gff3_derives_from($feature, $tags['Derives_from'][0], $feature_organism);
+            tripal_feature_load_gff3_derives_from($feature, $cvterm, $tags['Derives_from'][0],
+              $feature_organism, $fmin, $fmax);
           }
           // add in the GFF3_source dbxref so that GBrowse can find the feature using the source column
           $source_ref = array('GFF_source:' . $source);
@@ -869,7 +892,76 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
       }
     }
 
+    // Do some last bit of processing.
     if (!$remove) {
+
+      // First, add any protein sequences if needed.
+      $sql = "SELECT feature_id FROM {tripal_gffcds_temp} LIMIT 1 OFFSET 1";
+      $has_cds = chado_query($sql)->fetchField();
+      if ($has_cds) {
+        print "\nAdding protein sequences if CDS exist and no proteins in GFF...\n";
+        $sql = "
+          SELECT F.feature_id, F.name, F.uniquename, TGCT.strand,
+            CVT.cvterm_id, CVT.name as feature_type,
+            min(TGCT.fmin) as fmin, max(TGCT.fmax) as fmax,
+            TGPT.feature_id as protein_id, TGPT.fmin as protein_fmin,
+            TGPT.fmax as protein_fmax
+          FROM {tripal_gffcds_temp} TGCT
+            INNER JOIN {feature} F on F.feature_id = TGCT.parent_id
+            INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
+            LEFT JOIN {tripal_gffprotein_temp} TGPT on TGPT.parent_id = F.feature_id
+          GROUP BY F.feature_id, F.name, F.uniquename, CVT.cvterm_id, CVT.name,
+            TGPT.feature_id, TGPT.fmin, TGPT.fmax, TGCT.strand
+        ";
+        $results = chado_query($sql);
+        $protein_cvterm = tripal_get_cvterm(array(
+          'name' => 'polypeptide',
+          'cv_id' => array(
+            'name' => 'sequence'
+          )
+        ));
+        while ($result = $results->fetchObject()) {
+          // If a protein exists with this same parent then don't add a new
+          // protein.
+          if (!$result->protein_id) {
+            // Get details about this protein
+            $uname = $result->uniquename . '-protein';
+            $name =  $result->name;
+            $values = array(
+              'parent_id' => $result->feature_id,
+              'fmin' => $result->fmin
+            );
+            $min_phase = chado_select_record('tripal_gffcds_temp', array('phase'), $values);
+            $values = array(
+              'parent_id' => $result->feature_id,
+              'fmax' => $result->fmax
+            );
+            $max_phase = chado_select_record('tripal_gffcds_temp', array('phase'), $values);
+
+            $pfmin = $result->fmin;
+            $pfmax = $result->fmax;
+            if ($result->strand == '-1') {
+              $pfmax -= $max_phase[0]->phase;
+            }
+            else {
+              $pfmin += $min_phase[0]->phase;
+            }
+            // Add the new protein record.
+            $feature = tripal_feature_load_gff3_feature($organism, $analysis_id,
+              $protein_cvterm, $uname, $name, '', 'f', 'f', 1, 0);
+            // Add the derives_from relationship.
+            $cvterm = tripal_get_cvterm(array('cvterm_id' => $result->cvterm_id));
+            tripal_feature_load_gff3_derives_from($feature, $cvterm,
+              $result->uniquename, $organism, $pfmin, $pfmax);
+            // Add the featureloc record. Set the start of the protein to
+            // be the start of the coding sequence minus the phase.
+            tripal_feature_load_gff3_featureloc($feature, $organism, $landmark,
+              $pfmin, $pfmax, $strand, '', 'f', 'f', '', 0);
+          }
+        }
+      }
+
+
       print "\nSetting ranks of children...\n";
 
       // get features in a relationship that are also children of an alignment
@@ -977,37 +1069,42 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
  *
  * @ingroup gff3_loader
  */
-function tripal_feature_load_gff3_derives_from($feature, $subject, $organism) {
+function tripal_feature_load_gff3_derives_from($feature, $cvterm, $object,
+  $organism, $fmin, $fmax) {
 
-  // get the subject. If the subject is not in the tripal_gff_temp table
-  // then look for the subject in the feature table using the unique name.
-  // if it is not unique then we can provide an error
+  $type = $cvterm->name;
+
+  // First look for the object feature in the temp table to get its type.
   $values = array(
     'organism_id' => $organism->organism_id,
-    'uniquename' => $subject,
+    'uniquename' => $object,
   );
   $result = chado_select_record('tripal_gff_temp', array('type_name'), $values);
-  $type_id = array();
+  $type_id = NULL;
   if (count($result) > 0) {
-    $type_id = array(
+    $otype = tripal_get_cvterm(array(
       'name' => $result[0]->type_name,
       'cv_id' => array(
         'name' => 'sequence'
-      ),
-    );
+      )
+    ));
+    if ($otype) {
+      $type_id = $otype->cvterm_id;
+    }
   }
 
-  // if we don't have a subject type then look for the feature in the feature table
-  if (empty($type_id)) {
+  // If the object wasn't in the temp table then look for it in the
+  // feature table and get its type.
+  if (!$type_id) {
     $result = chado_select_record('feature', array('type_id'), $values);
     if (count($result) > 1) {
-      watchdog("tripal_feature", "Cannot find subject type for feature,'%subject' , in 'derives_from' relationship. Multiple matching features exist with this uniquename.",
-        array('%subject' => $subject), WATCHDOG_WARNING);
+      watchdog("tripal_feature", "Cannot find feature type for, '%subject' , in 'derives_from' relationship. Multiple matching features exist with this uniquename.",
+        array('%subject' => $object), WATCHDOG_WARNING);
       return '';
     }
     else if (count($result) == 0) {
-      watchdog("tripal_feature", "Cannot find subject type for feature,'%subject' , in 'derives_from' relationship.",
-        array('%subject' => $subject), WATCHDOG_WARNING);
+      watchdog("tripal_feature", "Cannot find feature type for, '%subject' , in 'derives_from' relationship.",
+        array('%subject' => $object), WATCHDOG_WARNING);
       return '';
     }
     else {
@@ -1015,28 +1112,39 @@ function tripal_feature_load_gff3_derives_from($feature, $subject, $organism) {
     }
   }
 
-  // get the subject feature
+  // Get the object feature.
   $match = array(
     'organism_id' => $organism->organism_id,
-    'uniquename' => $subject,
-    'type_id' => array(
-      'name' => $subject_type,
-      'cv_id' => array(
-        'name' => 'sequence'
-      ),
-    ),
+    'uniquename' => $object,
+    'type_id' => $type_id,
   );
-  $sfeature = chado_select_record('feature', array('feature_id'), $match);
-  if (count($sfeature)==0) {
+  $ofeature = chado_select_record('feature', array('feature_id'), $match);
+  if (count($ofeature) == 0) {
     tripal_report_error('tripal_feature', TRIPAL_ERROR, "Could not add 'Derives_from' relationship " .
       "for %uniquename and %subject.  Subject feature, '%subject', " .
       "cannot be found", array('%uniquename' => $feature->uniquename, '%subject' => $subject));
     return;
   }
 
-   // now check to see if the relationship already exists
+  // If this feature is a protein then add it to the tripal_gffprotein_temp.
+  if ($type == 'protein' or $type == 'polypeptide') {
+    $values = array(
+      'feature_id' => $feature->feature_id,
+      'parent_id' => $ofeature[0]->feature_id,
+      'fmin' => $fmin,
+      'fmax' => $fmax
+    );
+    $result = chado_insert_record('tripal_gffprotein_temp', $values);
+    if (!$result) {
+      tripal_report_error('tripal_feature', TRIPAL_ERROR, "Cound not save record in temporary protein table, Cannot continue.", array());
+      exit;
+    }
+  }
+
+   // Now check to see if the relationship already exists. If it does
+   // then just return.
   $values = array(
-    'object_id' => $sfeature[0]->feature_id,
+    'object_id' => $ofeature[0]->feature_id,
     'subject_id' => $feature->feature_id,
     'type_id' => array(
        'cv_id' => array(
@@ -1070,13 +1178,14 @@ function tripal_feature_load_gff3_derives_from($feature, $subject, $organism) {
  *
  * @ingroup gff3_loader
  */
-function tripal_feature_load_gff3_parents($feature, $cvterm, $parents, $organism_id, $fmin) {
+function tripal_feature_load_gff3_parents($feature, $cvterm, $parents,
+  $organism_id, $strand, $phase, $fmin, $fmax) {
 
   $uname = $feature->uniquename;
   $type = $cvterm->name;
   $rel_type = 'part_of';
 
-  // prepare these SQL statements that will be used repeatedly.
+  // Prepare these SQL statements that will be used repeatedly.
   $cvterm_sql = "
     SELECT CVT.cvterm_id
     FROM {cvterm} CVT
@@ -1085,9 +1194,9 @@ function tripal_feature_load_gff3_parents($feature, $cvterm, $parents, $organism
     WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)
   ";
 
-  // iterate through the parents in the list
+  // Iterate through the parents in the list.
   foreach ($parents as $parent) {
-    // get the parent cvterm
+    // Get the parent cvterm.
     $values = array(
       'organism_id' => $organism_id,
       'uniquename' => $parent,
@@ -1136,6 +1245,24 @@ function tripal_feature_load_gff3_parents($feature, $cvterm, $parents, $organism
             array());
         }
       }
+
+      // If this feature is a CDS and now that we know the parent we can
+      // add it to the tripal_gffcds_temp table for later lookup.
+      if ($type == 'CDS') {
+        $values = array(
+          'feature_id' => $feature->feature_id,
+          'parent_id' => $parent_feature->feature_id,
+          'fmin' => $fmin,
+          'fmax' => $fmax,
+          'strand' => $strand,
+          'phase' => $phase,
+        );
+        $result = chado_insert_record('tripal_gffcds_temp', $values);
+        if (!$result) {
+          tripal_report_error('tripal_feature', TRIPAL_ERROR, "Cound not save record in temporary CDS table, Cannot continue.", array());
+          exit;
+        }
+      }
     }
     else {
       tripal_report_error("tripal_feature", TRIPAL_WARNING, "Cannot establish relationship '$uname' ($type) $rel_type '$parent' ($parent_type): Cannot find the parent",
@@ -1482,12 +1609,12 @@ function tripal_feature_load_gff3_alias($feature, $aliases) {
 function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uniquename,
   $name, $residues, $is_analysis = 'f', $is_obsolete = 'f', $add_only, $score) {
 
-  // check to see if the feature already exists
+  // Check to see if the feature already exists.
   $feature = NULL;
   $fselect = array(
-     'organism_id' => $organism->organism_id,
-     'uniquename' => $uniquename,
-     'type_id' => $cvterm->cvterm_id
+    'organism_id' => $organism->organism_id,
+    'uniquename' => $uniquename,
+    'type_id' => $cvterm->cvterm_id
   );
   $columns = array('feature_id', 'name', 'uniquename', 'seqlen', 'organism_id', 'type_id');
   $result = chado_select_record('feature', $columns, $fselect);
@@ -1508,21 +1635,21 @@ function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uni
     $is_analysis = 'TRUE';
   }
 
-  // insert the feature if it does not exist otherwise perform an update
+  // Insert the feature if it does not exist otherwise perform an update.
   if (!$feature) {
     $values = array(
-       'organism_id' => $organism->organism_id,
-       'name' => $name,
-       'uniquename' => $uniquename,
-//       'residues' => $residues,
-//       'seqlen' => drupal_strlen($residues),
-       'md5checksum' => md5($residues),
-       'type_id' => $cvterm->cvterm_id,
-       'is_analysis' => $is_analysis,
-       'is_obsolete' => $is_obsolete,
+      'organism_id' => $organism->organism_id,
+      'name' => $name,
+      'uniquename' => $uniquename,
+//      'residues' => $residues,
+//      'seqlen' => drupal_strlen($residues),
+      'md5checksum' => md5($residues),
+      'type_id' => $cvterm->cvterm_id,
+      'is_analysis' => $is_analysis,
+      'is_obsolete' => $is_obsolete,
     );
-    $result = chado_insert_record('feature', $values);
-    if (!$result) {
+    $feature = (object) chado_insert_record('feature', $values);
+    if (!$feature) {
       tripal_report_error("tripal_feature", TRIPAL_WARNING, "Failed to insert feature '$uniquename' ($cvterm->name)", array());
       return 0;
     }
@@ -1548,17 +1675,13 @@ function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uni
     }
   }
   else {
-    // the feature exists and we don't want to update it so return
+    // The feature exists and we don't want to update it so return
-    // a value of 0.  This will stop all downstream property additions
+    // the existing feature as-is.
-    return 0;
+    return $feature;
   }
 
-  // get the newly added feature
-  $columns = array('feature_id', 'name', 'uniquename', 'seqlen', 'organism_id', 'type_id');
-  $result = chado_select_record('feature', $columns, $fselect);
-  $feature = $result[0];
-
-  // add the analysisfeature entry to the analysisfeature table if it doesn't already exist
+  // Add the analysisfeature entry to the analysisfeature table if
+  // it doesn't already exist.
   $af_values = array(
     'analysis_id' => $analysis_id,
     'feature_id' => $feature->feature_id
@@ -1589,6 +1712,7 @@ function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uni
       }
     }
   }
+
   return $feature;
 }
 

+ 125 - 33
tripal_feature/tripal_feature.install

@@ -48,19 +48,21 @@ function tripal_feature_requirements($phase) {
  * @ingroup tripal_feature
  */
 function tripal_feature_install() {
-  // create the module's data directory
+  // Create the module's data directory.
   tripal_create_files_dir('tripal_feature');
 
-  // add the materialized view
+  // Add the materialized view.
   tripal_feature_add_organism_count_mview();
 
-  // create the temp table we will use for loading GFF files
-  tripal_cv_create_tripal_gff_temp();
+  // Add the custom tables.
+  tripal_feature_add_tripal_gff_temp_table();
+  tripal_feature_add_tripal_gffcds_temp_table();
+  tripal_feature_add_tripal_gffprotein_temp_table();
 
-  // add the vocabularies used by the feature module:
+  // Add the vocabularies used by the feature module.
   tripal_feature_add_cvs();
 
-  // set the default vocabularies
+  // Set the default vocabularies.
   tripal_set_default_cv('feature', 'type_id', 'sequence');
   tripal_set_default_cv('featureprop', 'type_id', 'feature_property');
   tripal_set_default_cv('feature_relationship', 'type_id', 'feature_relationship');
@@ -75,36 +77,117 @@ function tripal_feature_uninstall() {
 
 }
 
+/**
+ * Adds the tripal_gff_temp custom table used by the GFF3 loader.
+ *
+ * The loader empties this table at the start of every import and then stores
+ * a row per loaded feature (feature_id, organism_id, uniquename, type_name)
+ * so later steps can look a feature's type up by organism and uniquename.
+ *
+ * @ingroup tripal_feature
+ */
+function tripal_feature_add_tripal_gff_temp_table() {
+  $schema = array(
+    'table' => 'tripal_gff_temp',
+    'fields' => array(
+      'feature_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'organism_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'uniquename' => array(
+        'type' => 'text',
+        'not null' => TRUE,
+      ),
+      'type_name' => array(
+        'type' => 'varchar',
+        'length' => '1024',
+        'not null' => TRUE,
+      ),
+    ),
+    // Every index needs its own array key: a repeated key silently replaces
+    // the earlier entry, which previously dropped the feature_id index.
+    'indexes' => array(
+      'tripal_gff_temp_idx0' => array('feature_id'),
+      'tripal_gff_temp_idx1' => array('organism_id'),
+      'tripal_gff_temp_idx2' => array('uniquename'),
+    ),
+    'unique keys' => array(
+      'tripal_gff_temp_uq0' => array('feature_id'),
+      'tripal_gff_temp_uq1' => array('uniquename', 'organism_id', 'type_name'),
+    ),
+  );
+  chado_create_custom_table('tripal_gff_temp', $schema, TRUE);
+}
+
 /**
- * Create a temporary table used for loading gff3 files
  *
- * @ingroup tripal_feature
  */
-function tripal_cv_create_tripal_gff_temp() {
-  // the tripal_obo_temp table is used for temporary housing of records when loading OBO files
-  // we create it here using plain SQL because we want it to be in the chado schema but we
-  // do not want to use the Tripal Custom Table API because we don't want it to appear in the
-  // list of custom tables.  It needs to be available for the Tripal Chado API so we create it
-  // here and then define it in the tripal_cv/api/tripal_cv.schema.api.inc
-  if (!db_table_exists('chado.tripal_gff_temp')) {
-    $sql = "
-      CREATE TABLE {tripal_gff_temp} (
-        feature_id integer NOT NULL,
-        organism_id integer NOT NULL,
-        uniquename text NOT NULL,
-        type_name character varying(1024) NOT NULL,
-        CONSTRAINT tripal_gff_temp_uq0 UNIQUE (feature_id),
-        CONSTRAINT tripal_gff_temp_uq1 UNIQUE (uniquename, organism_id, type_name)
-      );
-    ";
-    chado_query($sql);
-    $sql = "CREATE INDEX tripal_gff_temp_idx0 ON {tripal_gff_temp} USING btree (feature_id)";
-    chado_query($sql);
-    $sql = "CREATE INDEX tripal_gff_temp_idx1 ON {tripal_gff_temp} USING btree (organism_id)";
-    chado_query($sql);
-    $sql = "CREATE INDEX tripal_gff_temp_idx2 ON {tripal_gff_temp} USING btree (uniquename)";
-    chado_query($sql);
-  }
+function tripal_feature_add_tripal_gffcds_temp_table() {
+  // Schema for the table that remembers every CDS seen while a GFF3 file is
+  // loaded: the CDS feature, its parent feature, and the CDS's phase, strand
+  // and fmin/fmax coordinates. The loader reads it back after parsing to
+  // decide whether protein features still need to be added.
+  $schema = array(
+    'table' => 'tripal_gffcds_temp',
+    'fields' => array(
+      'feature_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'parent_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'phase' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'strand' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'fmin' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'fmax' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+    ),
+    // Every index needs its own array key: a repeated key silently replaces
+    // the earlier entry, which previously dropped the feature_id index.
+    // NOTE(review): the names keep the 'tripal_gff_temp_' prefix used by the
+    // original definition -- confirm whether table-specific names are wanted.
+    'indexes' => array(
+      'tripal_gff_temp_idx0' => array('feature_id'),
+      'tripal_gff_temp_idx1' => array('parent_id'),
+    ),
+    'unique keys' => array(
+      'tripal_gff_temp_uq0' => array('feature_id'),
+    ),
+  );
+  chado_create_custom_table('tripal_gffcds_temp', $schema, TRUE);
+}
+
+/**
+ * Adds the tripal_gffprotein_temp custom table used by the GFF3 loader.
+ *
+ * The loader stores a row here (feature_id, parent_id, fmin, fmax) for each
+ * protein or polypeptide feature that has a Derives_from parent, and joins
+ * against it later so that no second protein is added for the same parent.
+ *
+ * @ingroup tripal_feature
+ */
+function tripal_feature_add_tripal_gffprotein_temp_table() {
+  $schema = array(
+    'table' => 'tripal_gffprotein_temp',
+    'fields' => array(
+      'feature_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'parent_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'fmin' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'fmax' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+    ),
+    // Every index needs its own array key: a repeated key silently replaces
+    // the earlier entry, which previously dropped the feature_id index.
+    // NOTE(review): the names keep the 'tripal_gff_temp_' prefix used by the
+    // original definition -- confirm whether table-specific names are wanted.
+    'indexes' => array(
+      'tripal_gff_temp_idx0' => array('feature_id'),
+      'tripal_gff_temp_idx1' => array('parent_id'),
+    ),
+    'unique keys' => array(
+      'tripal_gff_temp_uq0' => array('feature_id'),
+    ),
+  );
+  chado_create_custom_table('tripal_gffprotein_temp', $schema, TRUE);
 }
 
 /**
@@ -412,4 +495,13 @@ function tripal_feature_update_7201() {
     $error = $e->getMessage();
     throw new DrupalUpdateException('Failed to complete update' . $error);
   }
+}
+
+/**
+ * Adds the temporary tables used for loading GFF files.
+ */
+function tripal_feature_update_7202() {
+  // The install helpers that define the three GFF loader temp tables, listed
+  // in the order they are to be run.
+  $table_builders = array(
+    'tripal_feature_add_tripal_gff_temp_table',
+    'tripal_feature_add_tripal_gffcds_temp_table',
+    'tripal_feature_add_tripal_gffprotein_temp_table',
+  );
+
+  // Invoke each helper by name; none of them takes arguments.
+  foreach ($table_builders as $build_table) {
+    $build_table();
+  }
 }

+ 4 - 6
tripal_feature/tripal_feature.module

@@ -13,7 +13,6 @@
  */
 
 require_once 'api/tripal_feature.api.inc';
-require_once 'api/tripal_feature.schema.api.inc';
 require_once 'api/tripal_feature.DEPRECATED.inc';
 
 require_once 'theme/tripal_feature.theme.inc';
@@ -170,7 +169,7 @@ function tripal_feature_menu() {
     'access arguments' => array('administer tripal feature'),
     'type' => MENU_LOCAL_TASK,
     'file' =>  'includes/tripal_core.toc.inc',
-    'file path' => drupal_get_path('module', 'tripal_core'), 
+    'file path' => drupal_get_path('module', 'tripal_core'),
     'weight' => 3
   );
   $items['admin/tripal/chado/tripal_feature/configuration'] = array(
@@ -401,9 +400,8 @@ function tripal_feature_job_describe_args($callback, $args) {
     }
 
     $new_args['Sequence Type'] = $args[2];
-    $new_args['Is Unique Name'] = $args[3];
+    $new_args['Is Unique Name'] = $args[3] ? 'Yes' : 'No';
     $new_args['Features Names'] = $args[4];
-
   }
   elseif ($callback == 'tripal_feature_load_gff3') {
 
@@ -492,8 +490,8 @@ function tripal_feature_match_features_page($id) {
     GROUP BY F.name, F.uniquename, F.feature_id, O.genus, O.species,
       O.organism_id, CVT.cvterm_id, CVT.name, CF.nid
   ";
-  
-  $args = array(':uname' => $id, ':fname' => $id, ':sname' => $id); 
+
+  $args = array(':uname' => $id, ':fname' => $id, ':sname' => $id);
   $results = chado_query($sql, $args);
 
   $num_matches = 0;

+ 0 - 1
tripal_organism/api/tripal_organism.api.inc

@@ -153,7 +153,6 @@ function tripal_get_organism_select_options($syncd_only = TRUE) {
 
     // iterate through the organisms and build an array of those that are synced
     foreach ($orgs as $org) {
-      $args = array(':organism_id' => $org->organism_id);
       $org_list[$org->organism_id] = $org->genus . ' ' . $org->species;
     }
   }