Forráskód Böngészése

Improved admin delete feature interface, added API for analysis select options, updated GFF to add proteins if not in GFF, fixed a few bugs

Stephen Ficklin 9 éve
szülő
commit
deb9d0059f

+ 43 - 1
tripal_analysis/api/tripal_analysis.api.inc

@@ -108,7 +108,7 @@ function tripal_get_analysis($identifier, $options) {
     $property = $identifiers['property'];
     unset($identifiers['property']);
     $analysis = chado_get_record_with_property(
-      array('table' => 'analysis', 'base_records' => $identifiers), 
+      array('table' => 'analysis', 'base_records' => $identifiers),
       array('type_name' => $property)
     );
   }
@@ -154,4 +154,46 @@ function tripal_get_analysis($identifier, $options) {
   else {
     return $analysis;
   }
+}
+/**
+ * Returns a list of analyses to use as the options of a select list.
+ *
+ * The first element of the returned array (key 0) is always the placeholder
+ * 'Select an analysis'.
+ *
+ * @param $syncd_only
+ *   Whether or not to return all chado analyses or just those sync'd with drupal. Defaults
+ *   to TRUE (only sync'd analyses)
+ * @return
+ *   An array of analyses where each value is the analysis name and the keys
+ *   are analysis_id's. When $syncd_only is TRUE only analyses that have a
+ *   matching record in the chado_analysis table are included.
+ *
+ * @ingroup tripal_analysis_api
+ */
+function tripal_get_analysis_select_options($syncd_only = TRUE) {
+  $analysis_list = array();
+  $analysis_list[] = 'Select an analysis';
+
+  if ($syncd_only) {
+    // Restrict the list to analyses that have a record in the
+    // chado_analysis table. The join must use the CA alias declared in the
+    // FROM clause.
+    $sql = "
+      SELECT A.analysis_id, A.name
+      FROM public.chado_analysis CA
+        INNER JOIN {analysis} A ON A.analysis_id = CA.analysis_id
+      ORDER BY A.name
+    ";
+  }
+  else {
+    // Use every analysis in Chado regardless of whether it has been sync'd.
+    $sql = "SELECT analysis_id, name FROM {analysis} ORDER BY name";
+  }
+  $analyses = chado_query($sql);
+
+  // Iterate through the analyses and build the options array keyed by
+  // analysis_id.
+  foreach ($analyses as $analysis) {
+    $analysis_list[$analysis->analysis_id] = $analysis->name;
+  }
+  return $analysis_list;
 }

+ 1 - 1
tripal_core/api/tripal_core.chado_query.api.inc

@@ -1499,7 +1499,7 @@ function chado_query($sql, $args = array()) {
     // the featureloc table has some indexes that use function that call other functions
     // and those calls do not reference a schema, therefore, any tables with featureloc
     // must automaticaly have the chado schema set as active to find
-    if (preg_match('/chado.featureloc/i', $sql)) {
+    if (preg_match('/chado.featureloc/i', $sql) or preg_match('/chado.feature/i', $sql)) {
       $previous_db = chado_set_active('chado') ;
       $results = db_query($sql, $args);
       chado_set_active($previous_db);

+ 10 - 3
tripal_core/includes/tripal_core.jobs.inc

@@ -252,10 +252,17 @@ function tripal_jobs_view($job_id) {
 
   // We do not know what the arguments are for and we want to provide a
   // meaningful description to the end-user. So we use a callback function
-  // deinfed in the module that created the job to describe in an array
+  // defined in the module that created the job to describe in an array
   // the arguments provided.  If the callback fails then just use the
-  // arguments as they are
-  $args = preg_split("/::/", $job->arguments);
+  // arguments as they are.  Historically, job arguments were separated with
+  // two colons. We now store them as a serialized array. So, we need to handle
+  // both cases.
+  if (preg_match("/::/", $job->arguments)) {
+    $args = preg_split("/::/", $job->arguments);
+  }
+  else {
+    $args = unserialize($job->arguments);
+  }
   $arg_hook = $job->modulename . "_job_describe_args";
   if (is_callable($arg_hook)) {
     $new_args = call_user_func_array($arg_hook, array($job->callback, $args));

+ 0 - 53
tripal_feature/api/tripal_feature.schema.api.inc

@@ -1,53 +0,0 @@
-<?php
-/**
- * @file
- * Further describe some of the feature tables to the tripal schema
- */
-
-/**
- * Implements hook_chado_schema_v1_2_tripal_gff_temp()
- * Purpose: To describe the structure of 'tripal_gff_temp' to tripal
- * @see chado_insert_record()
- * @see chado_update_record()
- * @see chado_select_record()
- *
- * @return
- *    An array describing the 'tripal_gff_temp' table
- *
- * @ingroup tripal_chado_v1.2_schema_api
- *
- */
-function tripal_feature_chado_schema_v1_2_tripal_gff_temp() {
-  $schema = array(
-    'table' => 'tripal_gff_temp',
-    'fields' => array(
-      'feature_id' => array(
-        'type' => 'int',
-        'not null' => TRUE,
-      ),
-      'organism_id' => array(
-        'type' => 'int',
-        'not null' => TRUE,
-      ),
-      'uniquename' => array(
-        'type' => 'text',
-        'not null' => TRUE,
-      ),
-      'type_name' => array(
-        'type' => 'varchar',
-        'length' => '1024',
-        'not null' => TRUE,
-      ),
-    ),
-    'indexes' => array(
-      'tripal_gff_temp_idx0' => array('feature_id'),
-      'tripal_gff_temp_idx0' => array('organism_id'),
-      'tripal_gff_temp_idx1' => array('uniquename'),
-    ),
-    'unique keys' => array(
-      'tripal_gff_temp_uq0' => array('feature_id'),
-      'tripal_gff_temp_uq1' => array('uniquename', 'organism_id', 'type_name'),
-    ),
-  );
-  return $schema;
-}

+ 67 - 109
tripal_feature/includes/tripal_feature.delete.inc

@@ -10,14 +10,6 @@
  * @ingroup tripal_feature
  */
 function tripal_feature_delete_form() {
-  // get the list of organisms
-  $sql = "SELECT * FROM {organism} ORDER BY genus, species";
-  $org_rset = chado_query($sql);
-  $organisms = array();
-  $organisms[''] = '';
-  while ($organism = $org_rset->fetchObject()) {
-    $organisms[$organism->organism_id] = "$organism->genus $organism->species ($organism->common_name)";
-  }
   $form['desc'] = array(
     '#markup' => t("Use one or more of the following fields to identify sets of features to be deleted."),
   );
@@ -35,12 +27,15 @@ function tripal_feature_delete_form() {
     '#description' => t('Select this checbox if the names listed in the feature
       names box above are the unique name of the feature rather than the human readable names.'),
   );
+  $cv = tripal_get_cv(array('name' => 'sequence'));
   $form['seq_type']= array(
-    '#type' => 'textfield',
-    '#title' => t('Sequence Type'),
-    '#description' => t('Please enter the Sequence Ontology term that describes the features to be deleted. Use in conjunction with an organism or anaylysis.'),
+   '#title'       => t('Feature Type'),
+   '#type'        => 'textfield',
+   '#description' => t("Choose the feature type."),
+   '#autocomplete_path' => "admin/tripal/chado/tripal_cv/cvterm/auto_name/$cv->cv_id",
   );
 
+  $organisms = tripal_get_organism_select_options(FALSE);
   $form['organism_id'] = array(
    '#title'       => t('Organism'),
    '#type'        => 'select',
@@ -48,23 +43,13 @@ function tripal_feature_delete_form() {
    '#options'     => $organisms,
   );
 
-
-  // get the list of analyses
-  $sql = "SELECT * FROM {analysis} ORDER BY name";
-  $org_rset = chado_query($sql);
-  $analyses = array();
-  $analyses[''] = '';
-  while ($analysis = $org_rset->fetchObject()) {
-    $analyses[$analysis->analysis_id] = "$analysis->name ($analysis->program $analysis->programversion, $analysis->sourcename)";
-  }
-  //  TODO: ADD THIS BACK IN LATER
-  //
-  //   $form['analysis']['analysis_id'] = array (
-  //     '#title'       => t('Analysis'),
-  //     '#type'        => t('select'),
-  //     '#description' => t("Choose the analysis for which associated features will be deleted."),
-  //     '#options'     => $analyses,
-  //   );
+  $analyses = tripal_get_analysis_select_options(FALSE);
+  $form['analysis_id'] = array (
+    '#title'       => t('Analysis'),
+    '#type'        => t('select'),
+    '#description' => t("Choose the analysis for which associated features will be deleted."),
+    '#options'     => $analyses,
+  );
 
   $form['button'] = array(
     '#type' => 'submit',
@@ -81,30 +66,13 @@ function tripal_feature_delete_form() {
 function tripal_feature_delete_form_validate($form, &$form_state) {
   $organism_id   = $form_state['values']['organism_id'];
   $seq_type      = trim($form_state['values']['seq_type']);
-  //$analysis_id   = $form_state['values']['analysis_id'];
+  $analysis_id   = $form_state['values']['analysis_id'];
   $is_unique     = $form_state['values']['is_unique'];
   $feature_names = $form_state['values']['feature_names'];
 
-  if (!$organism_id and !$seq_type and !$feature_names) { // !$anaysis_id and
+  if (!$analysis_id and !$organism_id and !$seq_type and !$feature_names) {
     form_set_error('feature_names', t("Please select at least one option"));
   }
-
-  // check to make sure the types exists
-  if ($seq_type) {
-    $cvtermsql = "
-      SELECT CVT.cvterm_id
-      FROM {cvterm} CVT
-        INNER JOIN {cv} CV on CVT.cv_id = CV.cv_id
-        LEFT JOIN {cvtermsynonym} CVTS on CVTS.cvterm_id = CVT.cvterm_id
-      WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)
-    ";
-    $cvterm = chado_query($cvtermsql,
-      array(':cvname' => 'sequence', ':name' => $seq_type, ':synonym' => $seq_type))->fetchObject();
-    if (!$cvterm) {
-      form_set_error('seq_type', t("The Sequence Ontology (SO) term selected for the " .
-      "sequence type is not available in the database. Please check spelling or select another."));
-    }
-  }
 }
 
 /**
@@ -117,13 +85,11 @@ function tripal_feature_delete_form_submit($form, &$form_state) {
 
   $organism_id   = $form_state['values']['organism_id'];
   $seq_type      = trim($form_state['values']['seq_type']);
-  //$analysis_id   = $form_state['values']['analysis_id'];
+  $analysis_id   = $form_state['values']['analysis_id'];
   $is_unique     = $form_state['values']['is_unique'];
   $feature_names = $form_state['values']['feature_names'];
-  $analysis_id = NULL;
 
   $args = array($organism_id, $analysis_id, $seq_type, $is_unique, $feature_names);
-
   tripal_add_job("Delete features", 'tripal_feature',
     'tripal_feature_delete_features', $args, $user->uid);
 }
@@ -153,21 +119,14 @@ function tripal_feature_delete_features($organism_id, $analysis_id, $seq_type,
   global $user;
   $match = array();
 
-  // Deleting of features will cause a cascade delete on the
-  // featureloc table which in turn will wind up calling create_point
-  // function which is not prefix with the schema, and an error occurs.
-  // Therefore, we set the active database to chado to get around that
-  // problem.
-  $previous_db = chado_set_active('chado');
-  
-  // begin the transaction
+  // Begin the transaction.
   $transaction = db_transaction();
   print "\nNOTE: Deleting features is performed using a database transaction. \n" .
       "If the load fails or is terminated prematurely then the entire set of \n" .
       "deletions is rolled back and will not be found in the database\n\n";
   try {
 
-    // if feature names have been provided then handle that separately
+    // If feature names have been provided then handle those
     if ($feature_names) {
       $names = preg_split('/\s+/', $feature_names);
       if (sizeof($names) == 1) {
@@ -181,33 +140,59 @@ function tripal_feature_delete_features($organism_id, $analysis_id, $seq_type,
       }
       $num_deletes = chado_select_record('feature', array('count(*) as cnt'), $match);
       print "Deleting " . $num_deletes[0]->cnt . " features\n";
-      
+
       chado_delete_record('feature', $match);
+      return;
+    }
+
+    // Now handle the combination of all other inputs.
+    $args = array();
+    $sql = "";
+    $join = '';
+    $where = '';
+    if ($analysis_id) {
+      $join .= 'INNER JOIN {analysisfeature} AF on F.feature_id = AF.feature_id ';
+      $join .= 'INNER JOIN {analysis} A on A.analysis_id = AF.analysis_id ';
+      $where .= 'AND A.analysis_id = :analysis_id ';
+      $args[':analysis_id'] = $analysis_id;
     }
-  
-    // if the user has provided an analysis_id then handle that separately
-    elseif ($analysis_id) {
-      tripal_feature_delete_by_analysis();
+    if ($organism_id) {
+      $where .= 'AND F.organism_id = :organism_id ';
+      $args[':organism_id'] = $organism_id;
     }
-    else {
-  
-      if ($organism_id) {
-        $match['organism_id'] = $organism_id;
-      }
-      if ($seq_type) {
-        $match['type_id'] = array(
-          'name' => $seq_type,
-          'cv_id' => array(
-            'name' => 'sequence'
-          ),
-        );
-      }
-      $num_deletes = chado_select_record('feature', array('count(*) as cnt'), $match);
-      print "Deleting " . $num_deletes[0]->cnt . " features\n";
-      chado_delete_record('feature', $match);
+    if ($seq_type) {
+      $join .= 'INNER JOIN {cvterm} CVT ON CVT.cvterm_id = F.type_id';
+      $where .= 'AND CVT.name = :type_name';
+      $args[':type_name'] = $seq_type;
     }
-  
-    print "Removing orphaned feature pages\n";
+
+    // Do not perform a delete if we have no additions to the where clause
+    // otherwise all features will be deleted and this is probably not what
+    // is wanted.
+    if (!$where) {
+      throw new Exception('Cannot delete features as no filters are available');
+    }
+    // First, count the number of records to be deleted
+    $sql = "
+      SELECT count(F.feature_id)
+      FROM {feature} F
+        $join
+      WHERE 1=1 $where
+    ";
+    $num_deleted = chado_query($sql, $args)->fetchField();
+    // Second, delete the records.
+    $sql = "
+      DELETE FROM {feature} WHERE feature_id IN (
+        SELECT F.feature_id
+        FROM {feature} F
+          $join
+        WHERE 1=1 $where
+      )
+    ";
+    chado_query($sql, $args);
+    print "Deletiong completed successfully. Deleted $num_deleted feature(s).\n";
+
+    print "Now removing orphaned feature pages\n";
     chado_cleanup_orphaned_nodes('feature');
   }
   catch (Exception $e) {
@@ -217,33 +202,6 @@ function tripal_feature_delete_features($organism_id, $analysis_id, $seq_type,
     watchdog_exception('tripal_feature', $e);
     return 0;
   }
-  chado_set_active($previous_db);
   print "\nDone\n";
 }
 
-/**
- * Function to delete features based on an analysis passed in. This has not yet been
- * implemented in the form
- *
- * @todo: Implement this functionality and then add back in the form field
- *
- * @param $organism_id
- *   (Optional) The organism_id of the features to delete
- * @param $analysis_id
- *   (Optional) The analysis_id of the features to delete
- * @param $seq_type
- *   (Optional) The cvterm.name of the feature types to delete
- * @param $is_unique
- *   (Optional) A Boolean stating whether the names are unique (ie: feature.uniquename)
- *   or not (ie: feature.name)
- * @param $feature_names
- *   (Optional) A space separated list of the names of features to delete
- * @param $job
- *   The tripal_job id
- *
- * @ingroup tripal_feature
- */
-function tripal_feature_delete_by_analysis($organism_id, $analysis_id, $seq_type,
-  $is_unique, $feature_names, $job = NULL) {
-
-}

+ 217 - 104
tripal_feature/includes/tripal_feature.gff_loader.inc

@@ -125,20 +125,26 @@ function tripal_feature_gff3_load_form() {
                          database will not be altered.'),
     '#default_value' => 1,
   );
-  $form['import_options']['refresh']= array(
-    '#type' => 'checkbox',
-    '#title' => t('Import all and replace'),
-    '#required' => FALSE,
-    '#description' => t('Existing features will be updated and feature properties not
-                         present in the GFF file will be removed.'),
-  );
-  $form['import_options']['remove']= array(
-    '#type' => 'checkbox',
-    '#title' => t('Delete features'),
-    '#required' => FALSE,
-    '#description' => t('Features present in the GFF file that exist in the database
-                         will be removed rather than imported'),
-  );
+// SPF: there are bugs in refreshing and removing features.  The bugs arise
+//      if a feature in the GFF does not have a uniquename. GenSAS will auto
+//      generate this uniquename and it will not be the same as a previous
+//      load because it uses the date.  This causes orphaned CDS/exons, UTRs
+//      to be left behind during a delete or refresh.  So, the short term
+//      fix is to remove these options.
+//   $form['import_options']['refresh']= array(
+//     '#type' => 'checkbox',
+//     '#title' => t('Import all and replace'),
+//     '#required' => FALSE,
+//     '#description' => t('Existing features will be updated and feature properties not
+//                          present in the GFF file will be removed.'),
+//   );
+//   $form['import_options']['remove']= array(
+//     '#type' => 'checkbox',
+//     '#title' => t('Delete features'),
+//     '#required' => FALSE,
+//     '#description' => t('Features present in the GFF file that exist in the database
+//                          will be removed rather than imported'),
+//   );
   $form['import_options']['create_organism']= array(
     '#type' => 'checkbox',
     '#title' => t('Create organism'),
@@ -218,8 +224,8 @@ function tripal_feature_gff3_load_form_validate($form, &$form_state) {
   $create_organism = $form_state['values']['create_organism'];
   $add_only = $form_state['values']['add_only'];
   $update   = $form_state['values']['update'];
-  $refresh  = $form_state['values']['refresh'];
-  $remove   = $form_state['values']['remove'];
+  $refresh  = 0; //$form_state['values']['refresh'];
+  $remove   = 0; //$form_state['values']['remove'];
   $use_transaction   = $form_state['values']['use_transaction'];
   $line_number   = trim($form_state['values']['line_number']);
   $landmark_type   = trim($form_state['values']['landmark_type']);
@@ -264,8 +270,8 @@ function tripal_feature_gff3_load_form_submit($form, &$form_state) {
   $organism_id = $form_state['values']['organism_id'];
   $add_only = $form_state['values']['add_only'];
   $update   = $form_state['values']['update'];
-  $refresh  = $form_state['values']['refresh'];
-  $remove   = $form_state['values']['remove'];
+  $refresh  = 0; //$form_state['values']['refresh'];
+  $remove   = 0; //$form_state['values']['remove'];
   $analysis_id = $form_state['values']['analysis_id'];
   $use_transaction   = $form_state['values']['use_transaction'];
   $target_organism_id = $form_state['values']['target_organism_id'];
@@ -381,10 +387,16 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
   $job = NULL) {
 
   $ret = array();
+  $date = getdate();
 
-  // empty the temp table
+  // empty the temp tables
   $sql = "DELETE FROM {tripal_gff_temp}";
   chado_query($sql);
+  $sql = "DELETE FROM {tripal_gffcds_temp}";
+  chado_query($sql);
+  $sql = "DELETE FROM {tripal_gffprotein_temp}";
+  chado_query($sql);
+
 
   // begin the transaction
   $transaction = null;
@@ -650,7 +662,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
               }
             }
             else {
-              // we found the organism in the database so use it
+              // We found the organism in the database so use it.
               $feature_organism = $org[0];
             }
           }
@@ -661,7 +673,7 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
             $skip_feature = 1;
           }
         }
-        // get the list of non-reserved attributes
+        // Get the list of non-reserved attributes.
         elseif (strcmp($tag_name, 'Alias') != 0        and strcmp($tag_name, 'Parent') != 0 and
                 strcmp($tag_name, 'Target') != 0       and strcmp($tag_name, 'Gap') != 0 and
                 strcmp($tag_name, 'Derives_from') != 0 and strcmp($tag_name, 'Note') != 0 and
@@ -673,49 +685,46 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
           }
         }
       }
-      // if neither name nor uniquename are provided then generate one
+      // If neither name nor uniquename are provided then generate one.
       if (!$attr_uniquename and !$attr_name) {
-        // check if an alternate ID field is suggested, if so, then use
-        // that for the name
+        // Check if an alternate ID field is suggested, if so, then use
+        // that for the name.
         if (array_key_exists($alt_id_attr, $tags)) {
           $attr_uniquename = $tags[$alt_id_attr][0];
           $attr_name = $attr_uniquename;
         }
-        // if the row has a parent then generate a uniquename using the parent name
+        // If the row has a parent then generate a uniquename using the parent name
         // add the date to the name in the event there are more than one child with
         // the same parent.
         elseif (array_key_exists('Parent', $tags)) {
-          $date = getdate();
           $attr_uniquename = $tags['Parent'][0] . "-$type-$landmark-" . $date[0] . ":" . ($fmin + 1) . ".." . $fmax;
           $attr_name = $attr_uniquename;
         }
-        // generate a unique name based on the date, type and location
-        // and set the name to simply be the type
+        // Generate a unique name based on the date, type and location
+        // and set the name to simply be the type.
         else {
-          $date = getdate();
           $attr_uniquename = $date[0] . "-$type-$landmark:" . ($fmin + 1) . ".." . $fmax;
           $attr_name = $type;
         }
       }
 
-      // if a name is not specified then use the unique name as the name
+      // If a name is not specified then use the unique name as the name
       if (strcmp($attr_name, '') == 0) {
         $attr_name = $attr_uniquename;
       }
 
-      // if an ID attribute is not specified then we must generate a
+      // If an ID attribute is not specified then we must generate a
       // unique ID. Do this by combining the attribute name with the date
       // and line number.
       if (!$attr_uniquename) {
-        $date = getdate();
         $attr_uniquename = $attr_name . '-' . $date[0] . '-' . $line_num;
       }
 
-      // make sure the landmark sequence exists in the database.  If the user
-      // has not specified a landmark type (and it's not requiredin the GFF foramt)
-      // then We don't know the type of the landmark so we'll hope that it's unique across
-      // all types for the orgnaism. Only do this test if the landmark and the feature are
-      // different.
+      // Make sure the landmark sequence exists in the database.  If the user
+      // has not specified a landmark type (and it's not required in the GFF
+      // format) then we don't know the type of the landmark so we'll hope
+      // that it's unique across all types for the organism. Only do this
+      // test if the landmark and the feature are different.
       if (!$remove and !(strcmp($landmark, $attr_uniquename) == 0 or strcmp($landmark, $attr_name) == 0)) {
         $select = array(
           'organism_id' => $organism->organism_id,
@@ -760,16 +769,17 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
           return '';
         }
       }
-
-      // if the option is to remove or refresh then we want to remove
+/*
+      // If the option is to remove or refresh then we want to remove
       // the feature from the database.
       if ($remove or $refresh) {
+        // Next remove the feature itself.
         $sql = "DELETE FROM {feature}
                 WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
         $match = array(
-           'organism_id' => $feature_organism->organism_id,
-           'uniquename'  => $attr_uniquename,
-           'type_id'     => $cvterm->cvterm_id
+          'organism_id' => $feature_organism->organism_id,
+          'uniquename'  => $attr_uniquename,
+          'type_id'     => $cvterm->cvterm_id
         );
         $result = chado_delete_record('feature', $match);
         if (!$result) {
@@ -779,19 +789,19 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
         $feature = 0;
         unset($result);
       }
-
-      // add or update the feature and all properties
+ */
+      // Add or update the feature and all properties.
       if ($update or $refresh or $add_only) {
 
-        // add/update the feature
+        // Add/update the feature.
         $feature = tripal_feature_load_gff3_feature($feature_organism, $analysis_id, $cvterm,
           $attr_uniquename, $attr_name, $residues, $attr_is_analysis,
           $attr_is_obsolete, $add_only, $score);
 
         if ($feature) {
 
-          // add a record for this feature to the tripal_gff_temp table for
-          // later lookup
+          // Add a record for this feature to the tripal_gff_temp table for
+          // later lookup.
           $values = array(
             'feature_id' => $feature->feature_id,
             'organism_id' => $feature->organism_id,
@@ -830,7 +840,8 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
           }
           // add parent relationships
           if (array_key_exists('Parent', $tags)) {
-            tripal_feature_load_gff3_parents($feature, $cvterm, $tags['Parent'], $feature_organism->organism_id, $fmin);
+            tripal_feature_load_gff3_parents($feature, $cvterm, $tags['Parent'],
+              $feature_organism->organism_id, $strand, $phase, $fmin, $fmax);
           }
 
           // add target relationships
@@ -851,7 +862,8 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
           }
           // add the Derives_from relationship (e.g. polycistronic genes).
           if (array_key_exists('Derives_from', $tags)) {
-            tripal_feature_load_gff3_derives_from($feature, $tags['Derives_from'][0], $feature_organism);
+            tripal_feature_load_gff3_derives_from($feature, $cvterm, $tags['Derives_from'][0],
+              $feature_organism, $fmin, $fmax);
           }
           // add in the GFF3_source dbxref so that GBrowse can find the feature using the source column
           $source_ref = array('GFF_source:' . $source);
@@ -869,7 +881,76 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
       }
     }
 
+    // Do some last bit of processing.
     if (!$remove) {
+
+      // First, add any protein sequences if needed.
+      $sql = "SELECT feature_id FROM {tripal_gffcds_temp} LIMIT 1 OFFSET 1";
+      $has_cds = chado_query($sql)->fetchField();
+      if ($has_cds) {
+        print "\nAdding protein sequences if CDS exist and no proteins in GFF...\n";
+        $sql = "
+          SELECT F.feature_id, F.name, F.uniquename, TGCT.strand,
+            CVT.cvterm_id, CVT.name as feature_type,
+            min(TGCT.fmin) as fmin, max(TGCT.fmax) as fmax,
+            TGPT.feature_id as protein_id, TGPT.fmin as protein_fmin,
+            TGPT.fmax as protein_fmax
+          FROM {tripal_gffcds_temp} TGCT
+            INNER JOIN {feature} F on F.feature_id = TGCT.parent_id
+            INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
+            LEFT JOIN {tripal_gffprotein_temp} TGPT on TGPT.parent_id = F.feature_id
+          GROUP BY F.feature_id, F.name, F.uniquename, CVT.cvterm_id, CVT.name,
+            TGPT.feature_id, TGPT.fmin, TGPT.fmax, TGCT.strand
+        ";
+        $results = chado_query($sql);
+        $protein_cvterm = tripal_get_cvterm(array(
+          'name' => 'polypeptide',
+          'cv_id' => array(
+            'name' => 'sequence'
+          )
+        ));
+        while ($result = $results->fetchObject()) {
+          // If a protein exists with this same parent then don't add a new
+          // protein.
+          if (!$result->protein_id) {
+            // Get details about this protein
+            $uname = $result->uniquename . '-protein';
+            $name =  $result->name;
+            $values = array(
+              'parent_id' => $result->feature_id,
+              'fmin' => $result->fmin
+            );
+            $min_phase = chado_select_record('tripal_gffcds_temp', array('phase'), $values);
+            $values = array(
+              'parent_id' => $result->feature_id,
+              'fmax' => $result->fmax
+            );
+            $max_phase = chado_select_record('tripal_gffcds_temp', array('phase'), $values);
+
+            $pfmin = $result->fmin;
+            $pfmax = $result->fmax;
+            if ($result->strand == '-1') {
+              $pfmax -= $max_phase[0]->phase;
+            }
+            else {
+              $pfmin += $min_phase[0]->phase;
+            }
+            // Add the new protein record.
+            $feature = tripal_feature_load_gff3_feature($organism, $analysis_id,
+              $protein_cvterm, $uname, $name, '', 'f', 'f', 1, 0);
+            // Add the derives_from relationship.
+            $cvterm = tripal_get_cvterm(array('cvterm_id' => $result->cvterm_id));
+            tripal_feature_load_gff3_derives_from($feature, $cvterm,
+              $result->uniquename, $organism, $pfmin, $pfmax);
+            // Add the featureloc record. Set the start of the protein to
+            // be the start of the coding sequence minus the phase.
+            tripal_feature_load_gff3_featureloc($feature, $organism, $landmark,
+              $pfmin, $pfmax, $strand, '', 'f', 'f', '', 0);
+          }
+        }
+      }
+
+
       print "\nSetting ranks of children...\n";
 
       // get features in a relationship that are also children of an alignment
@@ -977,37 +1058,42 @@ function tripal_feature_load_gff3($gff_file, $organism_id, $analysis_id,
  *
  * @ingroup gff3_loader
  */
-function tripal_feature_load_gff3_derives_from($feature, $subject, $organism) {
+function tripal_feature_load_gff3_derives_from($feature, $cvterm, $object,
+  $organism, $fmin, $fmax) {
+
+  $type = $cvterm->name;
 
-  // get the subject. If the subject is not in the tripal_gff_temp table
-  // then look for the subject in the feature table using the unique name.
-  // if it is not unique then we can provide an error
+  // First look for the object feature in the temp table to get its type.
   $values = array(
     'organism_id' => $organism->organism_id,
-    'uniquename' => $subject,
+    'uniquename' => $object,
   );
   $result = chado_select_record('tripal_gff_temp', array('type_name'), $values);
-  $type_id = array();
+  $type_id = NULL;
   if (count($result) > 0) {
-    $type_id = array(
+    $otype = tripal_get_cvterm(array(
       'name' => $result[0]->type_name,
       'cv_id' => array(
         'name' => 'sequence'
-      ),
-    );
+      )
+    ));
+    if ($otype) {
+      $type_id = $otype->cvterm_id;
+    }
   }
 
-  // if we don't have a subject type then look for the feature in the feature table
-  if (empty($type_id)) {
+  // If the object wasn't in the temp table then look for it in the
+  // feature table and get its type.
+  if (!$type_id) {
     $result = chado_select_record('feature', array('type_id'), $values);
     if (count($result) > 1) {
-      watchdog("tripal_feature", "Cannot find subject type for feature,'%subject' , in 'derives_from' relationship. Multiple matching features exist with this uniquename.",
-        array('%subject' => $subject), WATCHDOG_WARNING);
+      watchdog("tripal_feature", "Cannot find feature type for, '%subject' , in 'derives_from' relationship. Multiple matching features exist with this uniquename.",
+        array('%subject' => $object), WATCHDOG_WARNING);
       return '';
     }
     else if (count($result) == 0) {
-      watchdog("tripal_feature", "Cannot find subject type for feature,'%subject' , in 'derives_from' relationship.",
-        array('%subject' => $subject), WATCHDOG_WARNING);
+      watchdog("tripal_feature", "Cannot find feature type for, '%subject' , in 'derives_from' relationship.",
+        array('%subject' => $object), WATCHDOG_WARNING);
       return '';
     }
     else {
@@ -1015,28 +1101,39 @@ function tripal_feature_load_gff3_derives_from($feature, $subject, $organism) {
     }
   }
 
-  // get the subject feature
+  // Get the object feature.
   $match = array(
     'organism_id' => $organism->organism_id,
-    'uniquename' => $subject,
-    'type_id' => array(
-      'name' => $subject_type,
-      'cv_id' => array(
-        'name' => 'sequence'
-      ),
-    ),
+    'uniquename' => $object,
+    'type_id' => $type_id,
   );
-  $sfeature = chado_select_record('feature', array('feature_id'), $match);
-  if (count($sfeature)==0) {
+  $ofeature = chado_select_record('feature', array('feature_id'), $match);
+  if (count($ofeature) == 0) {
     tripal_report_error('tripal_feature', TRIPAL_ERROR, "Could not add 'Derives_from' relationship " .
       "for %uniquename and %subject.  Subject feature, '%subject', " .
       "cannot be found", array('%uniquename' => $feature->uniquename, '%subject' => $subject));
     return;
   }
 
-   // now check to see if the relationship already exists
+  // If this feature is a protein then add it to the tripal_gffprotein_temp.
+  if ($type == 'protein' or $type == 'polypeptide') {
+    $values = array(
+      'feature_id' => $feature->feature_id,
+      'parent_id' => $ofeature[0]->feature_id,
+      'fmin' => $fmin,
+      'fmax' => $fmax
+    );
+    $result = chado_insert_record('tripal_gffprotein_temp', $values);
+    if (!$result) {
+      tripal_report_error('tripal_feature', TRIPAL_ERROR, "Cound not save record in temporary protein table, Cannot continue.", array());
+      exit;
+    }
+  }
+
+   // Now check to see if the relationship already exists. If it does
+   // then just return.
   $values = array(
-    'object_id' => $sfeature[0]->feature_id,
+    'object_id' => $ofeature[0]->feature_id,
     'subject_id' => $feature->feature_id,
     'type_id' => array(
        'cv_id' => array(
@@ -1070,13 +1167,14 @@ function tripal_feature_load_gff3_derives_from($feature, $subject, $organism) {
  *
  * @ingroup gff3_loader
  */
-function tripal_feature_load_gff3_parents($feature, $cvterm, $parents, $organism_id, $fmin) {
+function tripal_feature_load_gff3_parents($feature, $cvterm, $parents,
+  $organism_id, $strand, $phase, $fmin, $fmax) {
 
   $uname = $feature->uniquename;
   $type = $cvterm->name;
   $rel_type = 'part_of';
 
-  // prepare these SQL statements that will be used repeatedly.
+  // Prepare these SQL statements that will be used repeatedly.
   $cvterm_sql = "
     SELECT CVT.cvterm_id
     FROM {cvterm} CVT
@@ -1085,9 +1183,9 @@ function tripal_feature_load_gff3_parents($feature, $cvterm, $parents, $organism
     WHERE cv.name = :cvname and (CVT.name = :name or CVTS.synonym = :synonym)
   ";
 
-  // iterate through the parents in the list
+  // Iterate through the parents in the list.
   foreach ($parents as $parent) {
-    // get the parent cvterm
+    // Get the parent cvterm.
     $values = array(
       'organism_id' => $organism_id,
       'uniquename' => $parent,
@@ -1136,6 +1234,24 @@ function tripal_feature_load_gff3_parents($feature, $cvterm, $parents, $organism
             array());
         }
       }
+
+      // If this feature is a CDS and now that we know the parent we can
+      // add it to the tripal_gffcds_temp table for later lookup.
+      if ($type == 'CDS') {
+        $values = array(
+          'feature_id' => $feature->feature_id,
+          'parent_id' => $parent_feature->feature_id,
+          'fmin' => $fmin,
+          'fmax' => $fmax,
+          'strand' => $strand,
+          'phase' => $phase,
+        );
+        $result = chado_insert_record('tripal_gffcds_temp', $values);
+        if (!$result) {
+          tripal_report_error('tripal_feature', TRIPAL_ERROR, "Cound not save record in temporary CDS table, Cannot continue.", array());
+          exit;
+        }
+      }
     }
     else {
       tripal_report_error("tripal_feature", TRIPAL_WARNING, "Cannot establish relationship '$uname' ($type) $rel_type '$parent' ($parent_type): Cannot find the parent",
@@ -1482,12 +1598,12 @@ function tripal_feature_load_gff3_alias($feature, $aliases) {
 function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uniquename,
   $name, $residues, $is_analysis = 'f', $is_obsolete = 'f', $add_only, $score) {
 
-  // check to see if the feature already exists
+  // Check to see if the feature already exists.
   $feature = NULL;
   $fselect = array(
-     'organism_id' => $organism->organism_id,
-     'uniquename' => $uniquename,
-     'type_id' => $cvterm->cvterm_id
+    'organism_id' => $organism->organism_id,
+    'uniquename' => $uniquename,
+    'type_id' => $cvterm->cvterm_id
   );
   $columns = array('feature_id', 'name', 'uniquename', 'seqlen', 'organism_id', 'type_id');
   $result = chado_select_record('feature', $columns, $fselect);
@@ -1508,21 +1624,21 @@ function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uni
     $is_analysis = 'TRUE';
   }
 
-  // insert the feature if it does not exist otherwise perform an update
+  // Insert the feature if it does not exist otherwise perform an update.
   if (!$feature) {
     $values = array(
-       'organism_id' => $organism->organism_id,
-       'name' => $name,
-       'uniquename' => $uniquename,
-//       'residues' => $residues,
-//       'seqlen' => drupal_strlen($residues),
-       'md5checksum' => md5($residues),
-       'type_id' => $cvterm->cvterm_id,
-       'is_analysis' => $is_analysis,
-       'is_obsolete' => $is_obsolete,
+      'organism_id' => $organism->organism_id,
+      'name' => $name,
+      'uniquename' => $uniquename,
+//      'residues' => $residues,
+//      'seqlen' => drupal_strlen($residues),
+      'md5checksum' => md5($residues),
+      'type_id' => $cvterm->cvterm_id,
+      'is_analysis' => $is_analysis,
+      'is_obsolete' => $is_obsolete,
     );
-    $result = chado_insert_record('feature', $values);
-    if (!$result) {
+    $feature = (object) chado_insert_record('feature', $values);
+    if (!$feature) {
       tripal_report_error("tripal_feature", TRIPAL_WARNING, "Failed to insert feature '$uniquename' ($cvterm->name)", array());
       return 0;
     }
@@ -1548,17 +1664,13 @@ function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uni
     }
   }
   else {
-    // the feature exists and we don't want to update it so return
+    // The feature exists and we don't want to update it so return
     // a value of 0.  This will stop all downstream property additions
-    return 0;
+    return $feature;
   }
 
-  // get the newly added feature
-  $columns = array('feature_id', 'name', 'uniquename', 'seqlen', 'organism_id', 'type_id');
-  $result = chado_select_record('feature', $columns, $fselect);
-  $feature = $result[0];
-
-  // add the analysisfeature entry to the analysisfeature table if it doesn't already exist
+  // Add the analysisfeature entry to the analysisfeature table if
+  // it doesn't already exist.
   $af_values = array(
     'analysis_id' => $analysis_id,
     'feature_id' => $feature->feature_id
@@ -1589,6 +1701,7 @@ function tripal_feature_load_gff3_feature($organism, $analysis_id, $cvterm, $uni
       }
     }
   }
+
   return $feature;
 }
 

+ 125 - 33
tripal_feature/tripal_feature.install

@@ -48,19 +48,21 @@ function tripal_feature_requirements($phase) {
  * @ingroup tripal_feature
  */
 function tripal_feature_install() {
-  // create the module's data directory
+  // Create the module's data directory.
   tripal_create_files_dir('tripal_feature');
 
-  // add the materialized view
+  // Add the materialized view.
   tripal_feature_add_organism_count_mview();
 
-  // create the temp table we will use for loading GFF files
-  tripal_cv_create_tripal_gff_temp();
+  // Add the custom tables.
+  tripal_feature_add_tripal_gff_temp_table();
+  tripal_feature_add_tripal_gffcds_temp_table();
+  tripal_feature_add_tripal_gffprotein_temp_table();
 
-  // add the vocabularies used by the feature module:
+  // Add the vocabularies used by the feature module.
   tripal_feature_add_cvs();
 
-  // set the default vocabularies
+  // Set the default vocabularies.
   tripal_set_default_cv('feature', 'type_id', 'sequence');
   tripal_set_default_cv('featureprop', 'type_id', 'feature_property');
   tripal_set_default_cv('feature_relationship', 'type_id', 'feature_relationship');
@@ -75,36 +77,117 @@ function tripal_feature_uninstall() {
 
 }
 
+/**
+ * Registers the tripal_gff_temp custom table used when loading GFF files.
+ *
+ * Builds a schema array with the columns feature_id, organism_id,
+ * uniquename and type_name and passes it to chado_create_custom_table().
+ *
+ * @ingroup tripal_feature
+ */
+function tripal_feature_add_tripal_gff_temp_table() {
+  $schema = array(
+    'table' => 'tripal_gff_temp',
+    'fields' => array(
+      'feature_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'organism_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'uniquename' => array(
+        'type' => 'text',
+        'not null' => TRUE,
+      ),
+      'type_name' => array(
+        'type' => 'varchar',
+        'length' => '1024',
+        'not null' => TRUE,
+      ),
+    ),
+    // Every index needs its own array key: a repeated key in a PHP array
+    // literal silently overwrites the earlier entry, so that index would
+    // never be defined.
+    'indexes' => array(
+      'tripal_gff_temp_idx0' => array('feature_id'),
+      'tripal_gff_temp_idx1' => array('organism_id'),
+      'tripal_gff_temp_idx2' => array('uniquename'),
+    ),
+    'unique keys' => array(
+      'tripal_gff_temp_uq0' => array('feature_id'),
+      'tripal_gff_temp_uq1' => array('uniquename', 'organism_id', 'type_name'),
+    ),
+  );
+  // NOTE(review): the third argument (TRUE) is presumably the
+  // "skip if the table already exists" flag -- confirm against
+  // chado_create_custom_table().
+  chado_create_custom_table('tripal_gff_temp', $schema, TRUE);
+}
+
 /**
- * Create a temporary table used for loading gff3 files
  *
- * @ingroup tripal_feature
  */
-function tripal_cv_create_tripal_gff_temp() {
-  // the tripal_obo_temp table is used for temporary housing of records when loading OBO files
-  // we create it here using plain SQL because we want it to be in the chado schema but we
-  // do not want to use the Tripal Custom Table API because we don't want it to appear in the
-  // list of custom tables.  It needs to be available for the Tripal Chado API so we create it
-  // here and then define it in the tripal_cv/api/tripal_cv.schema.api.inc
-  if (!db_table_exists('chado.tripal_gff_temp')) {
-    $sql = "
-      CREATE TABLE {tripal_gff_temp} (
-        feature_id integer NOT NULL,
-        organism_id integer NOT NULL,
-        uniquename text NOT NULL,
-        type_name character varying(1024) NOT NULL,
-        CONSTRAINT tripal_gff_temp_uq0 UNIQUE (feature_id),
-        CONSTRAINT tripal_gff_temp_uq1 UNIQUE (uniquename, organism_id, type_name)
-      );
-    ";
-    chado_query($sql);
-    $sql = "CREATE INDEX tripal_gff_temp_idx0 ON {tripal_gff_temp} USING btree (feature_id)";
-    chado_query($sql);
-    $sql = "CREATE INDEX tripal_gff_temp_idx1 ON {tripal_gff_temp} USING btree (organism_id)";
-    chado_query($sql);
-    $sql = "CREATE INDEX tripal_gff_temp_idx2 ON {tripal_gff_temp} USING btree (uniquename)";
-    chado_query($sql);
-  }
+function tripal_feature_add_tripal_gffcds_temp_table() {
+  // Registers the tripal_gffcds_temp custom table. Each row links a
+  // feature_id to a parent_id and stores phase, strand, fmin and fmax.
+  $schema = array(
+    'table' => 'tripal_gffcds_temp',
+    'fields' => array(
+      'feature_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'parent_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'phase' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'strand' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'fmin' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'fmax' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+    ),
+    // Every index needs its own array key: a repeated key in a PHP array
+    // literal silently overwrites the earlier entry, so that index would
+    // never be defined.
+    'indexes' => array(
+      'tripal_gff_temp_idx0' => array('feature_id'),
+      'tripal_gff_temp_idx1' => array('parent_id'),
+    ),
+    'unique keys' => array(
+      'tripal_gff_temp_uq0' => array('feature_id'),
+    ),
+  );
+  // NOTE(review): the third argument (TRUE) is presumably the
+  // "skip if the table already exists" flag -- confirm against
+  // chado_create_custom_table().
+  chado_create_custom_table('tripal_gffcds_temp', $schema, TRUE);
+}
+
+/**
+ * Registers the tripal_gffprotein_temp custom table.
+ *
+ * Each row links a feature_id to a parent_id and stores the fmin and fmax
+ * coordinates. The schema is passed to chado_create_custom_table().
+ *
+ * @ingroup tripal_feature
+ */
+function tripal_feature_add_tripal_gffprotein_temp_table() {
+  $schema = array(
+    'table' => 'tripal_gffprotein_temp',
+    'fields' => array(
+      'feature_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'parent_id' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'fmin' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+      'fmax' => array(
+        'type' => 'int',
+        'not null' => TRUE,
+      ),
+    ),
+    // Every index needs its own array key: a repeated key in a PHP array
+    // literal silently overwrites the earlier entry, so that index would
+    // never be defined.
+    'indexes' => array(
+      'tripal_gff_temp_idx0' => array('feature_id'),
+      'tripal_gff_temp_idx1' => array('parent_id'),
+    ),
+    'unique keys' => array(
+      'tripal_gff_temp_uq0' => array('feature_id'),
+    ),
+  );
+  // NOTE(review): the third argument (TRUE) is presumably the
+  // "skip if the table already exists" flag -- confirm against
+  // chado_create_custom_table().
+  chado_create_custom_table('tripal_gffprotein_temp', $schema, TRUE);
 }
 
 /**
@@ -412,4 +495,13 @@ function tripal_feature_update_7201() {
     $error = $e->getMessage();
     throw new DrupalUpdateException('Failed to complete update' . $error);
   }
+}
+
+/**
+ * Adds the temporary tables used for loading GFF files.
+ *
+ * @throws DrupalUpdateException
+ *   If any of the table-creation helpers throws an exception.
+ */
+function tripal_feature_update_7202() {
+  try {
+    tripal_feature_add_tripal_gff_temp_table();
+    tripal_feature_add_tripal_gffcds_temp_table();
+    tripal_feature_add_tripal_gffprotein_temp_table();
+  }
+  catch (Exception $e) {
+    // Report failures the same way the preceding update hook does, so the
+    // update system records a failed update instead of an uncaught error.
+    $error = $e->getMessage();
+    throw new DrupalUpdateException('Failed to complete update: ' . $error);
+  }
+}

+ 4 - 6
tripal_feature/tripal_feature.module

@@ -13,7 +13,6 @@
  */
 
 require_once 'api/tripal_feature.api.inc';
-require_once 'api/tripal_feature.schema.api.inc';
 require_once 'api/tripal_feature.DEPRECATED.inc';
 
 require_once 'theme/tripal_feature.theme.inc';
@@ -170,7 +169,7 @@ function tripal_feature_menu() {
     'access arguments' => array('administer tripal feature'),
     'type' => MENU_LOCAL_TASK,
     'file' =>  'includes/tripal_core.toc.inc',
-    'file path' => drupal_get_path('module', 'tripal_core'), 
+    'file path' => drupal_get_path('module', 'tripal_core'),
     'weight' => 3
   );
   $items['admin/tripal/chado/tripal_feature/configuration'] = array(
@@ -401,9 +400,8 @@ function tripal_feature_job_describe_args($callback, $args) {
     }
 
     $new_args['Sequence Type'] = $args[2];
-    $new_args['Is Unique Name'] = $args[3];
+    $new_args['Is Unique Name'] = $args[3] ? 'Yes' : 'No';
     $new_args['Features Names'] = $args[4];
-
   }
   elseif ($callback == 'tripal_feature_load_gff3') {
 
@@ -492,8 +490,8 @@ function tripal_feature_match_features_page($id) {
     GROUP BY F.name, F.uniquename, F.feature_id, O.genus, O.species,
       O.organism_id, CVT.cvterm_id, CVT.name, CF.nid
   ";
-  
-  $args = array(':uname' => $id, ':fname' => $id, ':sname' => $id); 
+
+  $args = array(':uname' => $id, ':fname' => $id, ':sname' => $id);
   $results = chado_query($sql, $args);
 
   $num_matches = 0;

+ 0 - 1
tripal_organism/api/tripal_organism.api.inc

@@ -153,7 +153,6 @@ function tripal_get_organism_select_options($syncd_only = TRUE) {
 
     // iterate through the organisms and build an array of those that are synced
     foreach ($orgs as $org) {
-      $args = array(':organism_id' => $org->organism_id);
       $org_list[$org->organism_id] = $org->genus . ' ' . $org->species;
     }
   }