Browse Source

Updated Chado Feature API to match fixes in 7.x-2.x

Stephen Ficklin 9 years ago
parent
commit
10b28816e9
2 changed files with 135 additions and 129 deletions
  1. 134 129
      tripal_chado/api/modules/tripal_chado.feature.api.inc
  2. 1 0
      tripal_chado/tripal_chado.info

+ 134 - 129
tripal_chado/api/modules/tripal_chado.feature.api.inc

@@ -5,7 +5,7 @@
  */
 
 /**
- * @defgroup tripal_chado_api Feature API
+ * @defgroup tripal_feature_api Feature API
  * @ingroup tripal_api
  * @{
  * Provides an application programming interface (API) for working with features
@@ -21,7 +21,7 @@
  * @return
  *   an upper-case reverse complemented sequence
  *
- * @ingroup tripal_chado_api
+ * @ingroup tripal_feature_api
  */
 function tripal_reverse_compliment_sequence($sequence) {
 
@@ -45,59 +45,67 @@ function tripal_reverse_compliment_sequence($sequence) {
 }
 
 /**
- * Retrieves the sequences for a given feature. If a feature has multiple alignments
- *  or multiple relationships then multiple sequences will be returned.
+ * Retrieves the sequences for a given feature.
+ *
+ * If a feature has multiple alignments or multiple relationships then
+ * multiple sequences will be returned.
  *
  * @param $feature
  *   An associative array describing the feature. Valid keys include:
- *    - feature_id: The feature_id of the feature for which the sequence will be retrieved
+ *    - feature_id: The feature_id of the feature for which the sequence will
+ *      be retrieved
  *    - name: The feature name. This will appear on the FASTA definition line
- *    - parent_id:  (optional) only retrieve a sequence if 'derive_from_parent' is true
- *        and the parent matches this ID.
+ *    - parent_id:  (optional) only retrieve a sequence if 'derive_from_parent'
+ *      is true and the parent matches this ID.
  *    - featureloc_id: (optional) only retrieve a sequence if 'derive_from_parent' is
- *        true and the alignment is defined with this featureloc_id
+ *      true and the alignment is defined with this featureloc_id
  * @param $options
  *   An associative array of options. Valid keys include:
- *    - width: Indicate the number of bases to use per line.  A new line will be added
- *        after the specified number of bases on each line.
- *    - is_html: Set to '1' if  the sequence is meant to be displayed on a web page.
- *        This will cause a <br> tag to separate lines of the FASTA sequence.
- *    - derive_from_parent: Set to '1' if the sequence should be obtained from the parent
- *        to which this feature is aligned.
- *    - aggregate: Set to '1' if the sequence should only contain sub features, excluding
- *        intro sub feature sequence.  For example, set this option to obtain just
- *        the coding sequence of an mRNA.
- *    - upstream: An integer specifing the number of upstream bases to include in the output
- *    - downstream: An integer specifying the number of downstream bases to include in the
- *        output.
- *    - sub_feature_types: Only include sub features (or child features) of the types
- *        provided in the array
- *    - relationship_type: If a relationship name is provided (e.g. sequence_of) then any
- *        sequences that are in relationships of this type with matched sequences are also included
- *    - relationship_part: If a relationship is provided in the preceeding argument then
- *        the rel_part must be either 'object' or 'subject' to indicate which side of the
- *        relationship the matched features belong
+ *    - width: Indicate the number of bases to use per line.  A new line will
+ *      be added after the specified number of bases on each line.
+ *    - is_html: Set to '1' if  the sequence is meant to be displayed on a web
+ *      page. This will cause a <br> tag to separate lines of the FASTA sequence.
+ *    - derive_from_parent: Set to '1' if the sequence should be obtained from
+ *      the parent to which this feature is aligned.
+ *    - aggregate: Set to '1' if the sequence should only contain sub features,
+ *      excluding intro sub feature sequence.  For example, set this option to
+ *      obtain just the coding sequence of an mRNA.
+ *    - upstream: An integer specifying the number of upstream bases to include
+ *      in the output
+ *    - downstream: An integer specifying the number of downstream bases to
+ *      include in the output.
+ *    - sub_feature_types: Only include sub features (or child features) of
+ *      the types provided in the array
+ *    - relationship_type: If a relationship name is provided (e.g. sequence_of)
+ *      then any sequences that are in relationships of this type with matched
+ *      sequences are also included
+ *    - relationship_part: If a relationship is provided in the preceding
+ *      argument then the rel_part must be either 'object' or 'subject' to
+ *      indicate which side of the relationship the matched features belong
  *
  * @return
  *   an array of matching sequence in the following keys for each sequence:
- *      'types'         => an array of feature types that were used to derive the sequence (e.g. from an aggregated sequence)
+ *      'types'         => an array of feature types that were used to derive
+ *         the sequence (e.g. from an aggregated sequence)
  *      'upstream'      => the number of upstream bases included in the sequence
- *      'downstream'    => the number of downstream bases included in the sequence
+ *      'downstream'    => the number of downstream bases included in the
+ *        sequence
 *      'defline'       => the definition line used to create a FASTA sequence
  *      'residues'      => the residues
- *      'featureloc_id' => the featureloc_id if the sequences is from an alignment
+ *      'featureloc_id' => the featureloc_id if the sequence is from an
+ *        alignment
  *
- * @ingroup tripal_chado_api
+ * @ingroup tripal_feature_api
  */
 function tripal_get_feature_sequences($feature, $options) {
 
-  // default values for finding the feature
+  // Default values for finding the feature.
   $feature_id         = array_key_exists('feature_id', $feature)     ? $feature['feature_id']     : 0;
   $parent_id          = array_key_exists('parent_id', $feature)      ? $feature['parent_id']      : 0;
   $featureloc_id      = array_key_exists('featureloc_id', $feature)  ? $feature['featureloc_id']  : 0;
   $feature_name       = array_key_exists('name', $feature)           ? $feature['name']           : '';
 
-  // default values for building the sequence
+  // Default values for building the sequence.
   $num_bases_per_line = array_key_exists('width', $options)              ? $options['width']              : 50;
   $derive_from_parent = array_key_exists('derive_from_parent', $options) ? $options['derive_from_parent'] : 0;
   $aggregate          = array_key_exists('aggregate', $options)          ? $options['aggregate']          : 0;
@@ -108,18 +116,25 @@ function tripal_get_feature_sequences($feature, $options) {
   $rel_part           = array_key_exists('relationship_part', $options)  ? $options['relationship_part']  : '';
   $is_html            = array_key_exists('is_html', $options)            ? $options['is_html']            : 0;
 
-  // make sure the sub_features variable is an array
+  if (!$upstream) {
+    $upstream = 0;
+  }
+  if (!$downstream) {
+    $downstream = 0;
+  }
+
+  // Make sure the sub_features variable is an array.
   if (!is_array($sub_features)) {
-    tripal_report_error('tripal_chado', TRIPAL_ERROR,
+    tripal_report_error('tripal_feature', TRIPAL_ERROR,
       "'sub_features' option must be an array for function tripal_get_feature_sequences().",
       array()
     );
     return array();
   }
 
-  // if a relationship was specified then retreive and the sequences that
+  // If a relationship was specified then retrieve the sequences that
   // have the given relationship and then recurse to extract the appropriate
-  // sequence
+  // sequence.
   if ($rel_part == "object" or $rel_part == "subject") {
     if ($rel_part == "subject") {
       $sql = '
@@ -154,7 +169,7 @@ function tripal_get_feature_sequences($feature, $options) {
     $sequences = '';
     while ($feature = $features->fetchObject()) {
 
-      // recurse and get the sequences for these in the relationship
+      // Recurse and get the sequences for these in the relationship.
       if ($rel_part == "subject") {
         $defline = "$feature_name, $relationship, $feature->uniquename $feature->feature_type ($feature->genus $feature->species)";
       }
@@ -179,7 +194,7 @@ function tripal_get_feature_sequences($feature, $options) {
     }
   }
 
-  // prepare the queries we're going to use later during the render phase
+  // Prepare the queries we're going to use later during the render phase
   // This SQL statement uses conditionals in the select clause to handle
   // cases where the alignment is in the reverse direction and when
   // the upstream and downstream extensions go beyond the length of the
@@ -251,7 +266,7 @@ function tripal_get_feature_sequences($feature, $options) {
         INNER JOIN {organism} OO  on OF.organism_id   = OO.organism_id
       WHERE SF.feature_id = :feature_id and NOT (OF.residues = \'\' or OF.residues IS NULL)) as tbl1
   ';
-  // this query is meant to get all of the sub features of any given
+  // This query is meant to get all of the sub features of any given
   // feature (arg #1) and order them as they appear on the reference
   // feature (arg #2).
   $sfsql = '
@@ -264,7 +279,7 @@ function tripal_get_feature_sequences($feature, $options) {
     WHERE FR.object_id = :feature_id and PF.feature_id = :srcfeature_id
     ORDER BY FL.fmin ASC
   ';
-  // for counting the number of children
+  // For counting the number of children.
   $fsql ='
     SELECT count(*) as num_children
     FROM {feature_relationship} FR
@@ -275,26 +290,30 @@ function tripal_get_feature_sequences($feature, $options) {
     WHERE FR.object_id = :feature_id and PF.feature_id = :srcfeature_id
   ';
 
-  // the array to be returned
+  // The array to be returned.
   $sequences = array();
 
-  // if we need to get the sequence from the parent then do so now.
+  // If we need to get the sequence from the parent then do so now.
   if ($derive_from_parent) {
 
-    // execute the query to get the sequence from the parent
+    // Execute the query to get the sequence from the parent.
     $parents = chado_query($parent_sql, array(':upstream' => $upstream, ':downstream' => $downstream, ':feature_id' => $feature_id));
     while ($parent = $parents->fetchObject()) {
 
-      // if the user specified a particular parent and this one doesn't match then skip it
+      // If the user specified a particular parent and this one doesn't
+      // match then skip it.
       if ($parent_id and $parent_id != $parent->srcfeature_id) {
         continue;
       }
-      // if the user specified a particular featureloc_id and this one doesn't match then skip it
+      // If the user specified a particular featureloc_id and this one
+      // doesn't match then skip it.
       if ($featureloc_id and $featureloc_id != $parent->featureloc_id) {
         continue;
       }
-      $seq = '';  // initialize the sequence for each parent
+      // Initialize the sequence for each parent.
+      $seq = '';
       $notes = '';
+      $types = array();
 
       // if we are to aggregate then we will ignore the feature returned
       // by the query above and rebuild it using the sub features
@@ -304,17 +323,16 @@ function tripal_get_feature_sequences($feature, $options) {
         $children = chado_query($sfsql, array(':feature_id' => $feature_id, ':srcfeature_id' => $parent->srcfeature_id));
         $num_children = chado_query($fsql, array(':feature_id' => $feature_id, ':srcfeature_id' => $parent->srcfeature_id))->fetchField();
 
-        // iterate through the sub features and concat their sequences. They
+        // Iterate through the sub features and concat their sequences. They
         // should already be in order.
-        $types = array();
         $i = 0;
         while ($child = $children->fetchObject()) {
-          // if the callee has specified that only certain sub features should be
+          // If the callee has specified that only certain sub features should be
           // included then continue if this child is not one of those allowed
-          // subfeatures
+          // subfeatures.
           if (count($sub_features) > 0 and !in_array($child->type_name, $sub_features)) {
             $i++;
-             continue;
+            continue;
           }
 
           // keep up with the types
@@ -403,9 +421,9 @@ function tripal_get_feature_sequences($feature, $options) {
         $notes .= "Excludes all bases but those of type(s): " . implode(', ', $types) . ". " ;
       }
 
-      // construct the definition line for this feature.
-      // to construct the defline we need a featureloc record, so we'll create one using
-      // the information we have
+      // Construct the definition line for this feature. To construct the
+      // defline we need a featureloc record, so we'll create one using
+      // the information we have.
       $featureloc = new stdClass;
       $featureloc->feature_id = $feature;
       $featureloc->fmin = $parent->adjfmin;
@@ -417,7 +435,7 @@ function tripal_get_feature_sequences($feature, $options) {
       $featureloc->srcfeature_id->organism_id = new stdClass;
       $featureloc->srcfeature_id->organism_id->genus = $parent->genus;
       $featureloc->srcfeature_id->organism_id->species = $parent->species;
-      // get a proper feature object
+      // Get a proper feature object.
       $f = chado_generate_var('feature', array('feature_id' => $feature_id));
       $defline = tripal_get_fasta_defline($f, $notes, $featureloc, '', $length);
 
@@ -432,12 +450,14 @@ function tripal_get_feature_sequences($feature, $options) {
       );
     }
   }
-  // if we are not getting the sequence from the parent sequence then
-  // use what comes through from the feature record
+  // If we are not getting the sequence from the parent sequence then
+  // use what comes through from the feature record.
   else {
-    $sql = "SELECT * FROM {feature} F WHERE feature_id = :feature_id";
-    $values = chado_query($sql, array(':feature_id' => $feature_id))->fetchObject();
-    $residues = $values->residues;
+
+    $f = chado_generate_var('feature', array('feature_id' => $feature_id));
+    $f = chado_expand_var($f, 'field', 'feature.residues');
+
+    $residues = $f->residues;
     $length = strlen($residues);
     if ($is_html) {
       $residues = wordwrap($residues, $num_bases_per_line, "<br>", TRUE);
@@ -447,12 +467,11 @@ function tripal_get_feature_sequences($feature, $options) {
     }
 
     // get the definition line for this feature
-    $f = chado_generate_var('feature', array('feature_id' => $feature_id));
     $defline = tripal_get_fasta_defline($f, '', NULL, '', $length);
 
     // add to the sequence array
     $sequences[] = array(
-      'types'      => $values->type,
+      'types'      => $f->type_id->name,
       'upstream'   => 0,
       'downstream' => 0,
       'defline'    => $defline,
@@ -465,51 +484,54 @@ function tripal_get_feature_sequences($feature, $options) {
 }
 
 /**
- * Retreives or prints multiple sequences from features found using the options provided.
  *
  * @param $options
  *   An associative array of options for selecting a feature. Valid keys include:
- *    - org_commonname: The common name of the organism for which sequences should be retrieved
+ *    - org_commonname: The common name of the organism for which sequences
+ *      should be retrieved
  *    - genus: The genus of the organism for which sequences should be retrieved
- *    - species: The species of the organism for which sequences should be retrieved
- *    - analysis_name: The name of an analysis to which sequences belong. Only those that are
- *        associated with the analysis will be retrieved.
+ *    - species: The species of the organism for which sequences should be
+ *      retrieved
+ *    - analysis_name: The name of an analysis to which sequences belong. Only
+ *      those that are associated with the analysis will be retrieved.
  *    - type: The type of feature (a sequence ontology term).
  *    - feature_name: the name of the feature. Can be an array of feature names.
- *    - feature_uname: the uniquename of the feature.  Can be an array of feature unique names.
- *    - upstream: An integer specifing the number of upstream bases to include in the output
- *    - downstream: An integer specifying the number of downstream bases to include in the
- *        output.
- *    - derive_from_parent: Set to '1' if the sequence should be obtained from the parent
- *        to which this feature is aligned.
- *    - aggregate: Set to '1' if the sequence should only contain sub features, excluding
- *        intro sub feature sequence.  For example, set this option to obtain just
- *        the coding sequence of an mRNA.
- *    - sub_feature_types: Only include sub features (or child features) of the types
- *        provided in the array
- *    - relationship_type: If a relationship name is provided (e.g. sequence_of) then any
- *        sequences that are in relationships of this type with matched sequences are also included
- *    - relationship_part: If a relationship is provided in the preceeding argument then
- *        the rel_part must be either 'object' or 'subject' to indicate which side of the
- *        relationship the matched features belong
- *    - width: Indicate the number of bases to use per line.  A new line will be added
- *        after the specified number of bases on each line.
- *    - is_html: Set to '1' if  the sequence is meant to be displayed on a web page.
- *        This will cause a <br> tag to separate lines of the FASTA sequence.
- *    - print: Set to TRUE to print the sequences rather otherwise an array
- *        of sequences will be returned.
+ *    - feature_uname: the uniquename of the feature.  Can be an array of
+ *      feature unique names.
+ *    - upstream: An integer specifying the number of upstream bases to include
+ *      in the output
+ *    - downstream: An integer specifying the number of downstream bases to
+ *      include in the output.
+ *    - derive_from_parent: Set to '1' if the sequence should be obtained from
+ *      the parent to which this feature is aligned.
+ *    - aggregate: Set to '1' if the sequence should only contain sub features,
+ *      excluding intro sub feature sequence.  For example, set this option to
+ *      obtain just the coding sequence of an mRNA.
+ *    - sub_feature_types: Only include sub features (or child features) of
+ *      the types provided in the array
+ *    - relationship_type: If a relationship name is provided (e.g. sequence_of)
+ *      then any sequences that are in relationships of this type with matched
+ *      sequences are also included
+ *    - relationship_part: If a relationship is provided in the preceding
+ *      argument then the rel_part must be either 'object' or 'subject' to
+ *      indicate which side of the relationship the matched features belong
+ *    - width: Indicate the number of bases to use per line.  A new line will
+ *      be added after the specified number of bases on each line.
+ *    - is_html: Set to '1' if  the sequence is meant to be displayed on a
+ *      web page. This will cause a <br> tag to separate lines of the FASTA
+ *      sequence.
  * @return
- *   Returns an array of sequences unless the option 'print' is used.  If 'print is used
- *   then nothing is returned but sequences are printed to STDOUT.  If returned, the
- *   sequences will be in an array with the following keys for each sequence:
- *      'types'         => an array of feature types that were used to derive the sequence (e.g. from an aggregated sequence)
- *      'upstream'      => the number of upstream bases included in the sequence
- *      'downstream'    => the number of downstream bases included in the sequence
+ *   Returns an array of sequences. The sequences will be in an array with the
+ *   following keys for each sequence:
+ *      'types'         => an array of feature types that were used to derive
+ *         the sequence (e.g. from an aggregated sequence)
+ *      'upstream'      => the number of upstream bases in the sequence
+ *      'downstream'    => the number of downstream bases in the sequence
 *      'defline'       => the definition line used to create a FASTA sequence
  *      'residues'      => the residues
- *      'featureloc_id' => the featureloc_id if the sequences is from an alignment
+ *      'featureloc_id' => the featureloc_id if from an alignment
  *
- * @ingroup tripal_chado_api
+ * @ingroup tripal_feature_api
  */
 function tripal_get_bulk_feature_sequences($options) {
 
@@ -529,13 +551,6 @@ function tripal_get_bulk_feature_sequences($options) {
   $num_bases_per_line = array_key_exists('width', $options)              ? $options['width']              : 50;
   $upstream           = array_key_exists('upstream', $options)           ? $options['upstream']       : 0;
   $downstream         = array_key_exists('downstream', $options)         ? $options['downstream']     : 0;
-  $print              = array_key_exists('print', $options)              ? $options['print']              : FALSE;
-
-  $sub_features = explode(',', $child);
-
-  if (!$output_format) {
-    $output_format = 'fasta_txt';
-  }
 
   if (!$type and !$feature_name and !$genus) {
     print "Please provide a type, feature name or genus\n";
@@ -544,13 +559,18 @@ function tripal_get_bulk_feature_sequences($options) {
 
   // get the list of features
   $vars = array();
-  $sql  = "SELECT DISTINCT F.feature_id, F.name, F.uniquename, O.genus, O.species, CVT.name as feature_type " .
-      "FROM {feature} F " .
-      "  INNER JOIN {organism} O on O.organism_id = F.organism_id " .
-      "  INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id ";
+  $sql  = "
+    SELECT DISTINCT F.feature_id, F.name, F.uniquename,
+      O.genus, O.species, CVT.name as feature_type
+    FROM {feature} F
+      INNER JOIN {organism} O on O.organism_id = F.organism_id
+      INNER JOIN {cvterm} CVT on CVT.cvterm_id = F.type_id
+  ";
   if ($analysis_name) {
-    $sql .= "  INNER JOIN {analysisfeature} AF on AF.feature_id = F.feature_id " .
-        "  INNER JOIN {analysis} A on AF.analysis_id = A.analysis_id ";
+    $sql .= "
+      INNER JOIN {analysisfeature} AF on AF.feature_id = F.feature_id
+      INNER JOIN {analysis} A on AF.analysis_id = A.analysis_id
+    ";
   }
   $sql .= "WHERE (1=1) ";
   if ($org_commonname) {
@@ -607,30 +627,15 @@ function tripal_get_bulk_feature_sequences($options) {
   $num_seqs = 0;
   $q = chado_query($sql, $vars);
 
-
   $sequences = array();
   while ($feature = $q->fetchObject()) {
-
     // get the sequences
     $seqs = tripal_get_feature_sequences(array('feature_id' => $feature->feature_id), $options);
-
-    if ($print) {
-      foreach ($seqs as $seq) {
-        print ">" . $seq['defline'] . "\n";
-        print $seq['residues'] . "\n";
-      }
-    }
-    else {
-      $sequences = array_merge($sequences, $seqs);
-    }
+    $sequences = array_merge($sequences, $seqs);
     $num_seqs++;
   }
-  if (!$print) {
-    return $seqs;
-  }
-  elseif ($num_seqs == 0) {
-    print "No Sequences Found";
-  }
+
+  return $sequences;
 }
 
 /**

+ 1 - 0
tripal_chado/tripal_chado.info

@@ -13,3 +13,4 @@ stylesheets[all][] = theme/css/tripal_chado.css
 
 dependencies[] = tripal
 dependencies[] = date
+dependencies[] = image