Selaa lähdekoodia

Made additional adjustments to sequence page

Stephen Ficklin 11 vuotta sitten
vanhempi
commit
e6e017890c

+ 76 - 49
tripal_feature/api/tripal_feature.api.inc

@@ -51,6 +51,10 @@ function tripal_reverse_compliment_sequence($sequence) {
  *   An associative array describing the feature. Valid keys include:
  *    - feature_id: The feature_id of the feature for which the sequence will be retrieved
  *    - name: The feature name. This will appear on the FASTA definition line
+ *    - parent_id:  (optional) only retrieve a sequence if 'derive_from_parent' is true 
+ *        and the parent matches this ID.
+ *    - featureloc_id: (optional) only retrieve a sequence if 'derive_from_parent' is 
+ *        true and the alignment is defined with this featureloc_id
  * @param $options
  *   An associative array of options. Valid keys include:
  *    - width: Indicate the number of bases to use per line.  A new line will be added
@@ -78,15 +82,17 @@ function tripal_reverse_compliment_sequence($sequence) {
  *        relationship the matched features belong
  *
  * @return
- *   The DNA/protein sequence formated as requested.
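+ *   An array of matching sequences, formatted as requested. Each element is
+ *   an associative array with the keys 'types', 'upstream', 'downstream',
+ *   'notes' and 'residues'; sequences derived from a parent also carry
+ *   'featureloc_id'.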
  *
  * @ingroup tripal_feature_api
  */
 function tripal_get_sequence($feature, $options) {
 
   // default values for finding the feature
-  $feature_id   = array_key_exists('feature_id', $feature) ? $feature['feature_id'] : 0;
-  $feature_name = array_key_exists('name', $feature)       ? $feature['name']       : '';
+  $feature_id    = array_key_exists('feature_id', $feature)     ? $feature['feature_id']     : 0;
+  $parent_id     = array_key_exists('parent_id', $feature)      ? $feature['parent_id']      : 0;
+  $featureloc_id = array_key_exists('featureloc_id', $feature)  ? $feature['featureloc_id']  : 0;
+  $feature_name  = array_key_exists('name', $feature)           ? $feature['name']           : '';
 
   // default values for building the sequence
   $num_bases_per_line = array_key_exists('width', $options)              ? $options['width']              : 50;
@@ -99,14 +105,18 @@ function tripal_get_sequence($feature, $options) {
   $relationship       = array_key_exists('relationship_type', $options)  ? $options['relationship_type']  : '';
   $rel_part           = array_key_exists('relationship_part', $options)  ? $options['relationship_part']  : '';
   
+  // make sure the sub_features variable is an array
   if (!is_array($sub_features)) {
     tripal_report_error('tripal_deprecated', TRIPAL_ERROR,
       "'sub_features' option must be an array for function tripal_get_sequence().",
       array()
     );
-    return;
+    return array();
   }
 
+  // if a relationship was specified then retrieve the sequences that
+  // have the given relationship and then recurse to extract the appropriate
+  // sequence
   if ($rel_part == "object" or $rel_part == "subject") {
     if ($rel_part == "subject") {
       $sql = '
@@ -148,10 +158,11 @@ function tripal_get_sequence($feature, $options) {
       if ($rel_part == "object") {
         $defline = "$feature->uniquename $feature->feature_type ($feature->genus $feature->species), $relationship, $feature_name";
       }
-      $sequences .= tripal_get_sequence(
+      return tripal_get_sequence(
         array(
           'feature_id' => $feature->feature_id, 
-          'name' => $defline
+          'name' => $defline,
+          'parent_id' => $parent_id,
         ),
         array(
           'width' => $num_bases_per_line, 
@@ -164,7 +175,6 @@ function tripal_get_sequence($feature, $options) {
         )
       );
     }
-    return $sequences;
   }
 
   // prepare the queries we're going to use later during the render phase
@@ -173,13 +183,13 @@ function tripal_get_sequence($feature, $options) {
   // the upstream and downstream extensions go beyond the lenght of the
   // parent sequence.
   $sql ='
-    SELECT srcname, srcfeature_id, strand, srctypename, typename,
+    SELECT featureloc_id, srcname, srcfeature_id, strand, srctypename, typename,
       fmin, fmax, upstream, downstream, adjfmin, adjfmax,
       substring(residues from (adjfmin + 1) for (upstream + (fmax - fmin) + downstream))  as residues,
       genus, species
     FROM (
       SELECT
-        OF.name srcname, FL.srcfeature_id, FL.strand,
+        FL.featureloc_id, OF.name srcname, FL.srcfeature_id, FL.strand,
         OCVT.name as srctypename, SCVT.name as typename,
         FL.fmin, FL.fmax, OO.genus, OO.species,
         CASE
@@ -263,15 +273,25 @@ function tripal_get_sequence($feature, $options) {
     WHERE FR.object_id = :feature_id and PF.feature_id = :srcfeature_id
   ';
 
+  // the array to be returned
+  $sequences = array();
+  
   // if we need to get the sequence from the parent then do so now.
   if ($derive_from_parent) {
-    
-    $residues = '';
 
     // execute the query to get the sequence from the parent
     $parents = chado_query($sql, array(':upstream' => $upstream, ':downstream' => $downstream, ':feature_id' => $feature_id));
 
     while ($parent = $parents->fetchObject()) {
+      
+      // if the user specified a particular parent and this one doesn't match then skip it
+      if ($parent_id and $parent_id != $parent->srcfeature_id) {
+        continue;
+      }
+      // if the user specified a particular featureloc_id and this one doesn't match then skip it
+      if ($featureloc_id and $featureloc_id != $parent->featureloc_id) {
+        continue;
+      }
       $seq = '';  // initialize the sequence for each parent
 
       // if we are to aggregate then we will ignore the feature returned
@@ -354,7 +374,6 @@ function tripal_get_sequence($feature, $options) {
         $seq = tripal_feature_reverse_complement($seq);
         $dir = 'reverse';
       }
-
       // now format for display
       if ($output_format == 'fasta_html') {
          $seq = wordwrap($seq, $num_bases_per_line, "<br>", TRUE);
@@ -362,34 +381,35 @@ function tripal_get_sequence($feature, $options) {
       elseif ($output_format == 'fasta_txt') {
          $seq = wordwrap($seq, $num_bases_per_line, "\r\n", TRUE);
       }
-      $residues .= ">$feature_name. Sequence derived from feature of type, '$parent->srctypename', of $parent->genus $parent->species: $parent->srcname:" . ($parent->adjfmin + 1) . ".." . $parent->adjfmax . " ($dir). ";
-      if (count($types) > 0) {
-        $residues .= "Excludes all bases but those of type(s): " . implode(', ', $types) . ". " ;
-      }
-      if ($parent->upstream > 0) {
-         $residues .= "Includes " . $parent->upstream . " bases upstream.  ";
-      }
-      if ($parent->downstream > 0) {
-         $residues .= "Includes " . $parent->downstream . " bases downstream.  ";
-      }
       if (!$seq) {
-
         if ($output_format == 'fasta_html') {
-          $residues .= "No sequence available.</br>";
+          $notes .= "No sequence available.</br>";
         }
         else {
-          $residues .= "No sequence available.\r\n";
+          $notes .= "No sequence available.\r\n";
         }
       }
-      else {
-        if ($output_format == 'fasta_html') {
-          $residues .= "<br>";
-        }
-        $residues .= "\r\n" . $seq . "\r\n";
-        if ($output_format == 'fasta_html') {
-          $residues .= "<br>";
-        }
+      
+      $notes = "Sequence derived from feature of type, '$parent->srctypename', of $parent->genus $parent->species: $parent->srcname:" . ($parent->adjfmin + 1) . ".." . $parent->adjfmax . " ($dir). ";
+      /*
+      if (count($types) > 0) {
+        $notes .= "Excludes all bases but those of type(s): " . implode(', ', $types) . ". " ;
+      }
+      if ($parent->upstream > 0) {
+         $notes .= "Includes " . $parent->upstream . " bases upstream.  ";
       }
+      if ($parent->downstream > 0) {
+         $notes .= "Includes " . $parent->downstream . " bases downstream.  ";
+      }      
+      */
+      $sequences[] = array(
+        'types'         => $types,
+        'upstream'      => $parent->upstream,
+        'downstream'    => $parent->downstream,
+        'notes'         => $notes,
+        'residues'      => $seq,
+        'featureloc_id' => $parent->featureloc_id,
+      );
     }
   }
   // if we are not getting the sequence from the parent sequence then
@@ -404,16 +424,17 @@ function tripal_get_sequence($feature, $options) {
     elseif ($output_format == 'fasta_txt') {
        $residues = wordwrap($residues, $num_bases_per_line, "\r\n", TRUE);
     }
-    $residues = ">$feature_name\r\n$residues\r\n";
-  }
 
-  // format the residues for display
-  if ($residues and $num_bases_per_line) {
-    if ($output_format == 'fasta_html') {
-       $residues = '<span style="font-family: monospace;">' . $residues . '</span>';
-    }
+    $sequences[] = array(
+      'types'      => $values->type,
+      'upstream'   => 0,
+      'downstream' => 0,
+      'notes'      => '',
+      'residues'   => $residues,
+    );
   }
-  return $residues;
+
+  return $sequences;
 }
 
 /**
@@ -446,21 +467,27 @@ function tripal_format_fasta_sequence($feature, $desc) {
  * 
  * @param $feature
  *   A single feature object containing all the fields from the chado.feature table
- * @param $alignment
+ * @param $featureloc
  *   Optional: A single featureloc object generated using chado_generate_var that
  *   contains a record from the chado.featureloc table.
+ * @param $type
+ *   Optional: the type of sequence. By default the feature type is used.
  *   
  * @return
  *   A string of the format: uniquename|name|type|feature_id
  *   or if an alignment:  uniquename|name|type|feature_id; derived from alignment at srcfeature_name:fmin..fmax[+-]
  */
-function tripal_get_fasta_defline($feature, $featureloc = NULL) {
-   $defline = $feature->uniquename . "|" . $feature->name . "|" . $feature->type_id->name . "|" . $feature->feature_id; 
-   if ($featureloc) {
-     $defline = tripal_get_location_string($featureloc) . "; alignment of " . $defline;
-   }
-   
-   return $defline;
+function tripal_get_fasta_defline($feature, $featureloc = NULL, $type = '') {
+  
+  if (!$type) {
+    $type = $feature->type_id->name;
+  }
+  $defline = $feature->uniquename . "|" . $feature->name . "|" . $type . "|" . $feature->feature_id; 
+  if ($featureloc) {
+    $defline = $defline . "; derived from alignment at " .tripal_get_location_string($featureloc);
+  }
+  
+  return $defline;
 }
 
 /**

+ 84 - 43
tripal_feature/theme/templates/tripal_feature_sequence.tpl.php

@@ -9,6 +9,7 @@
  *
  */
 
+
 $feature = $variables['node']->feature;
 
 // we don't want to get the sequence for traditionally large types. They are
@@ -30,19 +31,17 @@ $featureloc_sequences = $feature->featureloc_sequences;
 if ($residues or count($featureloc_sequences) > 0) { 
 
   $sequences_html = '';  // a variable for holding all sequences HTML text
-  $list_items = array(); // a list to be used for theming of content on this page ?>
-  
-  <div class="tripal_feature-data-block-desc tripal-data-block-desc">The following sequences are available for this feature:</div> <?php
+  $list_items = array(); // a list to be used for theming of content on this page
   
   // ADD IN RESIDUES FOR THIS FEATURE
   // add in the residues if they are present
   if ($residues) {
-    $list_items[] = '<a href="#residues">Current ' . $feature->type_id->name . ' sequence</a>';
+    $list_items[] = '<a href="#residues">' . $feature->type_id->name . ' sequence</a>';
      
     // format the sequence to break every 50 residues
     $sequences_html .= '<a name="residues"></a>';
     $sequences_html .= '<div id="residues" class="tripal_feature-sequence-item">';
-    $sequences_html .= '<p><b>Current ' . $feature->type_id->name . ' sequence</b></p>';
+    $sequences_html .= '<p><b>' . $feature->type_id->name . ' sequence</b></p>';
     $sequences_html .= '<pre class="tripal_feature-sequence">';
     $sequences_html .= '>' . tripal_get_fasta_defline($feature) . "\n";
     $sequences_html .= preg_replace("/(.{50})/","\\1<br>",$feature->residues);
@@ -59,6 +58,7 @@ if ($residues or count($featureloc_sequences) > 0) {
   $all_relationships = $feature->all_relationships;
   $object_rels = $all_relationships['object'];
   $has_coding_seq = 0;
+  $coding_seq = '';
   foreach ($object_rels as $rel_type => $rels){
     foreach ($rels as $subject_type => $subjects){
       foreach ($subjects as $subject){
@@ -69,10 +69,10 @@ if ($residues or count($featureloc_sequences) > 0) {
           $protein = chado_expand_var($protein, 'field', 'feature.residues');
           
           if ($protein->residues) {
-            $list_items[] = '<a href="#residues">Protein sequence of ' . $protein->name . '</a>';
+            $list_items[] = '<a href="#residues">protein sequence</a>';
             $sequences_html .= '<a name="protein-' . $protein->feature_id . '"></a>';
             $sequences_html .= '<div id="protein-' . $protein->feature_id . '" class="tripal_feature-sequence-item">';
-            $sequences_html .= '<p><b>Protein sequence of ' . $protein->name . '</b></p>';
+            $sequences_html .= '<p><b>protein sequence of ' . $protein->name . '</b></p>';
             $sequences_html .= '<pre class="tripal_feature-sequence">';
             $sequences_html .= '>' . tripal_get_fasta_defline($protein) . "\n";
             $sequences_html .= preg_replace("/(.{50})/","\\1<br>", $protein->residues);
@@ -82,10 +82,15 @@ if ($residues or count($featureloc_sequences) > 0) {
           }
         }
         
-        // we want to know if there are any coding sequences associated with this feature
-        // if so we will use some code a bit later on to get those sequences
+        // If the CDS has sequences then concatenate those. The objects 
+        // should be returned in order of rank
         if ($rel_type == 'part of' and $subject_type == 'CDS') {
-          $has_coding_seq = 1;
+          $cds = $subject->record->subject_id;
+          $cds = chado_expand_var($cds, 'field', 'feature.residues');
+          if ($cds->residues) {
+            $has_coding_seq = 1;
+            $coding_seq .= $cds->residues;
+          }
         }
         
         // add any other sequences that are related through a relationship
@@ -94,6 +99,19 @@ if ($residues or count($featureloc_sequences) > 0) {
     }
   }
   
+  // CODING SEQUENCES FROM RELATIONSHIPS
+  // add in any CDS sequences.
+  if ($has_coding_seq) {
+    $list_items[] = '<a href="#coding_sequence">coding sequence </a>';
+    $sequences_html .= '<a name="coding_sequence"></a>';
+    $sequences_html .= '<div id="coding_sequence" class="tripal_feature-sequence-item">';
+    $sequences_html .= '<p><b>coding sequence</b></p>';
+    $sequences_html .= '<pre class="tripal_feature-sequence">';
+    $sequences_html .= $coding_seq;
+    $sequences_html .= '</pre>';
+    $sequences_html .= '<a href="#sequences-top">back to top</a>';
+    $sequences_html .= '</div>';
+  }
   
   /* ADD IN ALIGNMENT SEQUENCES FOR THIS FEATURE
    * For retreiving the sequence from an alignment we would typically make a call to
@@ -113,49 +131,55 @@ if ($residues or count($featureloc_sequences) > 0) {
   if(count($featureloc_sequences) > 0){
     foreach($featureloc_sequences as $src => $attrs){
       // the $attrs array has the following keys
-      //   * src:  a unique identifier combining the feature id with the cvterm id
+      //   * id:  a unique identifier combining the feature id with the cvterm id
       //   * type: the type of sequence (e.g. mRNA, etc)
       //   * location:  the alignment location
       //   * defline: the definition line
       //   * formatted_seq: the formatted sequences
-      $list_items[] = '<a href="#' . $attrs['src'] . '">Alignment at  ' . $attrs['location'] . "</a>";
-      $sequences_html .= '<a name="' . $attrs['src'] . '"></a>';
-      $sequences_html .= '<div id="' . $attrs['src'] . '" class="tripal_feature-sequence-item">';
-      $sequences_html .= '<p><b>Alignment at  ' . $attrs['location'] .'</b></p>';
+      //   * featureloc:  the featureloc object describing this alignment
+      $list_items[] = '<a href="#' . $attrs['id'] . '">'. $feature->type_id->name . ' from alignment at  ' . $attrs['location'] . "</a>";
+      $sequences_html .= '<a name="' . $attrs['id'] . '"></a>';
+      $sequences_html .= '<div id="' . $attrs['id'] . '" class="tripal_feature-sequence-item">';
+      $sequences_html .= '<p><b>'. $feature->type_id->name . ' from alignment at  ' . $attrs['location'] .'</b></p>';
       $sequences_html .= $attrs['formatted_seq'];
       $sequences_html .= '<a href="#sequences-top">back to top</a>';
       $sequences_html .= '</div>';
     }
-  }
-  
-  // CODING SEQUENCES
-  // add in any CDS sequences. 
-  if ($has_coding_seq) {
-    // use the tripal_get_sequence() API function to retreive the CDS sequences
+    
+    // check to see if this alignment has any CDS. If so, generate a CDS sequence
     $cds_sequence = tripal_get_sequence(
-      array(
-        'feature_id' => $feature->feature_id,
-        'name' => $feature->name, 
-      ),
-      array(
-        'width' => 50,  // FASTA sequence should have 50 chars per line
-        'derive_from_parent' => 1, // CDS are in parent-child relationships so we want to use the sequence from the parent
-        'aggregate' => 1, // we want to combine all CDS for this feature into a single sequence
-        'output_format' => 'fasta_txt', // we just want plain text, we'll format it here.
-        'sub_feature_types' => array('CDS'), // we're looking for CDS features
-        ''
-      )
+        array(
+          'feature_id' => $feature->feature_id,
+          'parent_id' => $attrs['featureloc']->srcfeature_id->feature_id,
+          'name' => $feature->name,
+          'featureloc_id' => $attrs['featureloc']->featureloc_id,
+        ),
+        array(
+          'width' => 50,  // FASTA sequence should have 50 chars per line
+          'derive_from_parent' => 1, // CDS are in parent-child relationships so we want to use the sequence from the parent
+          'aggregate' => 1, // we want to combine all CDS for this feature into a single sequence
+          'output_format' => 'fasta_txt', // we just want plain text, we'll format it here.
+          'sub_feature_types' => array('CDS'), // we're looking for CDS features
+          ''
+        )
     );
-    $list_items[] = '<a href="#coding_sequence">Coding sequence </a>';
-    $sequences_html .= '<a name="coding_sequence"></a>';
-    $sequences_html .= '<div id="coding_sequence" class="tripal_feature-sequence-item">';
-    $sequences_html .= '<p><b>Coding sequence</b></p>';
-    $sequences_html .= '<pre class="tripal_feature-sequence">';
-    $sequences_html .= $cds_sequence;
-    $sequences_html .= '</pre>';
-    $sequences_html .= '<a href="#sequences-top">back to top</a>';
-    $sequences_html .= '</div>';
-  }
+    if (count($cds_sequence) > 0) {
+      $list_items[] = '<a href="#coding_' . $attrs['id'] . '">coding sequnece from alignment at  ' . $attrs['location'] . "</a>";
+      $sequences_html .= '<a name="ccoding_' . $attrs['id'] . '"></a>';
+      $sequences_html .= '<div id="coding_' . $attrs['id'] . '" class="tripal_feature-sequence-item">';
+      $sequences_html .= '<p><b>Coding sequence (CDS) from alignment at  ' . $attrs['location'] . '</b></p>';
+      $sequences_html .= '<pre class="tripal_feature-sequence">';
+      $sequences_html .= '>' . tripal_get_fasta_defline($feature, $attrs['featureloc'], 'CDS') . "\n";
+      $sequences_html .= $cds_sequence[0]['residues'];
+      $sequences_html .= '</pre>';
+      $sequences_html .= '<a href="#sequences-top">back to top</a>';
+      $sequences_html .= '</div>';
+    }
+  } 
+  ?>
+
+  <div class="tripal_feature-data-block-desc tripal-data-block-desc">The following sequences are available for this feature:</div> 
+  <?php
   
   // first add a list at the top of the page that can be formatted as the
   // user desires.  We use the theme_item_list function of Drupal to create 
@@ -169,6 +193,23 @@ if ($residues or count($featureloc_sequences) > 0) {
     'type' => 'ul',
     'attributes' => array(),
   ));
+
+  $message = 'Administrators, sequences will appear on this page if:
+    <ul>
+      <li>This feature has residues stored in the "residues" field of the feature table of Chado.</li>
+      <li>This feature has a protein feature associated via the "feature_relationship" table of Chado with a
+          relationship of type "derives from" and the protein feature has residues.</li>
+      <li>This feature has one or more CDS feature associated via the "feature_relationship" table of Chado with a
+          relationship of type "part of". If the CDS features have residues then those will be concatenated
+          and presented as a feature.</li>
+      <li>This feature is aligned to another feature (e.g. scaffold, or chromosome). In this case, the
+          sequence underlying the alignment will be shown</li>
+      <li>This feature is aligned to another feature (e.g. scaffold, or chromosome) and this feature has
+          one ore more CDS features associated.  The CDS sequenes underlying the alignment will be
+          shown.</li>
+    </ul>
+    </p>';
+  print tripal_set_message($message, TRIPAL_INFO, array('return_html' => 1));
   
   // now print the sequences
   print $sequences_html;

+ 2 - 1
tripal_feature/theme/tripal_feature.theme.inc

@@ -140,7 +140,7 @@ function tripal_feature_load_featureloc_sequences($feature_id, $featurelocs) {
         $parts = $rparts;
       }
 
-      $floc_sequences[$src]['src'] = $src;
+      $floc_sequences[$src]['id'] = $src;
       $floc_sequences[$src]['type'] = $featureloc->feature_id->type_id->name;
       $args = array(':feature_id' => $featureloc->srcfeature_id->feature_id);
       $start = $featureloc->fmin + 1;
@@ -169,6 +169,7 @@ function tripal_feature_load_featureloc_sequences($feature_id, $featurelocs) {
       }
       $floc_sequences[$src]['location'] = tripal_get_location_string($featureloc);
       $floc_sequences[$src]['defline'] = tripal_get_fasta_defline($featureloc->feature_id, $featureloc);
+      $floc_sequences[$src]['featureloc'] = $featureloc;
       $floc_sequences[$src]['formatted_seq'] =  tripal_feature_color_sequence($residues, $parts, $floc_sequences[$src]['defline']);
     }
   }