浏览代码

Fixed SQL on sequence retrieval for relationships

Stephen Ficklin 11 年之前
父节点
当前提交
22519a8f6e

+ 42 - 41
tripal_feature/api/tripal_feature.api.inc

@@ -468,12 +468,13 @@ function tripal_feature_reverse_complement($sequence) {
  *   output.
  * @param $sub_features
  *   Only include sub features (or child features) of the types provided in the array 
- * @param $subject_rel
- *   Retreives the sequence of any features in relationship with the feature_id
- *   where the feature_id is the subject of the relationship.
- * @param $object_rel
- *   Retreives the sequence of any features in relationship with the feature_id
- *   where the feature_id is the object of the relationship. 
+ * @param $relationship
+ *   If a relationship name is provided (e.g. sequence_of) then any sequences that
+ *   are in relationships of this type with matched sequences are also included
+ * @param $rel_part
+ *   If a relationship is provided in the preceding argument then the rel_part
+ *   must be either 'object' or 'subject' to indicate which side of the 
+ *   relationship the matched features belong to
  *      
  * @return
  *   The DNA/protein sequence formatted as requested.
@@ -482,7 +483,7 @@ function tripal_feature_reverse_complement($sequence) {
  */
 function tripal_feature_get_formatted_sequence($feature_id, $feature_name, 
   $num_bases_per_line, $derive_from_parent, $aggregate, $output_format,
-  $upstream, $downstream, $sub_features = array(), $subject_rel, $object_rel) {
+  $upstream, $downstream, $sub_features = array(), $relationship = '', $rel_part = '') {
   
   // to speed things up we need to make sure we have a persistent connection
   $connection = tripal_db_persistent_chado(); 
@@ -494,60 +495,60 @@ function tripal_feature_get_formatted_sequence($feature_id, $feature_name,
      $downstream = 0;
   }
   
-  if ($subject_rel or $object_rel) { 
-    if($subject_rel) {
+  if ($rel_part == "object" or $rel_part == "subject") { 
+    if($rel_part == "subject") {
       $psql = '
-        PREPARE feature_by_subject (int, text) AS
-        SELECT F2.feature_id, F2.name, F2.uniquename, CVT2.name as feature_type, O.genus, O.species
-        FROM feature F
-          INNER JOIN feature_relationship FR ON FR.subject_id  = F.feature_id
-          INNER JOIN cvterm CVT              ON CVT.cvterm_id  = FR.type_id
-          INNER JOIN feature F2              ON F2.feature_id  = FR.object_id
-          INNER JOIN cvterm CVT2             ON CVT2.cvterm_id = F2.type_id
-          INNER JOIN organism O              ON O.organism_id  = F2.organism_id
+        PREPARE feature_rel_get_object (int, text) AS
+        SELECT FO.feature_id, FO.name, FO.uniquename, CVTO.name as feature_type, O.genus, O.species
+        FROM feature FS
+          INNER JOIN feature_relationship FR ON FR.subject_id   = FS.feature_id
+          INNER JOIN cvterm CVTFR            ON CVTFR.cvterm_id = FR.type_id
+          INNER JOIN feature FO              ON FO.feature_id   = FR.object_id
+          INNER JOIN cvterm CVTO             ON CVTO.cvterm_id  = FO.type_id
+          INNER JOIN organism O              ON O.organism_id   = FO.organism_id
         WHERE 
-          F.feature_id = $1 AND
-          CVT.name     = $2
+          FS.feature_id = $1 AND
+          CVTFR.name    = $2
       ';  
-      $status = tripal_core_chado_prepare('feature_by_subject', $psql, array('int', 'text'));
+      $status = tripal_core_chado_prepare('feature_rel_get_object', $psql, array('int', 'text'));
       if (!$status) {
         watchdog('tripal_feature', "init: not able to prepare SQL statement '%name'", 
           array('%name' => 'feature_by_subject'), 'WATCHDOG ERROR');
       }    
-      $sql = "EXECUTE feature_by_subject (%d,'%s')";
-      $features = chado_query($sql, $feature_id, $subject_rel); 
+      $sql = "EXECUTE feature_rel_get_object(%d,'%s')";
+      $features = chado_query($sql, $feature_id, $relationship); 
     }
-    if($object_rel) {
+    if($rel_part == "object") {
       $psql = '
-        PREPARE feature_by_object (int, text) AS
-        SELECT F2.feature_id, F2.name, F2.uniquename, CVT2.name as feature_type, O.genus, O.species
-        FROM feature F
-          INNER JOIN feature_relationship FR ON FR.object_id   = F.feature_id
-          INNER JOIN cvterm CVT              ON CVT.cvterm_id  = FR.type_id
-          INNER JOIN feature F2              ON F2.feature_id  = FR.subject_id
-          INNER JOIN cvterm CVT2             ON CVT2.cvterm_id = F2.type_id
-          INNER JOIN organism O              ON O.organism_id  = F2.organism_id
+        PREPARE feature_rel_get_subject (int, text) AS
+        SELECT FS.feature_id, FS.name, FS.uniquename, CVTO.name as feature_type, O.genus, O.species
+        FROM feature FO
+          INNER JOIN feature_relationship FR ON FR.object_id    = FO.feature_id
+          INNER JOIN cvterm CVTFR            ON CVTFR.cvterm_id = FR.type_id
+          INNER JOIN feature FS              ON FS.feature_id   = FR.subject_id
+          INNER JOIN cvterm CVTO             ON CVTO.cvterm_id  = FS.type_id
+          INNER JOIN organism O              ON O.organism_id   = FS.organism_id
         WHERE 
-          F.feature_id = $1 AND
-          CVT.name     = $2
+          FO.feature_id = $1 AND
+          CVTFR.name    = $2
       ';
-      $status = tripal_core_chado_prepare('feature_by_object', $psql, array('int', 'text'));
+      $status = tripal_core_chado_prepare('feature_rel_get_subject', $psql, array('int', 'text'));
       if (!$status) {
         watchdog('tripal_feature', "init: not able to prepare SQL statement '%name'", 
           array('%name' => 'feature_by_object'), 'WATCHDOG ERROR');
       }
-      $sql = "EXECUTE feature_by_object (%d,'%s')";
-      $features = chado_query($sql, $feature_id, $object_rel);     
+      $sql = "EXECUTE feature_rel_get_subject(%d,'%s')";
+      $features = chado_query($sql, $feature_id, $relationship);     
     }
     $sequences = '';
     while ($feature = db_fetch_object($features)) {  
 
       // recurse and get the sequences for these in the relationship
-      if ($subject_rel) {
-        $defline = "$feature_name -> $subject_rel -> $feature->uniquename $feature->feature_type ($feature->genus $feature->species)";
+      if ($rel_part == "subject") {
+        $defline = "$feature_name, $relationship, $feature->uniquename $feature->feature_type ($feature->genus $feature->species)";
       }
-      if ($object_rel) {
-        $defline = "$feature->uniquename $feature->feature_type ($feature->genus $feature->species) -> $object_rel -> $feature_name";
+      if ($rel_part == "object") {
+        $defline = "$feature->uniquename $feature->feature_type ($feature->genus $feature->species), $relationship, $feature_name";
       }
       $sequences .= tripal_feature_get_formatted_sequence($feature->feature_id, $defline, 
         $num_bases_per_line, $derive_from_parent, $aggregate, $output_format,
@@ -775,7 +776,7 @@ function tripal_feature_get_formatted_sequence($feature_id, $feature_name,
       elseif ($output_format == 'fasta_txt') {
          $seq = wordwrap($seq, $num_bases_per_line, "\r\n", TRUE);
       }
-      $residues .= ">$feature_name. Sequence derived from $parent->srctypename of $parent->genus $parent->species: $parent->srcname:" . ($parent->adjfmin + 1) . ".." . $parent->adjfmax ." ($dir). ";
+      $residues .= ">$feature_name. Sequence derived from feature of type, '$parent->srctypename', of $parent->genus $parent->species: $parent->srcname:" . ($parent->adjfmin + 1) . ".." . $parent->adjfmax ." ($dir). ";
       if (count($types) > 0) {
         $residues .= "Excludes all bases but those of type(s): " . implode(', ', $types) . ". " ;
       }

+ 2 - 2
tripal_feature/includes/seq_extract.inc

@@ -456,7 +456,7 @@ function tripal_feature_seq_extract_form_submit($form, &$form_state) {
  */
 function tripal_feature_seq_extract_get_features($org_commonname, $genus, $species, $analysis_name, 
   $type, $feature_name, $upstream, $downstream, $output_format, $derive_from_parent, $aggregate, 
-  $child, $subject_rel, $object_rel) {
+  $child, $relationship, $rel_part) {
     
   $sub_features = explode(',', $child);
     
@@ -534,7 +534,7 @@ function tripal_feature_seq_extract_get_features($org_commonname, $genus, $speci
     // generate the sequence
     $sequence = tripal_feature_get_formatted_sequence($feature_id, $feature_name, 
       $num_bases_per_line, $derive_from_parent, $aggregate, $output_format,
-      $upstream, $downstream, $sub_features, $subject_rel, $object_rel);
+      $upstream, $downstream, $sub_features, $relationship, $rel_part);
     
     // print the sequence
     print $sequence;

+ 11 - 6
tripal_feature/tripal_feature.drush.inc

@@ -41,8 +41,8 @@ function tripal_feature_drush_command() {
       'parent'   => dt('Set this argument to 1 to retrieve the sequence from the parent in an alignment rather than the residues column of the feature itself.'),
       'agg'      => dt('Set this argument to 1 to aggregate sub features into a single sequence.  This is useful, for example, for obtaining CDS sequence from an mRNA'),
       'child'    => dt('Set this argument to the sequence ontology term for the children to aggregate.  This is useful in the case where a gene has exons as well as CDSs and UTRs.  You may sepcify as many feature types as desired by separating each with a single comma (no spaces).'),
-      'object_rel'  => dt('Retreives the sequence of any features in relationship with the matched features from other criteria where the feature is the subject of the relationship'),
-      'subject_rel' => dt('Retreives the sequence of any features in relationship with the matched features from other criteria where the feature is the object of the relationship. '),
+      'relationship'  => dt('Retreives the sequence of any feature in the specified relationship with the matched features.'),
+      'rel_part' => dt('If a relationship is provided, then this will be "subject" or "object" indicating the side of the relationship for the matched features. If the matched features are the "object" then the "subject" features will have their sequences included in the output (and vice versa).'),
     ),
     'examples' => array(
       'Standard example' => 'drush tripal-current-job',
@@ -81,12 +81,17 @@ function drush_tripal_feature_tripal_get_sequence() {
   $derive_from_parent = drush_get_option('parent');
   $aggregate = drush_get_option('agg');
   $child = drush_get_option('child');
-  $object_rel = drush_get_option('object_rel');
-  $subject_rel = drush_get_option('subject_rel');
+  $relationship = drush_get_option('relationship');
+  $rel_part = drush_get_option('rel_part');
+
+  if($relationship and !$rel_part){
+    print "Please specify both 'relationship' and a 'rel_part' arguments. Both must be used together\n";
+    return;
+  }
     
   tripal_feature_seq_extract_get_features($org_commonname, $genus, $species, $analysis_name, 
     $type, $feature_name, $upstream, $downstream, $output_format, $derive_from_parent, 
-    $aggregate, $child, $subject_rel, $object_rel);   
+    $aggregate, $child, $relationship, $rel_part);
 }
 /*
  * 
@@ -94,4 +99,4 @@ function drush_tripal_feature_tripal_get_sequence() {
 function drush_tripal_feature_sync() {
   $feature_id = drush_get_option('id');  
   tripal_feature_sync_feature($feature_id);
-}
+}