فهرست منبع

Fixed a bug in the FASTA loader where the uniquename or name was being overwritten when it shouldn't have been.

spficklin 13 سال پیش
والد
کامیت
f52ccafa35
1 فایلهای تغییر یافته به همراه 79 افزوده شده و 36 حذف شده
  1. 79 36
      tripal_feature/fasta_loader.php

+ 79 - 36
tripal_feature/fasta_loader.php

@@ -433,6 +433,7 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
    $i = 0;
 
    $name = '';
+   $uname = '';
    $residues = '';
    $num_lines = sizeof($lines);
    $interval = intval($num_lines * 0.01);
@@ -448,17 +449,18 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
          tripal_job_set_progress($job,intval(($i/$num_lines)*100));
       }
 
-      // get the name, uniquename, accession and relationship subject from
-      // the definition line
+      // if we encounter a definition line then get the name, uniquename, 
+      // accession and relationship subject from the definition line
       if(preg_match('/^>/',$line)){
          // if we have a feature name or unique name then we are starting a
          // new sequence so let's handle the previous one before moving on
-         if($name){
+         if($name or $uname){
            tripal_feature_fasta_loader_handle_feature($name,$uname,$db_id,
               $accession,$subject,$rel_type,$parent_type,$library_id,$organism_id,$type,
               $source,$residues,$method,$re_name,$match_type);
            $residues = '';
            $name = '';
+           $uname = '';
          }
 
          $line = preg_replace("/^>/",'',$line);
@@ -469,8 +471,12 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
             }
             $name = trim($matches[1]);
          } else {
-            preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
-            $name = trim($matches[1]);
+            // if the match_type is name and no regular expression was provided
+            // then use the first word as the name, otherwise we don't set the name
+            if(strcmp($match_type,'Name')==0){
+               preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
+               $name = trim($matches[1]);
+            }
          } 
          // get the feature unique name
          if($re_uname){
@@ -479,12 +485,18 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
             }
             $uname = trim($matches[1]);
          } else {
-            preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
-            $uname = trim($matches[1]);
+            // if the match_type is unique name and no regular expression was provided
+            // then use the first word as the unique name, otherwise we don't set the uniquename
+            if(strcmp($match_type,'Unique name')==0){
+               preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
+               $uname = trim($matches[1]);
+            }
          } 
-               
+         // get the accession    
          preg_match("/$re_accession/",$line,$matches);
          $accession = trim($matches[1]);
+
+         // get the relationship subject
          preg_match("/$re_subject/",$line,$matches);
          $subject = trim($matches[1]);
       }
@@ -535,44 +547,75 @@ function tripal_feature_fasta_loader_handle_feature($name,$uname,$db_id,$accessi
                       WHERE organism_id = %d and name = '%s' and type_id = %d";
          $feature = db_fetch_object(db_query($feature_sql,$organism_id,$name,$cvterm->cvterm_id));
       }
-   } else {
+   }
+   if(strcmp($match_type,'Unique name')==0){
       $feature_sql = "SELECT * FROM {feature} 
                       WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
       $feature = db_fetch_object(db_query($feature_sql,$organism_id,$uname,$cvterm->cvterm_id));
    }
 
-   if(!$feature){
-       if(strcmp($method,'Insert only')==0 or strcmp($method,'Insert and update')==0){
-         // now insert the feature
-         $sql = "INSERT INTO {feature} 
-                    (organism_id, name, uniquename, residues, seqlen, 
-                     md5checksum,type_id,is_analysis,is_obsolete)
-                 VALUES(%d,'%s','%s','%s',%d, '%s', %d, %s, %s)";
-         $result = db_query($sql,$organism_id,$name,$uname,$residues,strlen($residues),
-                     md5($residues),$cvterm->cvterm_id,'false','false');
-         if(!$result){
-            print "ERROR: failed to insert feature '$name ($uname)'\n";
-            return 0;
-         } else {
-            print "Inserted feature $name ($uname)\n";
-         }
-      } 
-      else {
-         print "WARNING: failed to find feature '$name' ('$uname') while matching on " . strtolower($match_type) . ". Skipping\n";
+   if(!$feature and (strcmp($method,'Insert only')==0 or strcmp($method,'Insert and update')==0)){
+       // if we have a unique name but not a name then set them to be the same
+       // and vice versa
+       if(!$uname){
+          $uname = $name;
+       }
+       elseif(!$name){
+          $name = $uname;
+       }
+      // now insert the feature
+      $sql = "INSERT INTO {feature} 
+                 (organism_id, name, uniquename, residues, seqlen, 
+                  md5checksum,type_id,is_analysis,is_obsolete)
+              VALUES(%d,'%s','%s','%s',%d, '%s', %d, %s, %s)";
+      $result = db_query($sql,$organism_id,$name,$uname,$residues,strlen($residues),
+                  md5($residues),$cvterm->cvterm_id,'false','false');
+      if(!$result){
+         print "ERROR: failed to insert feature '$name ($uname)'\n";
          return 0;
+      } else {
+         print "Inserted feature $name ($uname)\n";
       }
-   } else {
+   } 
+   if(!$feature and (strcmp($method,'Update only')==0 or strcmp($method,'Insert and update')==0)){
+      print "WARNING: failed to find feature '$name' ('$uname') while matching on " . strtolower($match_type) . ". Skipping\n";
+      return 0;
+   }
+
+   if($feature and (strcmp($method,'Update only')==0 or strcmp($method,'Insert and update')==0)){
        if(strcmp($method,'Update only')==0 or strcmp($method,'Insert and update')==0){
          if(strcmp($match_type,'Name')==0){
-            $sql = "UPDATE {feature} 
-                     SET uniquename = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
-                     WHERE organism_id = %d and name = '%s' and type_id = %d";
-            $result = db_query($sql,$uname,$residues,strlen($residues),md5($residues),$organism_id,$name,$cvterm->cvterm_id);
+            // if we're matching on the name but do not have a new unique name then we
+            // don't want to update the uniquename.  If we do have a uniquename then we 
+            // should update it.  We only get a uniquename if there was a regular expression
+            // provided for pulling it out
+            if($uname){
+               $sql = "UPDATE {feature} 
+                        SET uniquename = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
+                        WHERE organism_id = %d and name = '%s' and type_id = %d";
+               $result = db_query($sql,$uname,$residues,strlen($residues),md5($residues),$organism_id,$name,$cvterm->cvterm_id);
+            } else {
+               $sql = "UPDATE {feature} 
+                        SET residues = '%s', seqlen = '%s', md5checksum = '%s'
+                        WHERE organism_id = %d and name = '%s' and type_id = %d";
+               $result = db_query($sql,$residues,strlen($residues),md5($residues),$organism_id,$name,$cvterm->cvterm_id);
+            }
          } else {
-            $sql = "UPDATE {feature} 
-                     SET name = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
-                     WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
-            $result = db_query($sql,$name,$residues,strlen($residues),md5($residues),$organism_id,$uname,$cvterm->cvterm_id);
+            // if we're matching on the unique name but do not have a new name then we
+            // don't want to update the name.  If we do have a name then we 
+            // should update it.  We only get a name if there was a regular expression
+            // provided for pulling it out
+            if($name){
+               $sql = "UPDATE {feature} 
+                        SET name = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
+                        WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
+               $result = db_query($sql,$name,$residues,strlen($residues),md5($residues),$organism_id,$uname,$cvterm->cvterm_id);
+            } else {
+               $sql = "UPDATE {feature} 
+                        SET residues = '%s', seqlen = '%s', md5checksum = '%s'
+                        WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
+               $result = db_query($sql,$residues,strlen($residues),md5($residues),$organism_id,$uname,$cvterm->cvterm_id);
+            }
          }
          if(!$result){
             print "ERROR: failed to update feature '$name ($uname)'\n";