Browse Source

Fixed a bug in the FASTA loader where the uniquename or name was being overwritten when it shouldn't have been.

spficklin 13 years ago
parent
commit
f52ccafa35
1 changed files with 79 additions and 36 deletions
  1. 79 36
      tripal_feature/fasta_loader.php

+ 79 - 36
tripal_feature/fasta_loader.php

@@ -433,6 +433,7 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
    $i = 0;
 
    $name = '';
+   $uname = '';
    $residues = '';
    $num_lines = sizeof($lines);
    $interval = intval($num_lines * 0.01);
@@ -448,17 +449,18 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
          tripal_job_set_progress($job,intval(($i/$num_lines)*100));
       }
 
-      // get the name, uniquename, accession and relationship subject from
-      // the definition line
+      // if we encounter a definition line then get the name, uniquename, 
+      // accession and relationship subject from the definition line
       if(preg_match('/^>/',$line)){
          // if we have a feature name then we are starting a new sequence
          // so let's handle the previous one before moving on
-         if($name){
+         if($name or $uname){
            tripal_feature_fasta_loader_handle_feature($name,$uname,$db_id,
               $accession,$subject,$rel_type,$parent_type,$library_id,$organism_id,$type,
               $source,$residues,$method,$re_name,$match_type);
            $residues = '';
            $name = '';
+           $uname = '';
          }
 
          $line = preg_replace("/^>/",'',$line);
@@ -469,8 +471,12 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
             }
             $name = trim($matches[1]);
          } else {
-            preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
-            $name = trim($matches[1]);
+            // if the match_type is name and no regular expression was provided
+            // then use the first word as the name, otherwise we don't set the name
+            if(strcmp($match_type,'Name')==0){
+               preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
+               $name = trim($matches[1]);
+            }
          } 
          // get the feature unique name
          if($re_uname){
@@ -479,12 +485,18 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
             }
             $uname = trim($matches[1]);
          } else {
-            preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
-            $uname = trim($matches[1]);
+            // if the match_type is unique name and no regular expression was provided
+            // then use the first word as the unique name, otherwise, we don't set the uniquename
+            if(strcmp($match_type,'Unique name')==0){
+               preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
+               $uname = trim($matches[1]);
+            }
          } 
-               
+         // get the accession    
          preg_match("/$re_accession/",$line,$matches);
          $accession = trim($matches[1]);
+
+         // get the relationship subject
          preg_match("/$re_subject/",$line,$matches);
          $subject = trim($matches[1]);
       }
@@ -535,44 +547,75 @@ function tripal_feature_fasta_loader_handle_feature($name,$uname,$db_id,$accessi
                       WHERE organism_id = %d and name = '%s' and type_id = %d";
          $feature = db_fetch_object(db_query($feature_sql,$organism_id,$name,$cvterm->cvterm_id));
       }
-   } else {
+   }
+   if(strcmp($match_type,'Unique name')==0){
       $feature_sql = "SELECT * FROM {feature} 
                       WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
       $feature = db_fetch_object(db_query($feature_sql,$organism_id,$uname,$cvterm->cvterm_id));
    }
 
-   if(!$feature){
-       if(strcmp($method,'Insert only')==0 or strcmp($method,'Insert and update')==0){
-         // now insert the feature
-         $sql = "INSERT INTO {feature} 
-                    (organism_id, name, uniquename, residues, seqlen, 
-                     md5checksum,type_id,is_analysis,is_obsolete)
-                 VALUES(%d,'%s','%s','%s',%d, '%s', %d, %s, %s)";
-         $result = db_query($sql,$organism_id,$name,$uname,$residues,strlen($residues),
-                     md5($residues),$cvterm->cvterm_id,'false','false');
-         if(!$result){
-            print "ERROR: failed to insert feature '$name ($uname)'\n";
-            return 0;
-         } else {
-            print "Inserted feature $name ($uname)\n";
-         }
-      } 
-      else {
-         print "WARNING: failed to find feature '$name' ('$uname') while matching on " . strtolower($match_type) . ". Skipping\n";
+   if(!$feature and (strcmp($method,'Insert only')==0 or strcmp($method,'Insert and update')==0)){
+       // if we have a unique name but not a name then set them to be the same 
+       // and vice versa
+       if(!$uname){
+          $uname = $name;
+       }
+       elseif(!$name){
+          $name = $uname;
+       }
+      // now insert the feature
+      $sql = "INSERT INTO {feature} 
+                 (organism_id, name, uniquename, residues, seqlen, 
+                  md5checksum,type_id,is_analysis,is_obsolete)
+              VALUES(%d,'%s','%s','%s',%d, '%s', %d, %s, %s)";
+      $result = db_query($sql,$organism_id,$name,$uname,$residues,strlen($residues),
+                  md5($residues),$cvterm->cvterm_id,'false','false');
+      if(!$result){
+         print "ERROR: failed to insert feature '$name ($uname)'\n";
          return 0;
+      } else {
+         print "Inserted feature $name ($uname)\n";
       }
-   } else {
+   } 
+   if(!$feature and (strcmp($method,'Update only')==0 or strcmp($method,'Insert and update')==0)){
+      print "WARNING: failed to find feature '$name' ('$uname') while matching on " . strtolower($match_type) . ". Skipping\n";
+      return 0;
+   }
+
+   if($feature and (strcmp($method,'Update only')==0 or strcmp($method,'Insert and update')==0)){
        if(strcmp($method,'Update only')==0 or strcmp($method,'Insert and update')==0){
          if(strcmp($match_type,'Name')==0){
-            $sql = "UPDATE {feature} 
-                     SET uniquename = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
-                     WHERE organism_id = %d and name = '%s' and type_id = %d";
-            $result = db_query($sql,$uname,$residues,strlen($residues),md5($residues),$organism_id,$name,$cvterm->cvterm_id);
+            // if we're matching on the name but do not have a new unique name then we
+            // don't want to update the uniquename.  If we do have a uniquename then we 
+            // should update it.  We only get a uniquename if there was a regular expression
+            // provided for pulling it out
+            if($uname){
+               $sql = "UPDATE {feature} 
+                        SET uniquename = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
+                        WHERE organism_id = %d and name = '%s' and type_id = %d";
+               $result = db_query($sql,$uname,$residues,strlen($residues),md5($residues),$organism_id,$name,$cvterm->cvterm_id);
+            } else {
+               $sql = "UPDATE {feature} 
+                        SET residues = '%s', seqlen = '%s', md5checksum = '%s'
+                        WHERE organism_id = %d and name = '%s' and type_id = %d";
+               $result = db_query($sql,$residues,strlen($residues),md5($residues),$organism_id,$name,$cvterm->cvterm_id);
+            }
          } else {
-            $sql = "UPDATE {feature} 
-                     SET name = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
-                     WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
-            $result = db_query($sql,$name,$residues,strlen($residues),md5($residues),$organism_id,$uname,$cvterm->cvterm_id);
+            // if we're matching on the unique name but do not have a new name then we
+            // don't want to update the name.  If we do have a name then we 
+            // should update it.  We only get a name if there was a regular expression
+            // provided for pulling it out
+            if($name){
+               $sql = "UPDATE {feature} 
+                        SET name = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
+                        WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
+               $result = db_query($sql,$name,$residues,strlen($residues),md5($residues),$organism_id,$uname,$cvterm->cvterm_id);
+            } else {
+               $sql = "UPDATE {feature} 
+                        SET residues = '%s', seqlen = '%s', md5checksum = '%s'
+                        WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
+               $result = db_query($sql,$residues,strlen($residues),md5($residues),$organism_id,$uname,$cvterm->cvterm_id);
+            }
          }
          if(!$result){
             print "ERROR: failed to update feature '$name ($uname)'\n";