15 лет назад · f52ccafa35
--- a/tripal_feature/fasta_loader.php
+++ b/tripal_feature/fasta_loader.php
@@ -433,6 +433,7 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
 
				    $i = 0;
			
 
				 
			
 
				    $name = '';
			
 
				+   $uname = '';
			
 
				    $residues = '';
			
 
				    $num_lines = sizeof($lines);
			
 
				    $interval = intval($num_lines * 0.01);
			
@@ -448,17 +449,18 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
 
				          tripal_job_set_progress($job,intval(($i/$num_lines)*100));
			
 
				       }
			
 
				 
			
 
				-      // get the name, uniquename, accession and relationship subject from
			
 
				-      // the definition line
			
 
				+      // if we encounter a definition line then get the name, uniquename, 
			
 
				+      // accession and relationship subject from the definition line
			
 
				       if(preg_match('/^>/',$line)){
			
 
				          // if we have a feature name then we are starting a new sequence
			
 
				          // so let's handle the previous one before moving on
			
 
				-         if($name){
			
 
				+         if($name or $uname){
			
 
				            tripal_feature_fasta_loader_handle_feature($name,$uname,$db_id,
			
 
				               $accession,$subject,$rel_type,$parent_type,$library_id,$organism_id,$type,
			
 
				               $source,$residues,$method,$re_name,$match_type);
			
 
				            $residues = '';
			
 
				            $name = '';
			
 
				+           $uname = '';
			
 
				          }
			
 
				 
			
 
				          $line = preg_replace("/^>/",'',$line);
			
@@ -469,8 +471,12 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
 
				             }
			
 
				             $name = trim($matches[1]);
			
 
				          } else {
			
 
				-            preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
			
 
				-            $name = trim($matches[1]);
			
 
				+            // if the match_type is name and no regular expression was provided
			
 
				+            // then use the first word as the name, otherwise we don't set the name
			
 
				+            if(strcmp($match_type,'Name')==0){
			
 
				+               preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
			
 
				+               $name = trim($matches[1]);
			
 
				+            }
			
 
				          } 
			
 
				          // get the feature unique name
			
 
				          if($re_uname){
			
@@ -479,12 +485,18 @@ function tripal_feature_load_fasta($dfile, $organism_id, $type,
 
				             }
			
 
				             $uname = trim($matches[1]);
			
 
				          } else {
			
 
				-            preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
			
 
				-            $uname = trim($matches[1]);
			
 
				+            // if the match_type is unique name and no regular expression was provided
			
 
				+            // then use the first word as the unique name; otherwise, we don't set the uniquename
			
 
				+            if(strcmp($match_type,'Unique name')==0){
			
 
				+               preg_match("/^\s*(.*?)[\s\|].*$/",$line,$matches);
			
 
				+               $uname = trim($matches[1]);
			
 
				+            }
			
 
				          } 
			
 
				-               
			
 
				+         // get the accession    
			
 
				          preg_match("/$re_accession/",$line,$matches);
			
 
				          $accession = trim($matches[1]);
			
 
				+
			
 
				+         // get the relationship subject
			
 
				          preg_match("/$re_subject/",$line,$matches);
			
 
				          $subject = trim($matches[1]);
			
 
				       }
			
@@ -535,44 +547,75 @@ function tripal_feature_fasta_loader_handle_feature($name,$uname,$db_id,$accessi
 
				                       WHERE organism_id = %d and name = '%s' and type_id = %d";
			
 
				          $feature = db_fetch_object(db_query($feature_sql,$organism_id,$name,$cvterm->cvterm_id));
			
 
				       }
			
 
				-   } else {
			
 
				+   }
			
 
				+   if(strcmp($match_type,'Unique name')==0){
			
 
				       $feature_sql = "SELECT * FROM {feature} 
			
 
				                       WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
			
 
				       $feature = db_fetch_object(db_query($feature_sql,$organism_id,$uname,$cvterm->cvterm_id));
			
 
				    }
			
 
				 
			
 
				-   if(!$feature){
			
 
				-       if(strcmp($method,'Insert only')==0 or strcmp($method,'Insert and update')==0){
			
 
				-         // now insert the feature
			
 
				-         $sql = "INSERT INTO {feature} 
			
 
				-                    (organism_id, name, uniquename, residues, seqlen, 
			
 
				-                     md5checksum,type_id,is_analysis,is_obsolete)
			
 
				-                 VALUES(%d,'%s','%s','%s',%d, '%s', %d, %s, %s)";
			
 
				-         $result = db_query($sql,$organism_id,$name,$uname,$residues,strlen($residues),
			
 
				-                     md5($residues),$cvterm->cvterm_id,'false','false');
			
 
				-         if(!$result){
			
 
				-            print "ERROR: failed to insert feature '$name ($uname)'\n";
			
 
				-            return 0;
			
 
				-         } else {
			
 
				-            print "Inserted feature $name ($uname)\n";
			
 
				-         }
			
 
				-      } 
			
 
				-      else {
			
 
				-         print "WARNING: failed to find feature '$name' ('$uname') while matching on " . strtolower($match_type) . ". Skipping\n";
			
 
				+   if(!$feature and (strcmp($method,'Insert only')==0 or strcmp($method,'Insert and update')==0)){
			
 
				+       // if we have a unique name but not a name then set them to be the same 
			
 
				+       // and vice versa
			
 
				+       if(!$uname){
			
 
				+          $uname = $name;
			
 
				+       }
			
 
				+       elseif(!$name){
			
 
				+          $name = $uname;
			
 
				+       }
			
 
				+      // now insert the feature
			
 
				+      $sql = "INSERT INTO {feature} 
			
 
				+                 (organism_id, name, uniquename, residues, seqlen, 
			
 
				+                  md5checksum,type_id,is_analysis,is_obsolete)
			
 
				+              VALUES(%d,'%s','%s','%s',%d, '%s', %d, %s, %s)";
			
 
				+      $result = db_query($sql,$organism_id,$name,$uname,$residues,strlen($residues),
			
 
				+                  md5($residues),$cvterm->cvterm_id,'false','false');
			
 
				+      if(!$result){
			
 
				+         print "ERROR: failed to insert feature '$name ($uname)'\n";
			
 
				          return 0;
			
 
				+      } else {
			
 
				+         print "Inserted feature $name ($uname)\n";
			
 
				       }
			
 
				-   } else {
			
 
				+   } 
			
 
				+   if(!$feature and (strcmp($method,'Update only')==0 or strcmp($method,'Insert and update')==0)){
			
 
				+      print "WARNING: failed to find feature '$name' ('$uname') while matching on " . strtolower($match_type) . ". Skipping\n";
			
 
				+      return 0;
			
 
				+   }
			
 
				+
			
 
				+   if($feature and (strcmp($method,'Update only')==0 or strcmp($method,'Insert and update')==0)){
			
 
				        if(strcmp($method,'Update only')==0 or strcmp($method,'Insert and update')==0){
			
 
				          if(strcmp($match_type,'Name')==0){
			
 
				-            $sql = "UPDATE {feature} 
			
 
				-                     SET uniquename = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
			
 
				-                     WHERE organism_id = %d and name = '%s' and type_id = %d";
			
 
				-            $result = db_query($sql,$uname,$residues,strlen($residues),md5($residues),$organism_id,$name,$cvterm->cvterm_id);
			
 
				+            // if we're matching on the name but do not have a new unique name then we
			
 
				+            // don't want to update the uniquename.  If we do have a uniquename then we 
			
 
				+            // should update it.  We only get a uniquename if there was a regular expression
			
 
				+            // provided for pulling it out
			
 
				+            if($uname){
			
 
				+               $sql = "UPDATE {feature} 
			
 
				+                        SET uniquename = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
			
 
				+                        WHERE organism_id = %d and name = '%s' and type_id = %d";
			
 
				+               $result = db_query($sql,$uname,$residues,strlen($residues),md5($residues),$organism_id,$name,$cvterm->cvterm_id);
			
 
				+            } else {
			
 
				+               $sql = "UPDATE {feature} 
			
 
				+                        SET residues = '%s', seqlen = '%s', md5checksum = '%s'
			
 
				+                        WHERE organism_id = %d and name = '%s' and type_id = %d";
			
 
				+               $result = db_query($sql,$residues,strlen($residues),md5($residues),$organism_id,$name,$cvterm->cvterm_id);
			
 
				+            }
			
 
				          } else {
			
 
				-            $sql = "UPDATE {feature} 
			
 
				-                     SET name = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
			
 
				-                     WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
			
 
				-            $result = db_query($sql,$name,$residues,strlen($residues),md5($residues),$organism_id,$uname,$cvterm->cvterm_id);
			
 
				+            // if we're matching on the unique name but do not have a new name then we
			
 
				+            // don't want to update the name.  If we do have a name then we 
			
 
				+            // should update it.  We only get a name if there was a regular expression
			
 
				+            // provided for pulling it out
			
 
				+            if($name){
			
 
				+               $sql = "UPDATE {feature} 
			
 
				+                        SET name = '%s', residues = '%s', seqlen = '%s', md5checksum = '%s'
			
 
				+                        WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
			
 
				+               $result = db_query($sql,$name,$residues,strlen($residues),md5($residues),$organism_id,$uname,$cvterm->cvterm_id);
			
 
				+            } else {
			
 
				+               $sql = "UPDATE {feature} 
			
 
				+                        SET residues = '%s', seqlen = '%s', md5checksum = '%s'
			
 
				+                        WHERE organism_id = %d and uniquename = '%s' and type_id = %d";
			
 
				+               $result = db_query($sql,$residues,strlen($residues),md5($residues),$organism_id,$uname,$cvterm->cvterm_id);
			
 
				+            }
			
 
				          }
			
 
				          if(!$result){
			
 
				             print "ERROR: failed to update feature '$name ($uname)'\n";