Просмотр исходного кода

removed one loop, added printGFF_parent_children function

Robb, Sofia 8 лет назад
Родитель
Сommit
39eaa1aa5e
1 измененных файлов с 31 добавлено и 28 удалено
  1. 31 28
      api/blast_ui.api.inc

+ 31 - 28
api/blast_ui.api.inc

@@ -712,6 +712,7 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){
   $tsv = fopen($blast_tsv, "r") or die("Unable to open tsv file!");
 
   // For each line in the TSV file...
+  // Need to go thru each line of tsv to find the first and last hsp of a hit.
   $results = array();
   while(!feof($tsv)) {
     $line = fgets($tsv);
@@ -739,26 +740,30 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){
 
     // Assign the important parts of the line to readable variables.
     $hitname = $parts[1];
-    $hspInfo = "$parts[0],$parts[8],$parts[9],$parts[6],$parts[7]";
+    $queryId = $parts[0];       // query sequence id
+    $subjectStart = $parts[8];  // HSP start on the subject (hit)
+    $subjectEnd = $parts[9];    // HSP end on the subject (hit)
+    $queryStart = $parts[6];    // HSP start on the query (read from $parts, not the undefined $part)
+    $queryEnd = $parts[7];      // HSP end on the query
     $eval = $parts[10];
+    $hspInfo = "$queryId,$subjectStart,$subjectEnd,$queryStart,$queryEnd";
     $results[$hitname][$hspInfo] = $eval;
-
-    $IDs = array();
-    $count = 0;
-    $last_q = NULL;
-
-    // Need to go thru each line of tsv to find the first and last hsp of a hit.
-    // Need to get the smallest and largest coordinate for the parent feature
+  } // end tsv file while
+  $IDs = array();
+  $count = 0;
+  $last_s = NULL;
+   
+  // Need to get the smallest and largest coordinate for the parent feature
     foreach ($results as $s => $hspInfoArray) {
+      $count++;
       $hsp = 0;
       foreach ($hspInfoArray as $hspInfoStr => $e) {
         list($q,$ss,$se,$qs,$qe) = preg_split('/,/',$hspInfoStr);
-        if (!$last_q or  $q != $last_q) {
-          $count++;
-          $hsp=0;
+        if ($s != NULL and $s != $last_s) {
           printGFF_parent_children($gff,$IDs);
-          $IDs = array();
+         $IDs = array();
         }
+        $IDs["$s,$q"]['count']=$count;
         $q_strand = '+';
         if ($qs > $qe) {
           list($qs,$qe) = array($qe,$qs);
@@ -781,37 +786,35 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){
         }
         $hsp++;
         $IDs["$s,$q"]['HSPs'][] = join("\t", array($s, "BLASTRESULT" , "match_part" , $start , $end , $e , $IDs["$s,$q"]['strand'] , '.' , "ID=$s.$count.$hsp;Parent=$s.$count;Target=$q $qs $qe $q_strand"));
-        $last_q = $q;
+        $last_s = $s;
       }
     }
     printGFF_parent_children($gff,$IDs);
-  }
-
-  // Now can print a parent gff line and all the children.
-  // Note: the evalues seem to be sorted properly without actually sorting them.
-  // @todo: need to make sure this is always true.
-//  foreach ($IDs as $sq => $value ) {
-//    list ($s,$q) = preg_split('/,/' , $sq);
-//    $evalue =  $IDs["$s,$q"]['E'];
-//    $parent =  join ("\t", array($s, "BLASTRESULT" , "match" , $IDs["$s,$q"]['SS'] , $IDs["$s,$q"]['SE'] , $IDs["$s,$q"]['E'] , $IDs["$s,$q"]['strand'] , '.' , "ID=$s.$count;Name=$q($evalue)")) . "\n";
-//    $child = join ("\n",$IDs[$sq]['HSPs']) . "\n";
-//    fwrite($gff,$parent);
-//    fwrite($gff,$child);
-//  }
 
   // Close the files.
   fclose($tsv);
   fclose($gff);
 }
 
+/**
+ * Prints each GFF parent feature followed by all of its child features.
+ *
+ * For every entry of $blast_feature_array, one tab-delimited parent line of
+ * type "match" is written to $gff, followed by the entry's 'HSPs' lines
+ * joined with newlines. Nothing is written when the array is empty.
+ *
+ * @param $gff
+ *   The file handle the GFF lines are written to with fwrite().
+ * @param $blast_feature_array
+ *   An array keyed by "subject,query". Each value is read for the keys
+ *   'count' (used to build the parent ID), 'SS' and 'SE' (the parent start
+ *   and end columns; presumably the smallest and largest coordinates of the
+ *   child features), 'E' (the score column, also shown in the Name
+ *   attribute), 'strand', and 'HSPs' (the already formatted child lines).
+ */
+
 function printGFF_parent_children ($gff,$blast_feature_array){
   foreach ($blast_feature_array as $sq => $value ) {
     list ($s,$q) = preg_split('/,/' , $sq);
+    $id_count = $blast_feature_array["$s,$q"]['count'];
     $evalue =  $blast_feature_array["$s,$q"]['E'];
-    $parent =  join ("\t", array($s, "BLASTRESULT" , "match" , $blast_feature_array["$s,$q"]['SS'] , $blast_feature_array["$s,$q"]['SE'] , $blast_feature_array["$s,$q"]['E'] , $blast_feature_array["$s,$q"]['strand'] , '.' , "ID=$s.$count;Name=$q($evalue)")) . "\n";
+    $parent =  join ("\t", array($s, "BLASTRESULT" , "match" , $blast_feature_array["$s,$q"]['SS'] , $blast_feature_array["$s,$q"]['SE'] , $blast_feature_array["$s,$q"]['E'] , $blast_feature_array["$s,$q"]['strand'] , '.' , "ID=$s.$id_count;Name=$q($evalue)")) . "\n";
     $child = join ("\n",$blast_feature_array[$sq]['HSPs']) . "\n";
     fwrite($gff,$parent);
     fwrite($gff,$child);
   }
-
 }