| 
					
				 | 
			
			
				@@ -744,7 +744,11 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   $tsv = fopen($blast_tsv, "r") or die("Unable to open tsv file!"); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   // For each line in the TSV file... 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $results = array(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  // Need to go thru each line of tsv to find the first and last hsp of a hit. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  $last_s = NULL; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  $hsp = NULL; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  $HitResult=array(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   while(!feof($tsv)) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     $line = fgets($tsv); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     $line = rtrim($line); 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -754,6 +758,8 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				       continue; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    ## for keeping track of new queries and hits 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     // Each line has the following parts: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     //  0: query id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     //  1: subject id, 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -769,73 +775,89 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     // 11: bit score 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     $parts = preg_split('/\t/', $line); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    // Assign the important parts of the time to readable variables. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $hitname = $parts[1]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $queryId = $parts[0]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $subjectStart = $parts[8]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $subjectEnd = $parts[9]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $queryStart = $part[6]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $queryEnd = $parts[7]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $eval = $parts[10]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $hspInfo = "$queryId,$subjectStart,$subjectEnd,$queryStart,$queryEnd"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $results[$hitname][$hspInfo] = $eval; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // Assign the important parts of the line to readable variables. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $s = $parts[1]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $q = $parts[0]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $ss = $parts[8]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $se = $parts[9]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $qs = $part[6]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $qe = $parts[7]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $e = $parts[10]; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // if this is a new hit print the last and  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // empty the $HitResult array and 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // reset hsp counter 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if ($last_s != NULL and $s != $last_s ) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      printGFF_parent_children($gff,$HitResult); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      $HitResult = array(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+      $hsp=0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   // every line is a new hsp 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $hsp++; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // determine query strand to use in match_part line, no need to store, just print 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $q_strand = '+'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if ($qs > $qe) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        list($qs,$qe) = array($qe,$qs); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        $q_strand = '-'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // determine subject (hit) strand to use in match line, needs to be stored 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $HitResult["$s,$q"]['strand']='+'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    list($start,$end) = array($ss,$se); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if($ss > $se) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+       list($start,$end) = array($se,$ss); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+       $HitResult["$s,$q"]['strand']='-'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // store smallest start 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     if (!array_key_exists('SS',$HitResult["$s,$q"]) or $ss < $HitResult["$s,$q"]['SS']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+       $HitResult["$s,$q"]['SS'] = $ss; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    // store largest end 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     if (!array_key_exists('SE',$HitResult["$s,$q"]) or $se > $HitResult["$s,$q"]['SE']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+       $HitResult["$s,$q"]['SE'] = $se; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     // store best evalue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     if (!array_key_exists('E',$HitResult["$s,$q"]) or $e < $HitResult["$s,$q"]['E']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+       $HitResult["$s,$q"]['E'] = $e; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     }    
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     // generate the match_part line for each hsp 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     $HitResult["$s,$q"]['HSPs'][] = join("\t", array($s, "BLASTRESULT" , "match_part" , $start , $end , $e , $HitResult["$s,$q"]['strand'] , '.' , "ID=$s.$q.$hsp;Parent=$s.$q;Target=$q $qs $qe $q_strand")); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     $last_s = $s; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   } // end tsv file while 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $IDs = array(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $count = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  $last_s = NULL; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  // print hit and hsp for the last hit 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  printGFF_parent_children($gff,$HitResult); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    // Need to go thru each line of tsv to find the first and last hsp of a hit. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    // Need to get the smallest and largest coordinate for the parent feature 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    foreach ($results as $s => $hspInfoArray) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      $count++; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      $hsp = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      foreach ($hspInfoArray as $hspInfoStr => $e) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        list($q,$ss,$se,$qs,$qe) = preg_split('/,/',$hspInfoStr); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if ($s != NULL and  $s != $last_s ) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          $hsp=0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $IDs["$s,$q"]['count']=$count; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $q_strand = '+'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if ($qs > $qe) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          list($qs,$qe) = array($qe,$qs); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          $q_strand = '-'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $IDs["$s,$q"]['strand']='+'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        list($start,$end) = array($ss,$se); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if($ss > $se) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          list($start,$end) = array($se,$ss); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          $IDs["$s,$q"]['strand']='-'; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if (!array_key_exists('SS',$IDs["$s,$q"]) or $ss < $IDs["$s,$q"]['SS']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          $IDs["$s,$q"]['SS'] = $ss; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if (!array_key_exists('SE',$IDs["$s,$q"]) or $se > $IDs["$s,$q"]['SE']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          $IDs["$s,$q"]['SE'] = $se; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if (!array_key_exists('E',$IDs["$s,$q"]) or $e < $IDs["$s,$q"]['E']) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-          $IDs["$s,$q"]['E'] = $e; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $hsp++; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $IDs["$s,$q"]['HSPs'][] = join("\t", array($s, "BLASTRESULT" , "match_part" , $start , $end , $e , $IDs["$s,$q"]['strand'] , '.' , "ID=$s.$count.$hsp;Parent=$s.$count;Target=$q $qs $qe $q_strand")); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        $last_s = $s; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-      } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  // Close the files. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  fclose($tsv); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  fclose($gff); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  // Now can print a parent gff line and all the children. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  // Note: the evalues seem to be sorted properly without actually sorting them. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  // @todo: need to make sure this is always true. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  foreach ($IDs as $sq => $value ) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+/** 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ *   printGFF_parent_children 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ *   prints the GFF parent feature and all of its children features 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ *  @param $blast_feature_array 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ *  an array of the all the child features which is used to generate the smallest and largest coordinates for the parent 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ * 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ */ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+function printGFF_parent_children ($gff,$blast_feature_array){ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  foreach ($blast_feature_array as $sq => $value ) { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     list ($s,$q) = preg_split('/,/' , $sq); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $evalue =  $IDs["$s,$q"]['E']; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $count = $IDs["$s,$q"]['count']; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $parent =  join ("\t", array($s, "BLASTRESULT" , "match" , $IDs["$s,$q"]['SS'] , $IDs["$s,$q"]['SE'] , $IDs["$s,$q"]['E'] , $IDs["$s,$q"]['strand'] , '.' , "ID=$s.$count;Name=$q($evalue)")) . "\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    $child = join ("\n",$IDs[$sq]['HSPs']) . "\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $evalue =  $blast_feature_array["$s,$q"]['E']; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $parent =  join ("\t", array($s, "BLASTRESULT" , "match" , $blast_feature_array["$s,$q"]['SS'] , $blast_feature_array["$s,$q"]['SE'] , $blast_feature_array["$s,$q"]['E'] , $blast_feature_array["$s,$q"]['strand'] , '.' , "ID=$s.$q;Name=$q($evalue)")) . "\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    $child = join ("\n",$blast_feature_array["$s,$q"]['HSPs']) . "\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     fwrite($gff,$parent); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     fwrite($gff,$child); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  // Close the files. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  fclose($tsv); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-  fclose($gff); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 } 
			 |