|  | @@ -744,7 +744,11 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){
 | 
	
		
			
				|  |  |    $tsv = fopen($blast_tsv, "r") or die("Unable to open tsv file!");
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    // For each line in the TSV file...
 | 
	
		
			
				|  |  | -  $results = array();
 | 
	
		
			
				|  |  | +  // Need to go thru each line of tsv to find the first and last hsp of a hit.
 | 
	
		
			
				|  |  | +  $last_s = NULL;
 | 
	
		
			
				|  |  | +  $hsp = NULL;
 | 
	
		
			
				|  |  | +  $HitResult=array();
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  |    while(!feof($tsv)) {
 | 
	
		
			
				|  |  |      $line = fgets($tsv);
 | 
	
		
			
				|  |  |      $line = rtrim($line);
 | 
	
	
		
			
				|  | @@ -754,6 +758,8 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){
 | 
	
		
			
				|  |  |        continue;
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    ## for keeping track of new queries and hits
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      // Each line has the following parts:
 | 
	
		
			
				|  |  |      //  0: query id,
 | 
	
		
			
				|  |  |      //  1: subject id,
 | 
	
	
		
			
				|  | @@ -769,73 +775,89 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){
 | 
	
		
			
				|  |  |      // 11: bit score
 | 
	
		
			
				|  |  |      $parts = preg_split('/\t/', $line);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    // Assign the important parts of the time to readable variables.
 | 
	
		
			
				|  |  | -    $hitname = $parts[1];
 | 
	
		
			
				|  |  | -    $queryId = $parts[0];
 | 
	
		
			
				|  |  | -    $subjectStart = $parts[8];
 | 
	
		
			
				|  |  | -    $subjectEnd = $parts[9];
 | 
	
		
			
				|  |  | -    $queryStart = $part[6];
 | 
	
		
			
				|  |  | -    $queryEnd = $parts[7];
 | 
	
		
			
				|  |  | -    $eval = $parts[10];
 | 
	
		
			
				|  |  | -    $hspInfo = "$queryId,$subjectStart,$subjectEnd,$queryStart,$queryEnd";
 | 
	
		
			
				|  |  | -    $results[$hitname][$hspInfo] = $eval;
 | 
	
		
			
				|  |  | +    // Assign the important parts of the line to readable variables.
 | 
	
		
			
				|  |  | +    $s = $parts[1];
 | 
	
		
			
				|  |  | +    $q = $parts[0];
 | 
	
		
			
				|  |  | +    $ss = $parts[8];
 | 
	
		
			
				|  |  | +    $se = $parts[9];
 | 
	
		
			
				|  |  | +    $qs = $part[6];
 | 
	
		
			
				|  |  | +    $qe = $parts[7];
 | 
	
		
			
				|  |  | +    $e = $parts[10];
 | 
	
		
			
				|  |  | +     
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // if this is a new hit print the last and 
 | 
	
		
			
				|  |  | +    // empty the $HitResult array and
 | 
	
		
			
				|  |  | +    // reset hsp counter
 | 
	
		
			
				|  |  | +    if ($last_s != NULL and $s != $last_s ) {
 | 
	
		
			
				|  |  | +      printGFF_parent_children($gff,$HitResult);
 | 
	
		
			
				|  |  | +      $HitResult = array();
 | 
	
		
			
				|  |  | +      $hsp=0;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +   // every line is a new hsp
 | 
	
		
			
				|  |  | +    $hsp++;
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +    // determine query strand to use in match_part line, no need to store, just print
 | 
	
		
			
				|  |  | +    $q_strand = '+';
 | 
	
		
			
				|  |  | +    if ($qs > $qe) {
 | 
	
		
			
				|  |  | +        list($qs,$qe) = array($qe,$qs);
 | 
	
		
			
				|  |  | +        $q_strand = '-';
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    // determine subject (hit) strand to use in match line, needs to be stored
 | 
	
		
			
				|  |  | +    $HitResult["$s,$q"]['strand']='+';
 | 
	
		
			
				|  |  | +    list($start,$end) = array($ss,$se);
 | 
	
		
			
				|  |  | +    if($ss > $se) {
 | 
	
		
			
				|  |  | +       list($start,$end) = array($se,$ss);
 | 
	
		
			
				|  |  | +       $HitResult["$s,$q"]['strand']='-';
 | 
	
		
			
				|  |  | +     }
 | 
	
		
			
				|  |  | +  
 | 
	
		
			
				|  |  | +    // store smallest start
 | 
	
		
			
				|  |  | +     if (!array_key_exists('SS',$HitResult["$s,$q"]) or $ss < $HitResult["$s,$q"]['SS']) {
 | 
	
		
			
				|  |  | +       $HitResult["$s,$q"]['SS'] = $ss;
 | 
	
		
			
				|  |  | +     }
 | 
	
		
			
				|  |  | +    
 | 
	
		
			
				|  |  | +    // store largest end
 | 
	
		
			
				|  |  | +     if (!array_key_exists('SE',$HitResult["$s,$q"]) or $se > $HitResult["$s,$q"]['SE']) {
 | 
	
		
			
				|  |  | +       $HitResult["$s,$q"]['SE'] = $se;
 | 
	
		
			
				|  |  | +     }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +     // store best evalue
 | 
	
		
			
				|  |  | +     if (!array_key_exists('E',$HitResult["$s,$q"]) or $e < $HitResult["$s,$q"]['E']) {
 | 
	
		
			
				|  |  | +       $HitResult["$s,$q"]['E'] = $e;
 | 
	
		
			
				|  |  | +     }   
 | 
	
		
			
				|  |  | +    
 | 
	
		
			
				|  |  | +     // generate the match_part line for each hsp
 | 
	
		
			
				|  |  | +     $HitResult["$s,$q"]['HSPs'][] = join("\t", array($s, "BLASTRESULT" , "match_part" , $start , $end , $e , $HitResult["$s,$q"]['strand'] , '.' , "ID=$s.$q.$hsp;Parent=$s.$q;Target=$q $qs $qe $q_strand"));
 | 
	
		
			
				|  |  | +     $last_s = $s;
 | 
	
		
			
				|  |  |    } // end tsv file while
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  $IDs = array();
 | 
	
		
			
				|  |  | -  $count = 0;
 | 
	
		
			
				|  |  | -  $last_s = NULL;
 | 
	
		
			
				|  |  | +  // print hit and hsp for the last hit
 | 
	
		
			
				|  |  | +  printGFF_parent_children($gff,$HitResult);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    // Need to go thru each line of tsv to find the first and last hsp of a hit.
 | 
	
		
			
				|  |  | -    // Need to get the smallest and largest coordinate for the parent feature
 | 
	
		
			
				|  |  | -    foreach ($results as $s => $hspInfoArray) {
 | 
	
		
			
				|  |  | -      $count++;
 | 
	
		
			
				|  |  | -      $hsp = 0;
 | 
	
		
			
				|  |  | -      foreach ($hspInfoArray as $hspInfoStr => $e) {
 | 
	
		
			
				|  |  | -        list($q,$ss,$se,$qs,$qe) = preg_split('/,/',$hspInfoStr);
 | 
	
		
			
				|  |  | -        if ($s != NULL and  $s != $last_s ) {
 | 
	
		
			
				|  |  | -          $hsp=0;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        $IDs["$s,$q"]['count']=$count;
 | 
	
		
			
				|  |  | -        $q_strand = '+';
 | 
	
		
			
				|  |  | -        if ($qs > $qe) {
 | 
	
		
			
				|  |  | -          list($qs,$qe) = array($qe,$qs);
 | 
	
		
			
				|  |  | -          $q_strand = '-';
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        $IDs["$s,$q"]['strand']='+';
 | 
	
		
			
				|  |  | -        list($start,$end) = array($ss,$se);
 | 
	
		
			
				|  |  | -        if($ss > $se) {
 | 
	
		
			
				|  |  | -          list($start,$end) = array($se,$ss);
 | 
	
		
			
				|  |  | -          $IDs["$s,$q"]['strand']='-';
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        if (!array_key_exists('SS',$IDs["$s,$q"]) or $ss < $IDs["$s,$q"]['SS']) {
 | 
	
		
			
				|  |  | -          $IDs["$s,$q"]['SS'] = $ss;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        if (!array_key_exists('SE',$IDs["$s,$q"]) or $se > $IDs["$s,$q"]['SE']) {
 | 
	
		
			
				|  |  | -          $IDs["$s,$q"]['SE'] = $se;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        if (!array_key_exists('E',$IDs["$s,$q"]) or $e < $IDs["$s,$q"]['E']) {
 | 
	
		
			
				|  |  | -          $IDs["$s,$q"]['E'] = $e;
 | 
	
		
			
				|  |  | -        }
 | 
	
		
			
				|  |  | -        $hsp++;
 | 
	
		
			
				|  |  | -        $IDs["$s,$q"]['HSPs'][] = join("\t", array($s, "BLASTRESULT" , "match_part" , $start , $end , $e , $IDs["$s,$q"]['strand'] , '.' , "ID=$s.$count.$hsp;Parent=$s.$count;Target=$q $qs $qe $q_strand"));
 | 
	
		
			
				|  |  | -        $last_s = $s;
 | 
	
		
			
				|  |  | -      }
 | 
	
		
			
				|  |  | -    }
 | 
	
		
			
				|  |  | +  // Close the files.
 | 
	
		
			
				|  |  | +  fclose($tsv);
 | 
	
		
			
				|  |  | +  fclose($gff);
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -  // Now can print a parent gff line and all the children.
 | 
	
		
			
				|  |  | -  // Note: the evalues seem to be sorted properly without actually sorting them.
 | 
	
		
			
				|  |  | -  // @todo: need to make sure this is always true.
 | 
	
		
			
				|  |  | -  foreach ($IDs as $sq => $value ) {
 | 
	
		
			
				|  |  | +/**
 | 
	
		
			
				|  |  | + *   printGFF_parent_children
 | 
	
		
			
				|  |  | + *   prints the GFF parent feature and all of its children features
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + *  @param $blast_feature_array
 | 
	
		
			
				|  |  | + *  an array of the all the child features which is used to generate the smallest and largest coordinates for the parent
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + *
 | 
	
		
			
				|  |  | + */
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +function printGFF_parent_children ($gff,$blast_feature_array){
 | 
	
		
			
				|  |  | +  foreach ($blast_feature_array as $sq => $value ) {
 | 
	
		
			
				|  |  |      list ($s,$q) = preg_split('/,/' , $sq);
 | 
	
		
			
				|  |  | -    $evalue =  $IDs["$s,$q"]['E'];
 | 
	
		
			
				|  |  | -    $count = $IDs["$s,$q"]['count'];
 | 
	
		
			
				|  |  | -    $parent =  join ("\t", array($s, "BLASTRESULT" , "match" , $IDs["$s,$q"]['SS'] , $IDs["$s,$q"]['SE'] , $IDs["$s,$q"]['E'] , $IDs["$s,$q"]['strand'] , '.' , "ID=$s.$count;Name=$q($evalue)")) . "\n";
 | 
	
		
			
				|  |  | -    $child = join ("\n",$IDs[$sq]['HSPs']) . "\n";
 | 
	
		
			
				|  |  | +    $evalue =  $blast_feature_array["$s,$q"]['E'];
 | 
	
		
			
				|  |  | +    $parent =  join ("\t", array($s, "BLASTRESULT" , "match" , $blast_feature_array["$s,$q"]['SS'] , $blast_feature_array["$s,$q"]['SE'] , $blast_feature_array["$s,$q"]['E'] , $blast_feature_array["$s,$q"]['strand'] , '.' , "ID=$s.$q;Name=$q($evalue)")) . "\n";
 | 
	
		
			
				|  |  | +    $child = join ("\n",$blast_feature_array["$s,$q"]['HSPs']) . "\n";
 | 
	
		
			
				|  |  |      fwrite($gff,$parent);
 | 
	
		
			
				|  |  |      fwrite($gff,$child);
 | 
	
		
			
				|  |  |    }
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -  // Close the files.
 | 
	
		
			
				|  |  | -  fclose($tsv);
 | 
	
		
			
				|  |  | -  fclose($gff);
 | 
	
		
			
				|  |  |  }
 |