|
@@ -712,7 +712,6 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){
|
|
|
$tsv = fopen($blast_tsv, "r") or die("Unable to open tsv file!");
|
|
|
|
|
|
// For each line in the TSV file...
|
|
|
- // Need to go thru each line of tsv to find the first and last hsp of a hit.
|
|
|
$results = array();
|
|
|
while(!feof($tsv)) {
|
|
|
$line = fgets($tsv);
|
|
@@ -749,19 +748,20 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){
|
|
|
$hspInfo = "$queryId,$subjectStart,$subjectEnd,$queryStart,$queryEnd";
|
|
|
$results[$hitname][$hspInfo] = $eval;
|
|
|
} // end tsv file while
|
|
|
+
|
|
|
$IDs = array();
|
|
|
$count = 0;
|
|
|
$last_s = NULL;
|
|
|
-
|
|
|
- // Need to get the smallest and largest coordinate for the parent feature
|
|
|
+
|
|
|
+ // Need to go through each hit collected from the tsv to find the first and last hsp of that hit.
|
|
|
+ // Need to get the smallest and largest coordinate for the parent feature
|
|
|
foreach ($results as $s => $hspInfoArray) {
|
|
|
$count++;
|
|
|
$hsp = 0;
|
|
|
foreach ($hspInfoArray as $hspInfoStr => $e) {
|
|
|
list($q,$ss,$se,$qs,$qe) = preg_split('/,/',$hspInfoStr);
|
|
|
- if ($s != NULL and $s != $last_s) {
|
|
|
- printGFF_parent_children($gff,$IDs);
|
|
|
- $IDs = array();
|
|
|
+ if ($s != NULL and $s != $last_s ) {
|
|
|
+ $hsp=0;
|
|
|
}
|
|
|
$IDs["$s,$q"]['count']=$count;
|
|
|
$q_strand = '+';
|
|
@@ -789,32 +789,21 @@ function convert_tsv2gff3($blast_tsv,$blast_gff){
|
|
|
$last_s = $s;
|
|
|
}
|
|
|
}
|
|
|
- printGFF_parent_children($gff,$IDs);
|
|
|
-
|
|
|
- // Close the files.
|
|
|
- fclose($tsv);
|
|
|
- fclose($gff);
|
|
|
-}
|
|
|
-
|
|
|
-/**
|
|
|
- * printGFF_parent_children
|
|
|
- * prints the GFF parent feature and all of its children features
|
|
|
- *
|
|
|
- *
|
|
|
- * @param $blast_feature_array
|
|
|
- * an array of the all the child features which is used to generate the smallest and largest coordinates for the parent
|
|
|
- *
|
|
|
- *
|
|
|
- */
|
|
|
|
|
|
-function printGFF_parent_children ($gff,$blast_feature_array){
|
|
|
- foreach ($blast_feature_array as $sq => $value ) {
|
|
|
+ // Now can print a parent gff line and all the children.
|
|
|
+ // Note: the e-values appear to come out in sorted order even though they are never explicitly sorted.
|
|
|
+ // @todo: need to verify that the e-values are always in sorted order.
|
|
|
+ foreach ($IDs as $sq => $value ) {
|
|
|
list ($s,$q) = preg_split('/,/' , $sq);
|
|
|
- $id_count = $blast_feature_array["$s,$q"]['count'];
|
|
|
- $evalue = $blast_feature_array["$s,$q"]['E'];
|
|
|
- $parent = join ("\t", array($s, "BLASTRESULT" , "match" , $blast_feature_array["$s,$q"]['SS'] , $blast_feature_array["$s,$q"]['SE'] , $blast_feature_array["$s,$q"]['E'] , $blast_feature_array["$s,$q"]['strand'] , '.' , "ID=$s.$id_count;Name=$q($evalue)")) . "\n";
|
|
|
- $child = join ("\n",$blast_feature_array[$sq]['HSPs']) . "\n";
|
|
|
+ $evalue = $IDs["$s,$q"]['E'];
|
|
|
+ $count = $IDs["$s,$q"]['count'];
|
|
|
+ $parent = join ("\t", array($s, "BLASTRESULT" , "match" , $IDs["$s,$q"]['SS'] , $IDs["$s,$q"]['SE'] , $IDs["$s,$q"]['E'] , $IDs["$s,$q"]['strand'] , '.' , "ID=$s.$count;Name=$q($evalue)")) . "\n";
|
|
|
+ $child = join ("\n",$IDs[$sq]['HSPs']) . "\n";
|
|
|
fwrite($gff,$parent);
|
|
|
fwrite($gff,$child);
|
|
|
}
|
|
|
+
|
|
|
+ // Close the files.
|
|
|
+ fclose($tsv);
|
|
|
+ fclose($gff);
|
|
|
}
|