BED_utils.pm 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. package BED_utils;
  2. use strict;
  3. use warnings;
  4. use Carp;
  5. use Gene_obj;
  6. sub index_BED_as_gene_objs {
  7. my ($gff_filename, $gene_id_to_gene_obj_href) = @_;
  8. my %contig_to_gene_list;
  9. open (my $fh, $gff_filename) or die "Error, cannot open file $gff_filename";
  10. while (<$fh>) {
  11. if (/^\#/) { next; }
  12. chomp;
  13. unless (/\w/) { next; }
  14. my $bed_line = $_;
  15. my $gene_obj;
  16. eval {
  17. $gene_obj = &Gene_obj::BED_line_to_gene_obj($bed_line);
  18. my @introns = $gene_obj->get_intron_coordinates(); # this method breaks if all exons are single bases. Ignore these weird things.
  19. };
  20. if ($@) {
  21. print STDERR "ERROR, cannot create gene for bed line:\n$bed_line\n$@\n";
  22. next;
  23. }
  24. my $gene_id = $gene_obj->{TU_feat_name};
  25. my $indexed_gene_obj = $gene_id_to_gene_obj_href->{$gene_id};
  26. if ($indexed_gene_obj) {
  27. $indexed_gene_obj->add_isoform($gene_obj);
  28. }
  29. else {
  30. $gene_id_to_gene_obj_href->{$gene_id} = $gene_obj;
  31. my $contig = $gene_obj->{asmbl_id};
  32. push (@{$contig_to_gene_list{$contig}}, $gene_id);
  33. }
  34. }
  35. close $fh;
  36. return(\%contig_to_gene_list);
  37. }
  38. 1; #EOM