FastA.rename.pl 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. #!/usr/bin/env perl
  2. #
  3. # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
  4. # @update Oct-07-2015
  5. # @license artistic license 2.0
  6. #
  7. use warnings;
  8. use strict;
  9. use Getopt::Std;
  10. sub HELP_MESSAGE { die "
  11. .Description:
  12. Renames a set of sequences in FastA format.
  13. .Usage: $0 [options] list.txt seqs.fa > renamed.fa
  14. [options]
  15. -f Filter list. Ignores sequences NOT present in the list.
  16. -q Runs quietly.
  17. -h Prints this message and exits.
  18. [mandatory]
  19. list.txt Tab-delimited list of sequences, with the original ID in the
  20. first column and the ID to use in the second.
  21. seqs.fa FastA file containing the superset of sequences.
  22. renamed.fa FastA file to be created.
  23. " }
  24. my %o=();
  25. getopts('fhq', \%o);
  26. my($list, $fa) = @ARGV;
  27. ($list and $fa) or &HELP_MESSAGE;
  28. $o{h} and &HELP_MESSAGE;
  29. print STDERR "Reading list.\n" unless $o{q};
  30. open LI, "<", $list or die "Cannot read file: $list: $!\n";
  31. my %li = map { my $l=$_; chomp $l; my @r=split(/\t/,$l); $r[1] => $r[0] } <LI>;
  32. close LI;
  33. print STDERR "Renaming FastA.\n" unless $o{q};
  34. open FA, "<", $fa or die "Cannot read file: $fa: $!\n";
  35. my $good = 0;
  36. while(my $ln = <FA>){
  37. next if $ln =~ /^;/;
  38. chomp $ln;
  39. if($ln =~ m/^>((\S+).*)/){
  40. my $rep=0;
  41. $rep = ">".$li{$ln} if exists $li{$ln};
  42. $rep = ">".$li{$1} if exists $li{$1} and not $rep;
  43. $rep = ">".$li{">$1"} if exists $li{">$1"} and not $rep;
  44. $rep = ">".$li{$2} if exists $li{$2} and not $rep;
  45. if($rep){
  46. $ln = $rep;
  47. $good = 1;
  48. }
  49. }elsif($ln =~ m/^>/){
  50. $good=0;
  51. print STDERR "Warning: Non-cannonical defline, line $.: $ln\n";
  52. }
  53. print "$ln\n" if $good or not $o{f};
  54. }
  55. close FA;