root/bin/map_fasta_ids

Revision 270, 1.6 kB (checked in by bmoore, 2 months ago)

Modifications to some scripts by Barry

  • Property svn:executable set to *
Line 
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;

#-----------------------------------------------------------------------------
#----------------------------------- MAIN ------------------------------------
#-----------------------------------------------------------------------------
# map_fasta_ids: rewrite the IDs on the header lines of a fasta file in place,
# using a two column (old_name, new_name) map file.
#
# Arguments (positional):
#   1. map file   - whitespace separated columns: old_name new_name
#   2. fasta file - rewritten in place; sequence lines are copied unchanged
#
# Headers whose ID has no entry in the map are left unchanged and reported
# on STDERR.
my $usage = "

Synopsis:

map_fasta_ids genome.all.id.map genome.all.transcripts.fasta

Description:

This script takes a id map file and changes the name of the ID in a
fasta file.  The map file is a two column tab delimited file with two
columns: old_name and new_name.  The ID in the fasta file must be the
first set of characters after the '>' and ending with a space.

";


my ($help);
my $opt_success = GetOptions('help'    => \$help,
                              );

die $usage if $help || ! $opt_success;

my ($map_file, $fasta_file) = @ARGV;
die $usage unless $map_file && $fasta_file;

# Read the map file and build a hash of old_name => new_name.
open (my $MAP, '<', $map_file) or die "Can't open $map_file for reading\n$!\n";
my %map;
while (my $map_line = <$MAP>) {
        # split ' ' ignores leading whitespace and splits on runs of whitespace;
        # any columns after the second are ignored.
        my ($old, $new) = split ' ', $map_line;

        # Blank (or whitespace only) lines carry no mapping.
        next unless defined $old;

        # A row without a new name would otherwise erase the ID in the output.
        unless (defined $new) {
                print STDERR "WARNING:  No new name given for $old on line $. of $map_file\n";
                next;
        }
        $map{$old} = $new;
}
close $MAP;

# Write the converted records to a temporary file next to the fasta file and
# only rename it over the original once everything has been written, so that
# a failure part way through never destroys the input.
my $tmp_file = "$fasta_file.$$.tmp";
open (my $IN, '<', $fasta_file) or die "Can't open $fasta_file for reading\n$!\n";
open (my $OUT, '>', $tmp_file) or die "Can't open $tmp_file for writing\n$!\n";

# Just do it!
while (my $line = <$IN>) {
        if ($line =~ /^>/) {
                # The ID is the run of non-whitespace directly after the '>'.
                my ($old_id) = $line =~ /^>(\S+)/;
                if (! defined $old_id) {
                        print STDERR "WARNING:  No ID found on header line $. of $fasta_file\n";
                }
                elsif (exists $map{$old_id}) {
                        # Replace by position rather than by pattern: IDs often
                        # contain regex metacharacters such as '|' and '.'.
                        substr($line, 1, length($old_id), $map{$old_id});
                }
                else {
                        print STDERR "WARNING:  No mapping available for $old_id\n";
                }
        }
        print {$OUT} $line
            or abort_rewrite($tmp_file, "Can't write to $tmp_file");
}
close $IN;

# Buffered write errors (e.g. a full disk) only surface when closing.
close $OUT
    or abort_rewrite($tmp_file, "Can't finish writing $tmp_file");
rename($tmp_file, $fasta_file)
    or abort_rewrite($tmp_file, "Can't replace $fasta_file with $tmp_file");

#-----------------------------------------------------------------------------
#-------------------------------- SUBROUTINES --------------------------------
#-----------------------------------------------------------------------------
# Remove the partially written temporary file and die with the given message
# followed by the system error that caused the failure.
sub abort_rewrite {
        my ($partial_file, $message) = @_;
        my $error = $!;     # save it before unlink can overwrite it
        unlink $partial_file;
        die "$message\n$error\n";
}
Note: See TracBrowser for help on using the browser.