root/bin/map_data_ids

Revision 270, 2.0 kB (checked in by bmoore, 2 months ago)

Modifications to some scripts by Barry

  • Property svn:executable set to *
Line 
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4 use Getopt::Long;
5
6 #-----------------------------------------------------------------------------
7 #----------------------------------- MAIN ------------------------------------
8 #-----------------------------------------------------------------------------
9 my $usage = "
10
11 Synopsis:
12
13 map_data_ids genome.all.id.map data.txt
14
15 Description:
16
17 This script takes a id map file and changes the name of the ID in a
18 data file.  The map file is a two column tab delimited file with two
19 columns: old_name and new_name.  The data file is assumed to be
20 tab delimited by default, but this can be altered with the delimit
21 option.  The ID in the data file can be in any column and is specified
22 by the col option which defaults to the first column.
23
24 Options:
25
26   col      The column number (1 based) in the data file that
27            corresponds to old_name in the mapping file.
28   delimit  The delimeter for the data file.
29
30 ";
31
32
33 my ($help, $col, $delimit);
34 my $opt_success = GetOptions('help'      => \$help,
35                              'col=i'     => \$col,
36                              'delimit=s' => \$delimit,
37                               );
38
39 die $usage if $help || ! $opt_success;
40
41 $col ||= 1;
42 $col--;
43 $delimit ||= "\t";
44
45 my ($map_file, $data_file) = @ARGV;
46 die $usage unless $map_file && $data_file;
47
48 # Read the map file and build a map hash;
49 open (my $MAP, '<', $map_file) or die "Can't open $map_file for reading\n$!\n";
50 my %map;
51 map {my ($old, $new) = split;$map{$old} = $new} (<$MAP>);
52 close $MAP;
53
54 # Open the data file for input unlink it to avoid clobbering it and open the
55 # same file for output.
56 open (my $IN, '<', $data_file) or die "Can't open $data_file for reading\n$!\n";
57 unlink($data_file);
58 open(my $OUT, '>', $data_file) or die "Can't open $data_file for writing\n$!\n";
59
60 # Just do it!
61 while (<$IN>) {
62         chomp;
63         my @fields = split /$delimit/, $_;
64         if ($map{$fields[$col]}) {
65                 $fields[$col] = $map{$fields[$col]};
66         }
67         else {
68                 print STDERR "WARNING: No mapping available for $fields[$col]\n";
69         }
70         print $OUT join $delimit, @fields;
71         print $OUT "\n";
72 }
Note: See TracBrowser for help on using the browser.