root/bin/gff3_merge

Revision 279, 2.6 kB (checked in by bmoore, 2 weeks ago)

Carsons lib/maker/auto_annotator.pm update

  • Property svn:executable set to *
Line 
#! /usr/bin/perl -w

# gff3_merge
#
# Merges several GFF3 files into one.  The inputs are either listed on the
# command line or derived from a MAKER datastore index log (-d): every log
# line containing FINISHED contributes the file <dir>/<basename of dir>.gff.
#
# Layout of the merged file:
#   1. a single "##gff-version 3" header,
#   2. the ##genome-build / ##sequence-region pragmas of all inputs,
#   3. all annotation lines,
#   4. a "##FASTA" marker followed by all FASTA records.
# Sections 3 and 4 are spooled to temporary files while the inputs are read
# and appended to the output at the end.

use strict;
use warnings;
use Getopt::Long;
use File::Temp qw(tempfile);

my $usage = "

Synopsis:

gff3_merge -d maker_datastore_index.log
gff3_merge -o genome.all.gff <gff3_file1> <gff3_file2> ...

Descriptions:

This script will take a MAKER datastore index log file, extract all
the relevant GFF3 files and combined GFF3 file.  The script can also
combine other correctly formated GFF3 files.  For this to work
properly you need to be in the same directory as the datastore index.

Options:

  -d The location of the MAKER datastore index log file.
  -o Alternate base name for the output files.

";

my $datastore;
my @files;
my $outfile;

# "datastor" is the historical (misspelled) long name; it is kept as an alias
# so existing invocations continue to work.
GetOptions ("datastore|datastor|d=s" => \$datastore,
            "o=s"                    => \$outfile,
            "help|?"                 => sub { print $usage; exit(0); },
            ) or do { print STDERR $usage; exit(1); };

my $have_datastore = defined $datastore && length $datastore;

# Positional arguments are only used when no datastore log was given.
@files = @ARGV if (! $have_datastore);

if (! $have_datastore && ! @files) {
    print $usage;
    exit(0);
}

die "ERROR: The file \'$datastore\' does not exist\n"
    if ($have_datastore && ! -e $datastore);

# Derive a default output name.  With a datastore log it is the log's file
# name minus the "_master_datastore_index.log" suffix plus ".gff"; without
# one (or if nothing usable is left) it falls back to "genome.all.gff".
if (! defined $outfile || ! length $outfile) {
    my $base;
    if ($have_datastore) {
        ($base) = $datastore =~ m{([^/]+)$};
        $base =~ s/_master_datastore_index\.log// if (defined $base);
    }
    $outfile = (defined $base && length $base) ? "$base.gff"
                                               : "genome.all.gff";
}

if ($have_datastore) {
    open(my $LOG, '<', $datastore)
        or die "ERROR: Could not open file \'$datastore\': $!\n";

    #uniq the entries
    my %seen;
    while (my $entry = <$LOG>) {
        next unless ($entry =~ /FINISHED/);
        next if $seen{$entry}++;
        chomp $entry;

        # Columns are: id, directory, status.  Only the directory is needed.
        my (undef, $dir) = split(/\t/, $entry);
        if (! defined $dir || ! length $dir) {
            warn "WARNING: Skipping malformed datastore entry: $entry\n";
            next;
        }
        $dir =~ s{/$}{};

        # The GFF3 file is named after the last path component of the
        # directory it lives in.
        my ($name) = $dir =~ m{([^/]+)$};
        if (! defined $name) {
            warn "WARNING: Skipping malformed datastore entry: $entry\n";
            next;
        }
        push(@files, "$dir/$name.gff");
    }
    close($LOG);
}

@files = sort @files;

# Verify every input before the output file is created or truncated, so a
# missing input does not leave a partial result behind.
foreach my $file (@files) {
    die "ERROR: The file \'$file\' does not exist\n" if (! -e $file);
}

open(my $GFF, '>', $outfile)
    or die "ERROR: Could not open file \'$outfile\' for writing: $!\n";
print $GFF "\#\#gff-version 3\n";

# UNLINK => 1 removes the temporary files at exit even if the script dies.
my ($ANN, $ann_file) = tempfile(UNLINK => 1);
my ($FAS, $fas_file) = tempfile(UNLINK => 1);
print $FAS "\#\#FASTA\n";

my %uniq;
foreach my $file (@files) {
    open(my $IN, '<', $file)
        or die "ERROR: Could not open file \'$file\': $!\n";

    # Each input starts in the annotation section and switches to the FASTA
    # section at a ##FASTA pragma or at the first FASTA header line.
    my $FH = $ANN;

    while (defined(my $line = <$IN>)) {
        next if ($line =~ /^\#\#gff-version 3/);
        if ($line =~ /^\#\#genome-build/) {
            # Identical genome-build pragmas are written only once.
            next if $uniq{$line}++;
            print $GFF $line;
            next;
        }
        if ($line =~ /^\#\#sequence-region/) {
            # A repeated sequence-region means the same contig was supplied
            # twice, which would duplicate its annotations.
            die "ERROR: This contig has already been added\: $line\n"
                if exists $uniq{$line};
            $uniq{$line}++;
            print $GFF $line;
            next;
        }
        if ($line =~ /^\#\#FASTA/) {
            $FH = $FAS;
            next;
        }
        if ($line =~ /^>/) {
            $FH = $FAS;
        }
        chomp $line;
        # Skip empty lines only; a plain truth test would also drop "0".
        print $FH $line . "\n" if (length $line);
    }
    close($IN);
}

# Append the spooled sections to the output.  The temporary handles are
# opened read/write by File::Temp, so they can be rewound and read back
# directly; this avoids passing file names through a shell.
foreach my $spool ([$ANN, $ann_file], [$FAS, $fas_file]) {
    my ($TMP, $tmp_name) = @{$spool};
    seek($TMP, 0, 0)
        or die "ERROR: Could not rewind temporary file \'$tmp_name\': $!\n";
    while (defined(my $line = <$TMP>)) {
        print $GFF $line
            or die "ERROR: Could not write to file \'$outfile\': $!\n";
    }
    close($TMP);
}

# Buffered write errors only surface when the handle is closed.
close($GFF) or die "ERROR: Could not write to file \'$outfile\': $!\n";

unlink($ann_file, $fas_file);
Note: See TracBrowser for help on using the browser.