root/bin/maker

Revision 281, 7.6 kB (checked in by cholt, 2 weeks ago)

No longer thread dependent; fixed overhang bug and evidence clustering bug

  • Property svn:executable set to *
Line 
#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell

#-------------------------------------------------------------------------------
# maker -- command line driver.
#
# Flow of this script:
#   1. BEGIN: default the CGL ontology environment variables, remember the
#      command line, and install the INT / __WARN__ / __DIE__ handlers.
#   2. Parse command line options into %OPT.
#   3. Load the control files (GI::load_control_files), open the datastructure
#      controller (ds_utility), the GFF database (GFFDB) and the genome
#      iterator (Iterator::Any).
#   4. Run every fasta entry from the iterator through Process::MpiTiers,
#      re-queueing the entries whose tier reports failure.
#
# Exit status:
#   0      normal completion, -help, or -CTL
#   1      aborted with ^C, or control files could not be found
#   other  startup failure: the error's -value if defined, otherwise 2
#   A die (e.g. invalid command line options) ends the script through the
#   __DIE__ handler installed below.
#-------------------------------------------------------------------------------

# Only the 'vars' and 'refs' strictures are enabled, as in the original script.
use strict qw(vars refs);

use FindBin;
use lib "$FindBin::Bin/../lib";
use lib "$FindBin::Bin/../perl/lib";

# Package globals: they are read by the signal handlers installed in BEGIN.
our ($RANK, $LOG, $CMD_ARGS);

BEGIN {
    # Fall back to the ontology files shipped next to this script when the
    # environment does not provide a (true) value.
    $ENV{CGL_SO_SOURCE} ||= "$FindBin::Bin/../lib/CGL/so.obo";
    $ENV{CGL_GO_SOURCE} ||= "$FindBin::Bin/../lib/CGL/gene_ontology.obo";

    # Keep a copy of the raw command line before GetOptions consumes @ARGV.
    $CMD_ARGS = join(' ', @ARGV);

    # What to do on ^C.
    $SIG{'INT'} = sub {
        print STDERR "\n\nMaker aborted by user!!\n\n";
        exit(1);
    };

    # Suppress the two known warnings from the Storable module; every other
    # warning is passed through unchanged.
    $SIG{'__WARN__'} = sub {
        my $message = $_[0];

        return if $message =~ /Not a CODE reference/;
        return if $message =~ /Can\'t store item /;

        warn $message;
    };

    # Record the rank that died in the run log (when a log is available) and
    # re-throw the error wrapped in a FATAL banner.
    # NOTE(review): a __DIE__ hook also fires for exceptions raised inside
    # eval/try blocks, so errors that are later caught are wrapped and logged
    # as well -- confirm this is intended before changing it.
    $SIG{'__DIE__'} = sub {
        if (defined($LOG) && defined $_[0]) {
            my $die_count = $LOG->get_die_count();
            $die_count++;

            $LOG->add_entry("DIED", "RANK",  $RANK);
            $LOG->add_entry("DIED", "COUNT", $die_count);
        }

        die "#----------------------\n",
            "FATAL: failed!!\n",
            "#----------------------\n",
            $_[0] . "\n";
    };
}

use Cwd;
use FileHandle;
use File::Path;
use Getopt::Long qw(:config no_ignore_case);
use File::Temp qw(tempfile tempdir);
#use Bio::DB::Fasta;
use GI;
use Dumper::GFF::GFFV3;
use Iterator::Any;
use Iterator::Fasta;
use Iterator::GFF3;
use Fasta;
use FastaChunker;
use maker::auto_annotator;
use cluster;
use repeat_mask_seq;
use runlog;
use ds_utility;
use GFFDB;
use Error qw(:try);
use Error::Simple;
use Process::MpiChunk;
use Process::MpiTiers;

$| = 1;    # unbuffered output so status messages appear immediately

my $usage = "
Usage:

     maker [options] <maker_opts> <maker_bopts> <maker_exe>

     Maker is a program that produces gene annotations in GFF3 file format using
     evidence such as EST alignments and protein homology.  Maker can be used to
     produce gene annotations for new genomes as well as update annotations from
     existing genome databases.

     The four input arguments are user control files that specify how maker
     should behave. The evaluator options file contains control options specific
     for the evaluation of gene annotations. All options for maker should be set
     in the control files, but a few can also be set on the command line.
     Command line options provide a convenient mechanism to override commonly
     altered control file values.

     Input files listed in the control options files must be in fasta format.
     Please see maker documentation to learn more about control file
     configuration.  Maker will automatically try and locate the user control
     files in the current working directory if these arguments are not supplied
     when initializing maker.

     It is important to note that maker does not try to recalculate data that
     it has already calculated.  For example, if you run an analysis twice on
     the same dataset file you will notice that maker does not rerun any of the
     blast analyses, but instead uses the blast analyses stored from the
     previous run.  To force maker to rerun all analyses, use the -f flag.


Options:

     -genome|g <filename> Specify the genome file.

     -predictor|p <type>  Selects the predictor(s) to use when building
                          annotations.  Defines a pool of gene models for
                          annotation selection.

                          types: snap
                                 augustus
                                 fgenesh
                                 genemark
                                 est2genome (Uses EST's directly)
                                 model_gff (Pass through GFF3 annotations)
                                 pred_gff (Uses passed through GFF3 predictions)

                          Use a ',' to separate types (no spaces)
                          i.e. -predictor=snap,augustus,fgenesh


     -RM_off|R           Turns all repeat masking off.

     -retry|r <integer>  Rerun failed contigs up to the specified count.

     -cpus|c  <integer>  Tells how many cpus to use for BLAST analysis.

     -force|f            Forces maker to delete old files before running again.
                         This will require all blast analyses to be re-run.

     -again|a            Calculate all annotations and output files again even if
                         no settings have changed.

     -evaluate|e         Run Evaluator on final annotations (under development).

     -quiet|q            Silences most of maker's status messages.

     -CTL                Generate empty control files in the current directory.

     -help|?             Prints this usage statement.


";

#-------------------------------------------------------------------------------
#------------------------------------ MAIN -------------------------------------
#-------------------------------------------------------------------------------

#---global variables
my %OPT;
my $rank = 0;
my $size = 1;
$RANK = $rank;

#---Process options on the command line
# GetOptions reports malformed or unknown options by warning and returning
# false rather than by throwing, so its return value is captured and checked
# after the try block.  The catch clause still covers errors thrown by the
# option callbacks.
my $options_ok;

try {
    $options_ok = GetOptions(
        "RM_off|R"      => \$OPT{R},
        "force|f"       => \$OPT{force},
        "genome|g=s"    => \$OPT{genome},
        "cpus|c=i"      => \$OPT{cpus},
        "predictor|p=s" => \$OPT{predictor},   # short forms as documented
        "retry|r=i"     => \$OPT{retry},       # in the usage statement
        "evaluate|e"    => \$OPT{evaluate},
        "again|a"       => \$OPT{again},
        "quiet|q"       => \$main::quiet,
        "CTL"           => sub { GI::generate_control_files(); exit(0); },
        "help|?"        => sub { print $usage; exit(0) },
    );
}
catch Error::Simple with {
    my $E = shift;

    print STDERR $E->{-text};
    die "\n\nMaker failed parsing command line options!!\n\n";
};

# Refuse to start an analysis with a command line that did not parse cleanly.
if (not $options_ok) {
    die "\n\nMaker failed parsing command line options!!\n\n";
}

# Variables that are persistent outside of try
my %CTL_OPT;
my $iterator;
my $DS_CTL;
my $GFF_DB;
my $build;
my @failed;

try {
    # Whatever is left on the command line after option parsing is taken as
    # the list of control files.
    my @ctlfiles = @ARGV;

    if (not @ctlfiles) {
        # No control files given: fall back to the default file names in the
        # current working directory, but only if all three are present.
        my @default_ctlfiles = ("maker_opts.ctl",
                                "maker_bopts.ctl",
                                "maker_exe.ctl",
                               );

        if (grep { not -e $_ } @default_ctlfiles) {
            # This is an error path: report on STDERR and exit non-zero so
            # calling scripts can detect the failure.
            print STDERR "ERROR: Control files not found\n";
            print STDERR $usage;
            exit(1);
        }

        @ctlfiles = @default_ctlfiles;
    }

    #--Control file processing

    # Set up control options from control files
    %CTL_OPT = GI::load_control_files(\@ctlfiles, \%OPT, $size);

    #--open datastructure controller
    $DS_CTL = ds_utility->new(\%CTL_OPT);

    #--set up gff database
    $GFF_DB = GFFDB->new(\%CTL_OPT);
    $build  = $GFF_DB->next_build;

    #---load genome multifasta/GFF3 file
    $iterator = Iterator::Any->new( -fasta => $CTL_OPT{'genome'},
                                    -gff   => $CTL_OPT{'genome_gff'},
                                  );
}
catch Error::Simple with {
    my $E = shift;

    print STDERR $E->{-text};
    print STDERR "\n\nMaker failed while examining startup data\n",
                 "(control files and input fasta files)!!\n\n";

    # Use the exit code carried by the error when there is one, else 2.
    my $code = 2;
    $code = $E->{-value} if (defined($E->{-value}));

    exit($code);
};

# Process every entry from the iterator; once the iterator is exhausted, keep
# pulling entries from the list of failed ones until that is empty too.
while (my $fasta = $iterator->nextFasta() || shift @failed) {
    my $tier = Process::MpiTiers->new({fasta   => $fasta,
                                       CTL_OPT => \%CTL_OPT,
                                       DS_CTL  => $DS_CTL,
                                       GFF_DB  => $GFF_DB,
                                       build   => $build},
                                      '0',
                                      'Process::MpiChunk'
                                     );

    # Nothing to run for a tier that reports itself as already terminated.
    next if ($tier->terminated);

    $tier->run_all;
    $DS_CTL->add_entry($tier->DS);

    # Queue the entry again when its tier reports failure.
    push(@failed, $tier->fasta) if ($tier->failed);
}

print STDERR "\n\nMaker is now finished!!!\n\n";

#-----------------------------------------------------------------------------
#----------------------------------- SUBS ------------------------------------
#-----------------------------------------------------------------------------

Note: See TracBrowser for help on using the browser.