| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
# Shell/perl polyglot header: if this file is interpreted by a shell, the
# shell runs the eval line and exec's perl on this script, forwarding the
# arguments.  Perl itself parses both lines as one statement guarded by
# "if 0", so the eval is never executed under perl.  The "if 0;" must stay on
# its own line so the shell never reads it as part of the exec command.
eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0;
|---|
| 5 |
|
|---|
| 6 |
use strict "vars"; |
|---|
| 7 |
use strict "refs"; |
|---|
| 8 |
|
|---|
| 9 |
use FindBin; |
|---|
| 10 |
use lib "$FindBin::Bin/../lib"; |
|---|
| 11 |
use lib "$FindBin::Bin/../perl/lib"; |
|---|
| 12 |
use vars qw($RANK $LOG $CMD_ARGS); |
|---|
| 13 |
|
|---|
| 14 |
BEGIN {
    # Fall back to the ontology files located relative to this script when
    # the environment does not already name them.
    $ENV{CGL_SO_SOURCE} ||= "$FindBin::Bin/../lib/CGL/so.obo";
    $ENV{CGL_GO_SOURCE} ||= "$FindBin::Bin/../lib/CGL/gene_ontology.obo";

    # Remember the raw command line before any option parsing touches @ARGV.
    $CMD_ARGS = join(' ', @ARGV);

    # Ctrl-C: report the abort on STDERR and leave with exit status 1.
    $SIG{'INT'} = sub {
        print STDERR "\n\nMaker aborted by user!!\n\n";
        exit (1);
    };

    # Warning filter: drop the two known-noisy messages, pass everything
    # else through to the normal warn mechanism.
    $SIG{'__WARN__'} = sub {
        my $message = $_[0];

        return if $message =~ /Not a CODE reference/;
        return if $message =~ /Can\'t store item /;

        warn $message;
    };

    # Fatal error hook: when a run log object exists, record the rank and an
    # incremented die count in it, then re-raise the error under a banner.
    $SIG{'__DIE__'} = sub {
        my $reason = $_[0];

        if (defined($LOG) && defined($reason)) {
            my $die_count = $LOG->get_die_count();
            $die_count++;

            $LOG->add_entry("DIED", "RANK",  $RANK);
            $LOG->add_entry("DIED", "COUNT", $die_count);
        }

        die "#----------------------\n",
            "FATAL: failed!!\n",
            "#----------------------\n",
            $reason . "\n";
    };
}
|---|
| 54 |
|
|---|
| 55 |
use Cwd; |
|---|
| 56 |
use FileHandle; |
|---|
| 57 |
use File::Path; |
|---|
| 58 |
use Getopt::Long qw(:config no_ignore_case); |
|---|
| 59 |
use File::Temp qw(tempfile tempdir); |
|---|
| 60 |
|
|---|
| 61 |
use GI; |
|---|
| 62 |
use Dumper::GFF::GFFV3; |
|---|
| 63 |
use Iterator::Any; |
|---|
| 64 |
use Iterator::Fasta; |
|---|
| 65 |
use Iterator::GFF3; |
|---|
| 66 |
use Fasta; |
|---|
| 67 |
use FastaChunker; |
|---|
| 68 |
use maker::auto_annotator; |
|---|
| 69 |
use cluster; |
|---|
| 70 |
use repeat_mask_seq; |
|---|
| 71 |
use runlog; |
|---|
| 72 |
use ds_utility; |
|---|
| 73 |
use GFFDB; |
|---|
| 74 |
use Error qw(:try); |
|---|
| 75 |
use Error::Simple; |
|---|
| 76 |
use Process::MpiChunk; |
|---|
| 77 |
use Process::MpiTiers; |
|---|
| 78 |
|
|---|
| 79 |
# Enable autoflush on the currently selected output handle so status output
# is not held back in a buffer.
$| = 1;

# Help text shown for -help and when no control files can be located.
# NOTE(review): the text speaks of "four input arguments" and an evaluator
# options file, while the synopsis line lists three control files -- confirm
# which is current before changing the wording.
my $usage = "
Usage:

     maker [options] <maker_opts> <maker_bopts> <maker_exe>

     Maker is a program that produces gene annotations in GFF3 file format using
     evidence such as EST alignments and protein homology. Maker can be used to
     produce gene annotations for new genomes as well as update annotations from
     existing genome databases.

     The four input arguments are user control files that specify how maker
     should behave. The evaluator options file contains control options specific
     for the evaluation of gene annotations. All options for maker should be set
     in the control files, but a few can also be set on the command line.
     Command line options provide a convenient mechanism to override commonly
     altered control file values.

     Input files listed in the control options files must be in fasta format.
     Please see maker documentation to learn more about control file
     configuration. Maker will automatically try and locate the user control
     files in the current working directory if these arguments are not supplied
     when initializing maker.

     It is important to note that maker does not try to recalculate data that
     it has already calculated. For example, if you run an analysis twice on
     the same dataset file you will notice that maker does not rerun any of the
     blast analyses, but instead uses the blast analyses stored from the
     previous run. To force maker to rerun all analyses, use the -f flag.


Options:

     -genome|g <filename> Specify the genome file.

     -predictor|p <type>  Selects the predictor(s) to use when building
                          annotations. Defines a pool of gene models for
                          annotation selection.

                          types: snap
                                 augustus
                                 fgenesh
                                 genemark
                                 est2genome (Uses EST's directly)
                                 model_gff  (Pass through GFF3 annotations)
                                 pred_gff   (Uses passed through GFF3 predictions)

                          Use a ',' to separate types (no spaces)
                          i.e. -predictor=snap,augustus,fgenesh


     -RM_off|R            Turns all repeat masking off.

     -retry|r <integer>   Rerun failed contigs up to the specified count.

     -cpus|c <integer>    Tells how many cpus to use for BLAST analysis.

     -force|f             Forces maker to delete old files before running again.
                          This will require all blast analyses to be re-run.

     -again|a             Calculate all annotations and output files again even if
                          no settings have changed.

     -evaluate|e          Run Evaluator on final annotations (under development).

     -quiet|q             Silences most of maker's status messages.

     -CTL                 Generate empty control files in the current directory.

     -help|?              Prints this usage statement.


";
|---|
| 153 |
|
|---|
| 154 |
|
|---|
| 155 |
|
|---|
| 156 |
|
|---|
| 157 |
|
|---|
| 158 |
|
|---|
| 159 |
# Values of the command line options, filled in by GetOptions below.
my %OPT;

# This script runs as a single process, so rank and size are fixed.
# (presumably MPI-style rank/size shared with a parallel variant -- confirm)
my $rank = 0;
my $size = 1;
$RANK = $rank;    # package global read by the __DIE__ handler when logging

# Parse the command line.  GetOptions reports malformed or unknown options by
# warning and returning false rather than by throwing, so its return value is
# checked explicitly; otherwise the run would silently continue with options
# the user did not intend.  The short aliases advertised in the usage text
# (p, r, e, q) are declared explicitly instead of relying on abbreviation.
try {
    GetOptions("RM_off|R"      => \$OPT{R},
               "force|f"       => \$OPT{force},
               "genome|g=s"    => \$OPT{genome},
               "cpus|c=i"      => \$OPT{cpus},
               "predictor|p=s" => \$OPT{predictor},
               "retry|r=i"     => \$OPT{retry},
               "evaluate|e"    => \$OPT{evaluate},
               "again|a"       => \$OPT{again},
               "quiet|q"       => \$main::quiet,
               "CTL"           => sub { GI::generate_control_files(); exit(0); },
               "help|?"        => sub { print $usage; exit(0) },
              )
        or die "Invalid command line options (see messages above).\n";
}
catch Error::Simple with {
    my $E = shift;

    # Show the underlying reason first, then abort with the generic message.
    print STDERR $E->{-text};
    die "\n\nMaker failed parsing command line options!!\n\n";
};
|---|
| 185 |
|
|---|
| 186 |
|
|---|
| 187 |
# State produced during startup and consumed by the processing loop.
my %CTL_OPT;     # control options returned by GI::load_control_files
my $iterator;    # Iterator::Any over the genome fasta / GFF input
my $DS_CTL;      # ds_utility object built from the control options
my $GFF_DB;      # GFFDB object built from the control options
my $build;       # value of $GFF_DB->next_build
my @failed;      # fasta entries queued for another attempt

# Locate and load the control files, then build the helper objects.
# An Error::Simple raised here is reported and ends the program with the
# error's -value as exit status (2 when it carries none).
try {
    my @ctlfiles = @ARGV;

    # No control files on the command line: fall back to the default file
    # names in the current working directory, all of which must exist.
    if (not @ctlfiles) {
        my @defaults = ("maker_opts.ctl",
                        "maker_bopts.ctl",
                        "maker_exe.ctl",
                       );

        if (grep { !-e $_ } @defaults) {
            print STDERR "ERROR: Control files not found\n";
            print $usage;
            # This is a failure, so report it with a non-zero status (the
            # same default code the catch clause uses for startup errors).
            exit(2);
        }

        @ctlfiles = @defaults;
    }

    %CTL_OPT = GI::load_control_files(\@ctlfiles, \%OPT, $size);

    $DS_CTL = ds_utility->new(\%CTL_OPT);

    $GFF_DB = GFFDB->new(\%CTL_OPT);
    $build  = $GFF_DB->next_build;

    $iterator = Iterator::Any->new( -fasta => $CTL_OPT{'genome'},
                                    -gff   => $CTL_OPT{'genome_gff'},
                                  );
}
catch Error::Simple with {
    my $E = shift;

    print STDERR $E->{-text};
    print STDERR "\n\nMaker failed while examining startup data\n",
                 "(control files and input fasta files)!!\n\n";

    # Prefer the exit code carried by the error object, if it has one.
    my $code = 2;
    $code = $E->{-value} if (defined($E->{-value}));

    exit($code);
};
|---|
| 243 |
|
|---|
| 244 |
# Main loop: take the next entry from the input iterator and, once that is
# exhausted, drain the queue of entries that failed earlier.
# NOTE(review): an entry that fails is queued again each time; presumably
# Process::MpiTiers reports it as terminated once its retries are used up --
# confirm that this loop cannot spin on a permanently failing contig.
my $tier;
while (1) {
    my $fasta = $iterator->nextFasta() || shift @failed;
    last unless $fasta;

    my %tier_args = (fasta   => $fasta,
                     CTL_OPT => \%CTL_OPT,
                     DS_CTL  => $DS_CTL,
                     GFF_DB  => $GFF_DB,
                     build   => $build,
                    );

    $tier = Process::MpiTiers->new(\%tier_args, '0', 'Process::MpiChunk');

    # Nothing to do for entries the tier already reports as terminated.
    next if ($tier->terminated);

    $tier->run_all;
    $DS_CTL->add_entry($tier->DS);

    # Queue the entry for another attempt when the tier reports failure.
    push(@failed, $tier->fasta) if ($tier->failed);
}

print STDERR "\n\nMaker is now finished!!!\n\n";
|---|
| 262 |
|
|---|
| 263 |
|
|---|
| 264 |
|
|---|
| 265 |
|
|---|
| 266 |
|
|---|