| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
package runlog; |
|---|
| 5 |
use strict; |
|---|
| 6 |
use vars qw(@ISA @EXPORT $VERSION); |
|---|
| 7 |
use Exporter; |
|---|
| 8 |
use Fasta; |
|---|
| 9 |
|
|---|
| 10 |
@ISA = qw(); |
|---|
| 11 |
$VERSION = 0.1; |
|---|
| 12 |
|
|---|
| 13 |
|
|---|
| 14 |
# Control-file options that are written to the run log by _write_new_log and
# compared against the previous run's logged values by _clean_files.
# The order matters: both subs iterate this list front to back, and 'run'
# must stay last because its comparison is skipped once an earlier option has
# already flagged the GFF output for removal.
my @ctl_to_log = qw(
    genome_gff
    other_gff
    est
    est_reads
    altest
    est_gff
    altest_gff
    protein
    protein_gff
    model_org
    repeat_protein
    rmlib
    rm_gff
    organism_type
    predictor
    snaphmm
    gmhmm
    augustus_species
    fgenesh_par_file
    model_gff
    pred_gff
    max_dna_len
    split_hit
    pred_flank
    min_protein
    AED_threshold
    single_exon
    single_length
    keep_preds
    map_forward
    alt_peptide
    evaluate
    blast_type
    softmask
    pcov_blastn
    pid_blastn
    eval_blastn
    bit_blastn
    pcov_rm_blastx
    pid_rm_blastx
    eval_rm_blastx
    bit_rm_blastx
    pcov_blastx
    pid_blastx
    eval_blastx
    bit_blastx
    pcov_tblastx
    pid_tblastx
    eval_tblastx
    bit_tblastx
    ep_score_limit
    en_score_limit
    enable_fathom
    unmask
    model_pass
    est_pass
    altest_pass
    protein_pass
    rm_pass
    other_pass
    pred_pass
    run
);

# Log file names already examined by _clean_files in this process.  The
# force / always_try / again options are only honoured the first time a given
# log file is seen.
my %SEEN;
|---|
| 79 |
|
|---|
| 80 |
|
|---|
| 81 |
|
|---|
| 82 |
|
|---|
| 83 |
# Constructor.
#
# Arguments (after the class name) are handed unchanged to _initialize:
# the control-options hash ref, the per-contig params hash ref and an
# optional log file name.
#
# When _initialize returns true the previous log is loaded, stale files are
# cleaned up and a fresh log is written.  The status banner is reported in
# every case, including when initialization declined to continue.
#
# Returns the blessed object.
sub new {
    my ($class, @args) = @_;

    my $self = bless({}, $class);

    my $usable = $self->_initialize(@args);
    if ($usable) {
        $self->_load_old_log();
        $self->_clean_files();
        $self->_write_new_log();
    }

    $self->report_status();

    return $self;
}
|---|
| 100 |
|
|---|
| 101 |
# Stores the constructor arguments on the object and decides whether the
# contig is long enough to be processed.
#
# Arguments: control-options hash ref, params hash ref, optional log file
# name (defaults to "run.log").
#
# Returns 1 when processing should go ahead.  Returns 0, after setting
# continue_flag to -2, when params->{seq_length} is below
# CTL_OPTIONS->{min_contig}.
sub _initialize {
    my ($self, $ctl_options, $params, $file_name) = @_;

    $self->{CTL_OPTIONS} = $ctl_options;
    $self->{params}      = $params;
    $self->{file_name}   = $file_name || "run.log";

    unless ($main::quiet) {
        print STDERR "\n\n\n--Next Contig--\n\n";
    }

    $self->{CWD} = $self->{CTL_OPTIONS}->{CWD};

    my $shortest_allowed = $self->{CTL_OPTIONS}->{min_contig};
    my $contig_length    = $self->{params}->{seq_length};

    # Long enough: carry on with the normal log handling.
    return 1 unless ($contig_length < $shortest_allowed);

    # Too short: -2 is the "skipped, too small" status.
    $self->{continue_flag} = -2;

    return 0;
}
|---|
| 121 |
|
|---|
| 122 |
# Reads the log file left by a previous run (if any) into
# $self->{old_log} and restores $self->{die_count}.
#
# Each log line is "TYPE<TAB>KEY<TAB>VALUE".  The result is a two-level hash
# $old_log{TYPE}{KEY} = VALUE, with a missing VALUE stored as ''.
# die_count is reset to 0 and then taken from the "DIED / COUNT" entry when
# one is present with a value.
#
# Dies if the log file exists but cannot be opened: silently treating an
# unreadable log as empty would make every control option look "changed" to
# _clean_files, which then removes cached results.
sub _load_old_log {
    my $self = shift;

    $self->{die_count} = 0;

    my $log_file = $self->{file_name};
    my %logged_vals;

    if (-e $log_file) {
        print STDERR "Processing run.log file...\n" unless($main::quiet);

        open(my $in, '<', $log_file)
            or die "ERROR: Could not open log file '$log_file' for reading: $!\n";

        while (defined(my $line = <$in>)) {
            chomp $line;

            # Limit of 3 keeps any tab inside VALUE instead of truncating it.
            my ($type, $key, $value) = split(/\t/, $line, 3);

            # Blank or empty-typed lines carry no information; skipping them
            # avoids creating a bogus '' entry in the log hash.
            next unless (defined $type && length $type);

            $key   = '' unless (defined $key);
            $value = '' unless (defined $value);

            $logged_vals{$type}{$key} = $value;

            if ($type eq 'DIED' && $key eq 'COUNT' && $value ne '') {
                $self->{die_count} = $value;
            }
        }
        close($in);
    }

    $self->{old_log} = \%logged_vals;
}
|---|
| 146 |
|
|---|
| 147 |
# Decides how the current contig should proceed (continue_flag) and removes
# any cached results that can no longer be reused.
#
# Reads:  $self->{CWD}, {params}{the_void}, {CTL_OPTIONS}, {file_name},
#         {old_log} (as built by _load_old_log), {die_count}
# Writes: $self->{continue_flag}, and may reset $self->{die_count} to 0
#
# continue_flag values set here:
#    1  start / continue normally
#    0  log and GFF file both exist and nothing forces a re-run
#    2  retry after a previous death, wiping old data first (clean_try)
#    3  retry after a previous death, keeping old data
#   -1  died more often than CTL_OPTIONS{retry} allows
#
# When a log file exists, every option in @ctl_to_log is compared with the
# value logged by the previous run.  A difference marks categories of cached
# files as stale (%rm_key), and those files are then deleted.
#
# Changes relative to the earlier implementation:
#   * 'pid_rm_blastx' is now recognised (it was misspelled 'pcid_rm_blastx',
#     which never matched any logged option).
#   * The working directory and the repeat-protein file name are quoted
#     before being used inside a regex.
#   * An unfinished file that is found under theVoid is queued with its
#     theVoid path, so the unlink hits the file that was actually tested.
#   * The basename match is checked instead of trusting a possibly stale $1.
#   * With 'force', the old *.fasta outputs are really deleted (they used to
#     be queued in a list that this branch never processed).
#   * Cwd and File::Path are loaded here because this file does not load them.
sub _clean_files {
    my $self = shift;

    require Cwd;
    require File::Path;

    my $CWD         = $self->{CWD};
    my $the_void    = $self->{params}->{the_void};
    my %CTL_OPTIONS = %{$self->{CTL_OPTIONS}};

    # The GFF file and output directory are derived from the theVoid path.
    my $log_file = $self->{file_name};
    my $gff_file = $the_void;
    my $out_base = $the_void;
    $gff_file =~ s/theVoid\.([^\/]+)$/$1.gff/;
    $out_base =~ s/theVoid\.[^\/]+$//;

    my $continue_flag = 1;

    my %logged_vals = %{$self->{old_log}};
    my %rm_key;    # categories of cached data that must be removed
    my @files;     # individual files queued for unlink
    my @dirs;      # directories queued for rmtree

    # Option => categories of cached data that go stale when it changes.
    # Every changed option additionally marks 'gff' (and usually 'preds').
    my %stale_for = (
        organism_type    => [qw(all)],
        max_dna_len      => [qw(all)],
        rm_gff           => [qw(all)],
        model_org        => [qw(all)],
        rmlib            => [qw(all)],
        repeat_protein   => [qw(all_but)],
        pcov_rm_blastx   => [qw(all_but)],
        pid_rm_blastx    => [qw(all_but)],
        eval_rm_blastx   => [qw(all_but)],
        bit_rm_blastx    => [qw(all_but)],
        blast_type       => [qw(all_but)],
        snaphmm          => [qw(snap)],
        augustus_species => [qw(augustus)],
        fgenesh_par_file => [qw(fgenesh)],
        gmhmm            => [qw(genemark)],
        split_hit        => [qw(e_exonerate p_exonerate blastn tblastx)],
        ep_score_limit   => [qw(e_exonerate p_exonerate)],
        en_score_limit   => [qw(e_exonerate p_exonerate)],
        protein          => [qw(blastx p_exonerate)],
        alt_peptide      => [qw(blastx p_exonerate)],
        eval_blastx      => [qw(blastx p_exonerate)],
        softmask         => [qw(blastx p_exonerate tblastx)],
        est              => [qw(est_blastn e_exonerate)],
        est_reads        => [qw(read_blastn)],
        eval_blastn      => [qw(blastn e_exonerate)],
        altest           => [qw(tblastx)],
        eval_tblastx     => [qw(tblastx)],
    );

    # Options whose change leaves hint-based predictions reusable.
    my %preds_survive = map { $_ => 1 }
        qw(evaluate enable_fathom keep_preds other_pass other_gff map_forward);

    # Sorts paths into the file / directory deletion queues.
    my $queue_paths = sub {
        foreach my $path (@_) {
            push(@files, $path) if (-f $path);
            push(@dirs,  $path) if (-d $path);
        }
    };

    if (-e $log_file) {
        # ---- Step 1: pick the continue flag from the previous run's state.
        if (exists $logged_vals{DIED}) {
            if ($CTL_OPTIONS{force} && ! $SEEN{$log_file}) {
                $self->{die_count} = 0;
                $continue_flag = 1;
                $rm_key{force}++;
                $SEEN{$log_file}++;
            }
            elsif ($CTL_OPTIONS{always_try} && ! $SEEN{$log_file}) {
                $self->{die_count} = 0;
                $continue_flag = 1;
                $SEEN{$log_file}++;
            }
            else {
                $continue_flag = ($CTL_OPTIONS{clean_try}) ? 2 : 3;
                $continue_flag = -1 if ($self->{die_count} > $CTL_OPTIONS{retry});
                $rm_key{retry}++ if ($continue_flag == 2);
                $SEEN{$log_file}++;
            }
        }
        elsif ($CTL_OPTIONS{force} && ! $SEEN{$log_file}) {
            $rm_key{force}++;
            $continue_flag = 1;
            $SEEN{$log_file}++;
        }
        elsif ($CTL_OPTIONS{again} && ! $SEEN{$log_file}) {
            $continue_flag = 1;
            $rm_key{gff}++;
            $SEEN{$log_file}++;
        }
        else {
            $continue_flag = 0 if (-e $gff_file);
            $SEEN{$log_file}++;
        }

        if ($continue_flag >= 0 || $continue_flag == -1) {
            # ---- Step 2: compare current options with the logged ones.
            my $cwd = ($CWD) ? $CWD : Cwd::getcwd();

            # \Q..\E: the directory is literal text, not a pattern.
            my $cwd_prefix = qr/^\Q$cwd\E\/*/;

            foreach my $key (@ctl_to_log) {
                # 'run' is irrelevant once the GFF is going to be rebuilt.
                next if ($key eq 'run' && exists $rm_key{gff});

                # The *_pass options only matter when a genome_gff is in use
                # and it is the same one as in the previous run.
                if ($key =~ /^(?:est|altest|protein|rm|pred|model|other)_pass$/) {
                    next unless ($CTL_OPTIONS{genome_gff});
                    my $old = (exists $logged_vals{CTL_OPTIONS}{genome_gff})
                        ? $logged_vals{CTL_OPTIONS}{genome_gff} : '';
                    $old =~ s/$cwd_prefix//;
                    my $new = $CTL_OPTIONS{genome_gff};
                    $new =~ s/$cwd_prefix//;

                    next unless ($old eq $new);
                }

                # map_forward needs a GFF source to have any effect.
                if ($key eq 'map_forward') {
                    next unless ($CTL_OPTIONS{genome_gff} || $CTL_OPTIONS{model_gff});
                }

                # Value recorded by the previous run.
                my $log_val = '';
                if (defined $logged_vals{CTL_OPTIONS}{$key}) {
                    $log_val = $logged_vals{CTL_OPTIONS}{$key};
                    if ($key eq 'repeat_protein') {
                        # Compare the bundled TE protein file independent of
                        # where it is installed.
                        $log_val =~ s/.*\/(maker\/data\/te_proteins.fasta)$/$1/;
                    }
                    elsif ($key eq 'run') {
                        # Order of the run list is not significant.
                        my @set = sort(split(',', $logged_vals{CTL_OPTIONS}{run}));
                        $log_val = join(',', @set);
                    }
                }

                # Value from the current control files, normalised the same
                # way _write_new_log normalises it.
                my $ctl_val = '';
                if (defined $CTL_OPTIONS{$key}) {
                    $ctl_val = $CTL_OPTIONS{$key};
                    $ctl_val =~ s/$cwd_prefix//;
                    if ($key eq 'repeat_protein') {
                        $ctl_val =~ s/.*\/(maker\/data\/te_proteins.fasta)$/$1/;
                    }
                    elsif ($key eq 'run') {
                        my @set = sort @{$CTL_OPTIONS{_run}};
                        $ctl_val = join(',', @set);
                    }
                }

                # Values assumed for options missing from the old log.
                if ($key eq 'organism_type' && ! $log_val) {
                    $log_val = 'eukaryotic';
                }
                if ($key eq 'softmask' && $log_val eq '') {
                    $log_val = 1;
                }
                if ($key eq 'AED_threshold' && $log_val eq '') {
                    $log_val = 1;
                }

                next if ($log_val eq $ctl_val);

                print STDERR "MAKER WARNING: Control file option \'$key\' has changed\n".
                    "Old:$log_val\tNew:$ctl_val\n\n";

                # Any change means the contig is re-run and its GFF rebuilt.
                $continue_flag = 1;
                $rm_key{gff}++;
                $rm_key{preds}++ unless ($preds_survive{$key});

                foreach my $category (@{ $stale_for{$key} || [] }) {
                    $rm_key{$category}++;
                }
            }

            # ---- Step 3: queue files that were started but never finished.
            while (my $key = each %{$logged_vals{STARTED}}) {
                next if (exists $logged_vals{FINISHED}{$key});

                print STDERR "MAKER WARNING: The file $key\n".
                    "did not finish on the last run and must be erased\n";

                push(@files, $key);

                # Basename of the logged path; without one there is nothing
                # safe to search for under theVoid.
                my ($rm_f_name) = $key =~ /([^\/]+)$/;
                next unless (defined $rm_f_name);

                # The logged path may be out of date while the file itself
                # still sits in the current theVoid directory.
                if (! -e $key && -e "$the_void/$rm_f_name") {
                    push(@files, "$the_void/$rm_f_name");
                }

                # Directories derived from the same base name go as well.
                $rm_f_name =~ s/\.fasta$//;
                foreach my $d (glob("$the_void/*$rm_f_name*")) {
                    push(@dirs, $d) if (-d $d);
                }
            }
        }

        # ---- Step 4: remove whatever was marked stale.
        if (exists $rm_key{force}) {
            print STDERR "MAKER WARNING: All old files will be erased before continuing\n";

            # Empty theVoid completely.
            File::Path::rmtree($the_void);
            File::Path::mkpath($the_void);

            File::Path::rmtree("$out_base/evaluator");

            # Remove the old outputs right here; the queued-file loop at the
            # end of the final else branch is not reached from this branch.
            unlink($gff_file) if (-e $gff_file);
            unlink(glob("$out_base/*.fasta"));
        }
        elsif (exists $rm_key{retry}) {
            print STDERR "MAKER WARNING: Old data must be removed before re-running this sequence\n";

            File::Path::rmtree($the_void);
            File::Path::mkpath($the_void);
            unlink($gff_file) if (-e $gff_file);

            File::Path::rmtree("$out_base/evaluator");
        }
        elsif (exists $rm_key{all}) {
            print STDERR "MAKER WARNING: Changes in control files make re-use of all old data impossible\n".
                "All old files will be erased before continuing\n";

            File::Path::rmtree($the_void);
            File::Path::mkpath($the_void);
            unlink($gff_file) if (-e $gff_file);

            File::Path::rmtree("$out_base/evaluator");
        }
        elsif (exists $rm_key{all_but}) {
            print STDERR "MAKER WARNING: Changes in control files make re-use of all but RepeatMasker data impossible\n".
                "All old non-RepeatMasker files will be erased before continuing\n";

            # Keep only the .out / .cat / .tbl files.
            my @f = grep(!/(\.out|\.cat|\.tbl)$/, glob("$the_void/*"));

            foreach my $f (@f) {
                unlink($f) if (-f $f);
                File::Path::rmtree($f) if (-d $f);
            }
        }
        else {
            if (exists $rm_key{preds}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of hint based predictions impossible\n".
                    "Old hint based prediction files will be erased before continuing\n";

                push(@files, glob("$the_void/*auto_annotator*"));
            }
            if (exists $rm_key{snap}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of old SNAP data impossible\n".
                    "Old SNAP files will be erased before continuing\n";

                push(@files, glob("$the_void/*snap*"));
            }
            if (exists $rm_key{augustus}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of old Augustus data impossible\n".
                    "Old Augustus files will be erased before continuing\n";

                push(@files, glob("$the_void/*augustus*"));
            }
            if (exists $rm_key{fgenesh}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of old FGENESH data impossible\n".
                    "Old FGENESH files will be erased before continuing\n";

                push(@files, glob("$the_void/*fgenesh*"));
            }
            if (exists $rm_key{genemark}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of old GeneMark data impossible\n".
                    "Old GeneMark files will be erased before continuing\n";

                push(@files, glob("$the_void/*genemark*"));
            }

            if (exists $rm_key{blastn}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of all old EST Blastn data impossible\n".
                    "Old EST Blastn files will be erased before continuing\n";

                $queue_paths->(glob("$the_void/*blastn*"));
            }
            elsif (exists $rm_key{est_blastn}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of assembled EST Blastn data impossible\n".
                    "Old EST Blastn files will be erased before continuing\n";

                $queue_paths->(glob("$the_void/*est_blastn*"));
            }
            elsif (exists $rm_key{read_blastn}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of unassembled EST Blastn data impossible\n".
                    "Old EST reads Blastn files will be erased before continuing\n";

                $queue_paths->(glob("$the_void/*read_blastn*"));
            }

            if (exists $rm_key{tblastx}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of old tBlastx data impossible\n".
                    "Old tBlastx files will be erased before continuing\n";

                $queue_paths->(glob("$the_void/*tblastx*"));
            }

            if (exists $rm_key{blastx}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of old Blastx data impossible\n".
                    "Old Blastx files will be erased before continuing\n";

                my @f = glob("$the_void/*blastx*");

                # Spare the repeat-protein blastx results: they belong to the
                # masking step, not to the protein evidence.
                my $repeat_db = defined($CTL_OPTIONS{repeat_protein})
                    ? $CTL_OPTIONS{repeat_protein} : '';
                my ($te) = $repeat_db =~ /([^\/]+)$/;

                if ($te) {
                    $te =~ s/\.fasta$//;
                    @f = grep { ! /\Q$te\E\.blastx$/ } @f;
                }

                $queue_paths->(@f);
            }

            if (exists $rm_key{e_exonerate}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of old EST Exonerate data impossible\n".
                    "Old EST Exonerate files will be erased before continuing\n";

                push(@files, glob("$the_void/*est_exonerate*"));
            }

            if (exists $rm_key{p_exonerate}) {
                print STDERR "MAKER WARNING: Changes in control files make re-use of old protein Exonerate data impossible\n".
                    "Old protein Exonerate files will be erased before continuing\n";

                push(@files, glob("$the_void/*p_exonerate*"));
            }

            if (exists $rm_key{gff}) {
                print STDERR "MAKER WARNING: Any preexisting GFF3 and fasta files for this contig must now be removed.\n";
                push(@files, $gff_file);
                push(@files, glob("$out_base/evaluator/*.eva"));
                push(@files, glob("$out_base/*maker*.fasta"));
            }

            # Delete everything that was queued above.
            foreach my $file (@files) {
                unlink($file);
            }

            foreach my $dir (@dirs) {
                File::Path::rmtree($dir);
            }

            # Leftover temporary directories are never reusable.
            foreach my $d (glob("$the_void/*.temp_dir")) {
                File::Path::rmtree($d) if (-d $d);
            }
        }
    }

    $self->{continue_flag} = $continue_flag;
}
|---|
| 611 |
|
|---|
| 612 |
|
|---|
| 613 |
# Starts a fresh log file containing one "CTL_OPTIONS<TAB>option<TAB>value"
# line for every option in @ctl_to_log.
#
# Nothing is written when continue_flag is 0 or negative.
#
# Values are normalised so that a later comparison does not depend on where
# the run lives: a leading working-directory prefix is stripped, the bundled
# te_proteins.fasta is reduced to its path inside the maker tree, and the
# 'run' value is written as the sorted, comma-joined contents of
# CTL_OPTIONS{_run}.
#
# Dies if the log file cannot be opened or closed; previously such failures
# were silently ignored.
sub _write_new_log {
    my $self = shift;

    my $CWD      = $self->{CWD};
    my $log_file = $self->{file_name};

    my %CTL_OPTIONS = %{$self->{CTL_OPTIONS}};

    return if ($self->{continue_flag} <= 0);

    # Cwd is called by its full name below but is not loaded by this file.
    require Cwd;

    open(my $log, '>', $log_file)
        or die "ERROR: Could not open log file '$log_file' for writing: $!\n";

    my $cwd = ($CWD) ? $CWD : Cwd::getcwd();

    foreach my $key (@ctl_to_log) {
        my $ctl_val = '';
        if (defined $CTL_OPTIONS{$key}) {
            $ctl_val = $CTL_OPTIONS{$key};

            # \Q..\E: the directory name is literal text, not a pattern.
            $ctl_val =~ s/^\Q$cwd\E\/*//;

            if ($key eq 'repeat_protein') {
                $ctl_val =~ s/.*\/(maker\/data\/te_proteins.fasta)$/$1/;
            }
            elsif ($key eq 'run') {
                my @set = sort @{$CTL_OPTIONS{_run}};
                $ctl_val = join(',', @set);
            }
        }
        print {$log} "CTL_OPTIONS\t$key\t$ctl_val\n";
    }

    # Buffered write errors only surface at close.
    close($log)
        or die "ERROR: Could not finish writing log file '$log_file': $!\n";
}
|---|
| 647 |
|
|---|
| 648 |
# Appends one "TYPE<TAB>KEY<TAB>VALUE" line to the log file.
#
# Arguments: $type, $key, $value.  Undefined fields are written as empty
# strings.  For STARTED and FINISHED entries a leading working-directory
# prefix is removed from $key, so logged paths are relative to the run
# directory.
#
# Dies if the log file cannot be opened or closed; previously such failures
# were silently ignored.
sub add_entry {
    my $self = shift;

    my $type  = shift;
    my $key   = shift;
    my $value = shift;

    my $CWD      = $self->{CWD};
    my $log_file = $self->{file_name};

    if (defined $key && defined $type && $type =~ /^STARTED$|^FINISHED$/) {
        # Cwd is called by its full name but is not loaded by this file.
        require Cwd;
        my $cwd = ($CWD) ? $CWD : Cwd::getcwd();

        # \Q..\E: the directory name is literal text, not a pattern, so a
        # path containing e.g. '+' or '(' is still stripped correctly.
        $key =~ s/^\Q$cwd\E\/*//;
    }

    # Same output as interpolating undef, but explicit.
    foreach my $field ($type, $key, $value) {
        $field = '' unless (defined $field);
    }

    open(my $log, '>>', $log_file)
        or die "ERROR: Could not open log file '$log_file' for appending: $!\n";
    print {$log} "$type\t$key\t$value\n";
    close($log)
        or die "ERROR: Could not finish writing log file '$log_file': $!\n";
}
|---|
| 670 |
|
|---|
| 671 |
# Accessor: returns the die count stored on the object (set by
# _load_old_log, and reset to 0 by _clean_files under force / always_try).
sub get_die_count {
    my ($self) = @_;

    my $times_died = $self->{die_count};

    return $times_died;
}
|---|
| 676 |
|
|---|
| 677 |
# Prints a banner to STDERR describing what will happen to the current
# contig, chosen by $self->{continue_flag}:
#    0  already processed, skipping
#    1  starting
#    2  retrying after wiping old data
#    3  retrying
#   -1  failed too often; the sequence is also written to
#       <out_dir>/<safe seq id>.died.fasta for debugging
#   -2  skipped because it is too short
# Dies on any other flag value.
#
# The banner text is unchanged from the earlier implementation.  The only
# behavioural difference is that a failure to write the .died.fasta file now
# produces a warning instead of passing silently.
sub report_status {
    my $self = shift;

    my $flag         = $self->{continue_flag};
    my $die_count    = $self->{die_count};
    my $seq_id       = $self->{params}->{seq_id};
    my $seq_out_name = Fasta::seqID2SafeID($seq_id);
    my $out_dir      = $self->{params}->{out_dir};
    my $fasta_ref    = $self->{params}->{fasta_ref};
    my $length       = $self->{params}->{seq_length};

    my $rule       = "#---------------------------------------------------------------------\n";
    my @identity   = ("SeqID: $seq_id\n", "Length: $length\n");
    my $died_fasta = "$out_dir/$seq_out_name.died.fasta";

    my @body;
    if ($flag == 0) {
        @body = ("The contig has already been processed!!\n",
                 "Maker will now skip to the next contig.\n",
                 "Run maker with the -f flag to force Maker to recompute all contig data.\n",
                 @identity);
    }
    elsif ($flag == 1) {
        @body = ("Now starting the contig!!\n",
                 @identity);
    }
    elsif ($flag == 2) {
        @body = ("Now retrying the contig!!\n",
                 "All contig related data will be erased before continuing!!\n",
                 @identity,
                 "Retry: $die_count!!\n");
    }
    elsif ($flag == 3) {
        @body = ("Now retrying the contig!!\n",
                 @identity,
                 "Retry: $die_count!!\n");
    }
    elsif ($flag == -1) {
        @body = ("The contig failed $die_count time!!\n",
                 "Maker will not try again!!\n",
                 "The contig will be stored in a fasta file that you can use for debugging.\n",
                 @identity,
                 "FASTA: $died_fasta\n");
    }
    elsif ($flag == -2) {
        @body = ("Skipping the contig because it is too short!!\n",
                 @identity);
    }
    else {
        die "ERROR: No valid continue flag\n";
    }

    # Closing rule is followed by two blank lines.
    print STDERR $rule, @body, $rule, "\n\n";

    if ($flag == -1) {
        # The dump is a debugging aid, so a failure to write it is reported
        # but does not abort the run.
        if (open(my $DFAS, '>', $died_fasta)) {
            print {$DFAS} $$fasta_ref;
            close($DFAS)
                or warn "MAKER WARNING: Could not finish writing $died_fasta: $!\n";
        }
        else {
            warn "MAKER WARNING: Could not write $died_fasta: $!\n";
        }
    }
}
|---|
| 745 |
|
|---|
| 746 |
# Returns the continue flag together with its symbolic name, as the list
# ($flag, $message):
#    0 FINISHED        1 STARTED        2 RETRY_CLEAN        3 RETRY
#   -1 DIED_SKIPPED_PERMANENT          -2 SKIPPED_SMALL
# Dies if the flag matches none of these values.
sub get_continue_flag {
    my ($self) = @_;

    my $flag = $self->{continue_flag};

    # Checked in this order with a numeric comparison.
    my @status_names = (
        [  0, 'FINISHED'               ],
        [  1, 'STARTED'                ],
        [  2, 'RETRY_CLEAN'            ],
        [  3, 'RETRY'                  ],
        [ -1, 'DIED_SKIPPED_PERMANENT' ],
        [ -2, 'SKIPPED_SMALL'          ],
    );

    foreach my $entry (@status_names) {
        my ($code, $message) = @{$entry};
        return $flag, $message if ($flag == $code);
    }

    die "ERROR: No valid continue flag\n";
}
|---|
| 775 |
|
|---|
| 776 |
1; |
|---|