Changeset 198
- Timestamp:
- 04/08/09 14:42:34 (8 months ago)
- Files:
- MPI/Install.PL (modified) (1 diff)
- MPI/mpi_evaluator (added)
- MPI/mpi_maker (modified) (2 diffs)
- bin/evaluator (modified) (2 diffs)
- bin/maker (modified) (2 diffs)
- lib/GI.pm (modified) (12 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
MPI/Install.PL
r177 r198 19 19 20 20 system("cp $FindBin::Bin/mpi_maker $FindBin::Bin/../bin/"); 21 system("cp $FindBin::Bin/mpi_evaluator $FindBin::Bin/../bin/"); MPI/mpi_maker
r195 r198 113 113 Usage: 114 114 115 mpi_maker [options] <maker_opts> <maker_bopts> <maker_exe> <evaluator>115 mpi_maker [options] <maker_opts> <maker_bopts> <maker_exe> 116 116 117 117 Maker is a program that produces gene annotations in GFF3 file format using … … 291 291 if (-e "maker_opts.ctl" && 292 292 -e "maker_bopts.ctl" && 293 -e "maker_exe.ctl" && 294 -e "evaluator.ctl" 293 -e "maker_exe.ctl" 295 294 ) { 296 295 297 296 @ctlfiles = ("maker_opts.ctl", 298 297 "maker_bopts.ctl", 299 "maker_exe.ctl", 300 "evaluator.ctl" 298 "maker_exe.ctl" 301 299 ); 302 300 } bin/evaluator
r127 r198 1 #! /usr/bin/perl -w 1 #!/usr/bin/perl -w 2 3 eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}' 4 if 0; # not running under some shell 2 5 3 6 use strict "vars"; … … 5 8 6 9 use FindBin; 7 8 use lib "$FindBin::Bin/../lib" 9 10 use lib "$FindBin::Bin/../lib"; 11 use lib "$FindBin::Bin/../perl/lib"; 12 use vars qw($RANK $LOG $CMD_ARGS); 13 14 BEGIN{ 15 $main::eva = 1; #tells scripts this is evaluator 16 17 if (not ($ENV{CGL_SO_SOURCE})) { 18 $ENV{CGL_SO_SOURCE} = "$FindBin::Bin/../lib/CGL/so.obo"; 19 } 20 if (not ($ENV{CGL_GO_SOURCE})) { 21 $ENV{CGL_GO_SOURCE} = "$FindBin::Bin/../lib/CGL/gene_ontology.obo" 22 } 23 24 $CMD_ARGS = join(' ', @ARGV); 25 26 #what to do on ^C 27 $SIG{'INT'} = sub { 28 print STDERR "\n\nProgram aborted by user!!\n\n"; 29 exit (1); 30 }; 31 32 #supress warnings from storable module 33 $SIG{'__WARN__'} = sub { 34 warn $_[0] if ( $_[0] !~ /Not a CODE reference/ && 35 $_[0] !~ /Can\'t store item CODE/ 36 ); 37 }; 38 39 #output to log file of seq that caused rank to die 40 $SIG{'__DIE__'} = 41 sub { 42 if (defined ($LOG) && defined $_[0]) { 43 my $die_count = $LOG->get_die_count(); 44 $die_count++; 45 46 $LOG->add_entry("DIED","RANK",$RANK); 47 $LOG->add_entry("DIED","COUNT",$die_count); 48 } 49 50 die "#----------------------\n", 51 "FATAL: failed!!\n", 52 "#----------------------\n", 53 $_[0] . 
"\n"; 54 }; 55 } 56 57 use Cwd; 58 use FileHandle; 59 use File::Path; 10 60 use Getopt::Long; 11 12 use evaluator::gff3_to_phatHit::gff3_classifier; 13 use evaluator::evaluate; 14 use maker_gff; 15 16 #----------------------------------------------------------------------------- 17 #----------------------------------- MAIN ------------------------------------ 18 #----------------------------------------------------------------------------- 61 use File::Temp qw(tempfile tempdir); 62 use Bio::DB::Fasta; 63 use GI; 64 use Dumper::GFF::GFFV3; 65 use Iterator::Any; 66 use Iterator::Fasta; 67 use Iterator::GFF3; 68 use Fasta; 69 use FastaChunker; 70 use maker::auto_annotator; 71 use cluster; 72 use repeat_mask_seq; 73 use runlog; 74 use ds_utility; 75 use GFFDB; 76 use Error qw(:try); 77 use Error::Simple; 78 use Process::MpiChunk; 79 use Process::MpiTiers; 80 81 $| = 1; 82 19 83 my $usage = " 20 21 Synopsis: 22 23 evaluator [options] gff3_type gff3_file fasta_file maker_opts.ctl maker_bopts.ctl maker_exe.ctl 24 25 Description: 26 27 Evaluator will evaluate the quality of a gene annotation based on 28 evidence from EST, cDNA, and protien data. 29 30 ### Add more detail ### 31 32 It should be passed four files on the command line. A gff_file 33 for a single gene. And three control files: maker_opts.ctl, 34 maker_bopts.ctl and maker_exe.ctl. Please see maker documentation 35 to learn more about control file format. 36 37 ### Add more detail ### 84 Usage: 85 mpi_evaluator [options] <eval_opts> <eval_bopts> <eval_exe> 86 38 87 39 88 Options: 40 89 41 -h Help 42 -a Run number. An optional number with which to label this run. Defaults 43 to 0. 44 -m Use maker gff3 file as the input. In this case multiple genes are allowed 45 in one gff file. 90 -genome_gff <file> Specify the maker gff file to evaluate. 91 92 -model_gff <file> Specify the external gff file to evaluate. 93 94 -genome <file> Specify the genome fasta file. 
This if optional if the 95 fasta entries are also found in the gff file. 96 97 -RM_off|R Turns all repeat masking off. 98 99 -retry <integer> Rerun failed contigs up to the specified count. 100 101 -cpus|c <integer> Tells how many cpus to use for BLAST analysis. 102 103 -force|f Forces program to delete old files before running again. 104 This will require all blast analyses to be rerun. 105 -again|a Caculate all output files again even if no settings have 106 changed. 107 108 -quiet|q Silences most of the status messages. 109 110 -CTL Generate empty control files in the current directory. 111 112 -help|? Prints this usage statement. 113 114 46 115 "; 47 116 48 die $usage if $opt_h; 49 50 $main::error_message = $opt_e; 51 52 my $gff3_file = shift @ARGV; 53 54 55 #---- build the gff3 objects here 56 my $gff3_phat_hits = evaluator::gff3_to_phatHit::gff3_classifier->new ($gff3_file, $gff3_type, $CTL_OPTIONS{'genome'}); 57 my $maker_hits = maker_gff::parse($gff3_file) if $opt_m; 58 59 while (my $eats = shift @{$gff3_phat_hits}) { 60 my $eval = evaluator::evaluate::prepare($fasta, 61 $$masked_fasta, 62 $exonerate_p_hits, 63 $exonerate_e_hits, 64 $blastx_hits, 65 $snaps, 66 $the_void, 67 $snap_command, 68 $snap_flank, 69 $CTL_OPTIONS{'single_exon'}, 70 $eats 71 ); 72 73 print $$eval; 74 } 117 #------------------------------------------------------------------------------- 118 #------------------------------------ MAIN ------------------------------------- 119 #------------------------------------------------------------------------------- 120 121 #---global variables 122 my %OPT; 123 my $rank = 0; 124 my $size = 1; 125 $RANK = $rank; 126 127 #---Process options on the command line 128 try{ 129 GetOptions("RM_off|R" => \$OPT{R}, 130 "force|f" => \$OPT{force}, 131 "genome|g=s" => \$OPT{genome}, 132 "cpus|c=i" => \$OPT{cpus}, 133 "predictor=s" =>\$OPT{predictor}, 134 "retry=i" =>\$OPT{retry}, 135 "evaluate" =>\$OPT{evaluate}, 136 "again|a" =>\$OPT{again}, 137 "quiet" 
=>\$main::quiet, 138 "CTL" => sub {GI::generate_control_files(); exit(0);}, 139 "help|?" => sub {print $usage; exit(0)} 140 ); 141 } 142 catch Error::Simple with{ 143 my $E = shift; 144 145 print STDERR $E->{-text}; 146 die "\n\nFailed parsing command line options!!\n\n"; 147 }; 148 149 #varibles that are persistent outside of try 150 my %CTL_OPT; 151 my $iterator; 152 my $DS_CTL; 153 my $GFF_DB; 154 my $build; 155 my @failed; 156 157 try{ 158 #get arguments off the command line 159 my @ctlfiles = @ARGV; 160 161 if (not @ctlfiles) { 162 if (-e "eval_opts.ctl" && 163 -e "eval_bopts.ctl" && 164 -e "eval_exe.ctl" 165 ) { 166 167 @ctlfiles = ("eval_opts.ctl", 168 "eval_bopts.ctl", 169 "eval_exe.ctl" 170 ); 171 } 172 else { 173 print STDERR "ERROR: Control files not found\n"; 174 print $usage; 175 exit(0); 176 } 177 } 178 179 #--Control file processing 180 181 #set up control options from control files 182 %CTL_OPT = GI::load_control_files(\@ctlfiles, \%OPT, $size); 183 184 #--open datastructure controller 185 $DS_CTL = ds_utility->new(\%CTL_OPT); 186 187 #--set up gff database 188 $GFF_DB = new GFFDB(\%CTL_OPT); 189 $build = $GFF_DB->next_build; 190 191 #---load genome multifasta/GFF3 file 192 $iterator = new Iterator::Any( -fasta => $CTL_OPT{'genome'}, 193 -gff => $CTL_OPT{'genome_gff'}, 194 ); 195 } 196 catch Error::Simple with{ 197 my $E = shift; 198 print STDERR $E->{-text}; 199 print STDERR "\n\nProgram failed while examining startup data\n", 200 "(control files and input fasta files)!!\n\n"; 201 my $code = 2; 202 $code = $E->{-value} if (defined($E->{-value})); 203 204 exit($code); 205 }; 206 207 my $tier; 208 while (my $fasta = $iterator->nextFasta() || shift @failed){ 209 $tier = Process::MpiTiers->new({fasta =>$fasta, 210 CTL_OPT => \%CTL_OPT, 211 DS_CTL => $DS_CTL, 212 GFF_DB => $GFF_DB, 213 build => $build}, 214 '0' 215 ); 216 217 next if($tier->terminated); 218 $tier->run while(! $tier->terminated && ! 
$tier->failed); 219 $DS_CTL->add_entry($tier->DS); 220 push(@failed, $tier->fasta) if ($tier->failed); 221 } 222 223 print STDERR "\n\nProgram is now finished!!!\n\n"; 224 75 225 #----------------------------------------------------------------------------- 76 226 #----------------------------------- SUBS ------------------------------------ bin/maker
r195 r198 82 82 Usage: 83 83 84 maker [options] <maker_opts> <maker_bopts> <maker_exe> <evaluator>84 maker [options] <maker_opts> <maker_bopts> <maker_exe> 85 85 86 86 Maker is a program that produces gene annotations in GFF3 file format using … … 195 195 if (-e "maker_opts.ctl" && 196 196 -e "maker_bopts.ctl" && 197 -e "maker_exe.ctl" && 198 -e "evaluator.ctl" 197 -e "maker_exe.ctl" 199 198 ) { 200 199 201 200 @ctlfiles = ("maker_opts.ctl", 202 201 "maker_bopts.ctl", 203 "maker_exe.ctl", 204 "evaluator.ctl" 202 "maker_exe.ctl" 205 203 ); 206 204 } lib/GI.pm
r197 r198 2126 2126 $CTL_OPT{'unmask'} = 1; 2127 2127 $CTL_OPT{'clean_up'} = 0; 2128 #evaluator below here 2129 $CTL_OPT{'side_thre'} = 5; 2130 $CTL_OPT{'eva_window_size'} = 70; 2131 $CTL_OPT{'eva_split_hit'} = 1; 2132 $CTL_OPT{'eva_hspmax'} = 100; 2133 $CTL_OPT{'eva_gspmax'} = 100; 2134 $CTL_OPT{'enable_fathom'} = 0; 2135 $CTL_OPT{'enable_fathom'} = 1 if($main::eva); 2128 2136 } 2129 2137 … … 2149 2157 $CTL_OPT{'en_score_limit'} = 20; 2150 2158 $CTL_OPT{'ep_score_limit'} = 20; 2159 #evaluator below here 2160 $CTL_OPT{'eva_pcov_blastn'} = 0.80; 2161 $CTL_OPT{'eva_pid_blastn'} = 0.85; 2162 $CTL_OPT{'eva_eval_blastn'} = 1e-10; 2163 $CTL_OPT{'eva_bit_blastn'} = 40; 2151 2164 } 2152 2165 … … 2185 2198 #evaluator 2186 2199 if ($type eq 'all' || $type eq 'eva') { 2187 $CTL_OPT{'eva_pcov_blastn'} = 0.80; 2188 $CTL_OPT{'eva_pid_blastn'} = 0.85; 2189 $CTL_OPT{'eva_eval_blastn'} = 1e-10; 2190 $CTL_OPT{'eva_bit_blastn'} = 40; 2191 $CTL_OPT{'side_thre'} = 5; 2192 $CTL_OPT{'eva_window_size'} = 70; 2193 $CTL_OPT{'eva_split_hit'} = 1; 2194 $CTL_OPT{'eva_hspmax'} = 100; 2195 $CTL_OPT{'eva_gspmax'} = 100; 2196 $CTL_OPT{'enable_fathom'} = 0; 2197 $CTL_OPT{'enable_fathom'} = 1 if($main::eva); 2200 2201 2198 2202 } 2199 2203 … … 2254 2258 $CTL_OPT{genome} = $OPT{genome} if (defined $OPT{genome}); 2255 2259 $CTL_OPT{genome_gff} = $OPT{genome_gff} if (defined $OPT{genome_gff}); 2260 $CTL_OPT{model_gff} = $OPT{model_gff} if (defined $OPT{model_gff}); 2256 2261 $CTL_OPT{force} = $OPT{force} if (defined $OPT{force}); 2257 2262 $CTL_OPT{predictor} = $OPT{predictor} if (defined $OPT{predictor}); … … 2380 2385 push (@infiles, '_tblastx', '_formater') if($CTL_OPT{altest}); 2381 2386 push (@infiles, 'genome') if($CTL_OPT{genome}); 2382 push (@infiles, 'genome') if(!$CTL_OPT{genome_gff} );2387 push (@infiles, 'genome') if(!$CTL_OPT{genome_gff} && !$main::eva); 2383 2388 push (@infiles, 'exonerate') if($CTL_OPT{est}); 2384 2389 push (@infiles, 'exonerate') if($CTL_OPT{protein}); … … 2397 2402 
push (@infiles, 'twinscan') if (grep (/twinscan/, @{$CTL_OPT{_run}})); 2398 2403 push (@infiles, 'jigsaw') if (grep (/jigsaw/, @{$CTL_OPT{_run}})); 2399 push (@infiles, 'fathom') if ($CTL_OPT{enable_fathom} );2404 push (@infiles, 'fathom') if ($CTL_OPT{enable_fathom} && $CTL_OPT{evaluate}); 2400 2405 push (@infiles, 'rm_gff') if($CTL_OPT{rm_gff}); 2401 2406 push (@infiles, 'est_gff') if($CTL_OPT{est_gff}); 2402 2407 push (@infiles, 'protein_gff') if($CTL_OPT{protein_gff}); 2403 2408 push (@infiles, 'genome_gff') if($CTL_OPT{genome_gff}); 2409 push (@infiles, 'genome_gff') if($main::eva && ! $CTL_OPT{model_gff}); 2404 2410 push (@infiles, 'pred_gff') if($CTL_OPT{pred_gff}); 2405 2411 push (@infiles, 'model_gff') if ($CTL_OPT{model_gff}); 2412 push (@infiles, 'model_gff') if ($main::eva && ! $CTL_OPT{genome_gff}); 2406 2413 push (@infiles, 'model_gff') if (grep (/gff/, $CTL_OPT{predictor}) && 2407 2414 (!$CTL_OPT{genome_gff} || … … 2494 2501 $error .= "The TMP value \'$CTL_OPT{TMP}\' is not a directory or does not exist\n"; 2495 2502 } 2503 if($main::eva && $CTL_OPT{genome_gff} && $CTL_OPT{model_gff}){ #only for evaluator 2504 $error .= "You can only specify a GFF3 file for genome_gff or model_gff no both!!\n"; 2505 } 2496 2506 2497 2507 die $error if ($error); 2498 2508 2499 2509 #--check genome fasta file 2510 my $fasta_gff = ($CTL_OPT{genome_gff}) ? $CTL_OPT{genome_gff} : $CTL_OPT{model_gff}; 2500 2511 my $iterator = new Iterator::Any( -fasta => $CTL_OPT{genome}, 2501 -gff => $ CTL_OPT{genome_gff}2512 -gff => $fasta_gff 2502 2513 ); 2503 2514 2504 2515 if ($iterator->number_of_entries() == 0) { 2505 my $genome = (! $CTL_OPT{genome}) ? $ CTL_OPT{genome_gff}: $CTL_OPT{genome};2516 my $genome = (! $CTL_OPT{genome}) ? 
$fasta_gff : $CTL_OPT{genome}; 2506 2517 die "ERROR: The file $genome contains no fasta entries\n"; 2507 2518 } … … 2570 2581 open (OUT, "> $dir/maker_opts.ctl") if(!$ev); 2571 2582 print OUT "#-----Genome (Required for De-Novo Annotation)\n" if(!$ev); 2572 print OUT "genome:$O{genome} #genome sequence file in fasta format\n"if(!$ev); 2573 print OUT "\n"if(!$ev); 2583 print OUT "#-----Genome (Required if not internal to GFF3 file)\n" if($ev); 2584 print OUT "genome:$O{genome} #genome sequence file in fasta format\n"; 2585 print OUT "\n"; 2574 2586 print OUT "#-----Re-annotation Options (Only Maker derived GFF3)\n" if(!$ev); 2575 2587 print OUT "#-----Maker Derived GFF3 Annotations to Evaluate (genome fasta is internal to GFF3)\n" if($ev); … … 2586 2598 print OUT "#-----External GFF3 Annotations to Evaluate\n" if($ev); 2587 2599 print OUT "model_gff:$O{model_gff} #gene models from an external gff3 file\n" if($ev); 2588 print OUT "genome:$O{genome} #genome sequence file in fasta format for GFF3\n"if($ev);2589 2600 print OUT "\n"if($ev); 2590 2601 print OUT "#-----EST Evidence (you should provide a value for at least one)\n"; … … 2639 2650 print OUT "clean_up:$O{clean_up} #removes theVoid directory with individual analysis files, 1 = yes, 0 = no\n"; 2640 2651 print OUT "TMP:$O{TMP} #specify a directory other than the system default temporary directory for temporary files\n"; 2652 print OUT "\n"; 2653 print OUT "#-----EVALUATOR Control Options\n"; 2654 print OUT "side_thre:$O{side_thre}\n"; 2655 print OUT "eva_window_size:$O{eva_window_size}\n"; 2656 print OUT "eva_split_hit:$O{eva_split_hit}\n"; 2657 print OUT "eva_hspmax:$O{eva_hspmax}\n"; 2658 print OUT "eva_gspmax:$O{eva_gspmax}\n"; 2659 print OUT "enable_fathom:$O{enable_fathom}\n"; 2641 2660 close (OUT); 2642 2661 … … 2666 2685 print OUT "eval_tblastx:$O{eval_tblastx} #tBlastx eval cutoff\n"; 2667 2686 print OUT "bit_tblastx:$O{bit_tblastx} #tBlastx bit cutoff\n"; 2687 print OUT "\n"; 2688 print OUT 
"eva_pcov_blastn:$O{eva_pcov_blastn} #Evaluator Blastn Percent Coverage Threshold EST-Genome Alignments\n"; 2689 print OUT "eva_pid_blastn:$O{eva_pid_blastn} #Evaluator Blastn Percent Identity Threshold EST-Genome Alignments\n"; 2690 print OUT "eva_eval_blastn:$O{eva_eval_blastn} #Evaluator Blastn eval cutoff\n"; 2691 print OUT "eva_bit_blastn:$O{eva_bit_blastn} #Evaluator Blastn bit cutoff\n"; 2668 2692 print OUT "\n"; 2669 2693 print OUT "ep_score_limit:$O{ep_score_limit} #exonerate protein percent of maximal score threshold\n"; … … 2695 2719 print OUT "qrna:$O{qrna} #location of qrna executable (not yet implemented)\n"; 2696 2720 print OUT "fathom:$O{fathom} #location of fathom executable (not yet implemented)\n"; 2697 close(OUT); 2698 2699 #--build evaluator.ctl file 2700 open (OUT, "> $dir/evaluator.ctl"); 2701 print OUT "#-----EVALUATOR Control Options\n"; 2702 print OUT "eva_pcov_blastn:$O{eva_pcov_blastn} #Blastn Percent Coverage Threshold EST-Genome Alignments\n"; 2703 print OUT "eva_pid_blastn:$O{eva_pid_blastn} #Blastn Percent Identity Threshold EST-Genome Alignments\n"; 2704 print OUT "eva_eval_blastn:$O{eva_eval_blastn} #Blastn eval cutoff\n"; 2705 print OUT "eva_bit_blastn:$O{eva_bit_blastn} #Blastn bit cutoff\n"; 2706 print OUT "side_thre:$O{side_thre}\n"; 2707 print OUT "eva_window_size:$O{eva_window_size}\n"; 2708 print OUT "eva_split_hit:$O{eva_split_hit}\n"; 2709 print OUT "eva_hspmax:$O{eva_hspmax}\n"; 2710 print OUT "eva_gspmax:$O{eva_gspmax}\n"; 2711 print OUT "enable_fathom:$O{enable_fathom}\n"; 2712 close (OUT); 2721 close(OUT); 2713 2722 } 2714 2723
