Changeset 127
- Timestamp:
- 01/23/09 12:58:16 (10 months ago)
- Files:
-
- MPI/threads-1.71/Makefile (added)
- MPI/threads-1.71/blib (added)
- MPI/threads-1.71/blib/arch (added)
- MPI/threads-1.71/blib/arch/.exists (added)
- MPI/threads-1.71/blib/arch/auto (added)
- MPI/threads-1.71/blib/arch/auto/threads (added)
- MPI/threads-1.71/blib/arch/auto/threads/.exists (added)
- MPI/threads-1.71/blib/arch/auto/threads/threads.bs (added)
- MPI/threads-1.71/blib/arch/auto/threads/threads.bundle (added)
- MPI/threads-1.71/blib/bin (added)
- MPI/threads-1.71/blib/bin/.exists (added)
- MPI/threads-1.71/blib/lib (added)
- MPI/threads-1.71/blib/lib/.exists (added)
- MPI/threads-1.71/blib/lib/auto (added)
- MPI/threads-1.71/blib/lib/auto/threads (added)
- MPI/threads-1.71/blib/lib/auto/threads/.exists (added)
- MPI/threads-1.71/blib/lib/threads.pm (added)
- MPI/threads-1.71/blib/man1 (added)
- MPI/threads-1.71/blib/man1/.exists (added)
- MPI/threads-1.71/blib/man3 (added)
- MPI/threads-1.71/blib/man3/.exists (added)
- MPI/threads-1.71/blib/script (added)
- MPI/threads-1.71/blib/script/.exists (added)
- MPI/threads-1.71/pm_to_blib (added)
- MPI/threads-1.71/threads.bs (added)
- bin/add_utr_gff (deleted)
- bin/add_utr_gff.pl (added)
- bin/evaluator (added)
- bin/gff3_merge (modified) (4 diffs)
- bin/maker (modified) (6 diffs)
- bin/maker2zff (deleted)
- bin/maker2zff.pl (added)
- bin/mpi_maker (added)
- bin/split_fasta (deleted)
- lib/Bio/Search/HSP/PhatHSP/Base.pm (modified) (1 diff)
- lib/Bio/Search/HSP/PhatHSP/augustus.pm (added)
- lib/Bio/Search/HSP/PhatHSP/est2genome.pm (added)
- lib/Bio/Search/HSP/PhatHSP/fgenesh.pm (added)
- lib/Bio/Search/HSP/PhatHSP/gff3.pm (added)
- lib/Bio/Search/HSP/PhatHSP/protein2genome.pm (added)
- lib/Bio/Search/HSP/PhatHSP/repeatmasker.pm (added)
- lib/Bio/Search/HSP/PhatHSP/snap.pm (added)
- lib/Bio/Search/Hit/PhatHit/Base.pm (modified) (24 diffs)
- lib/Bio/Search/Hit/PhatHit/augustus.pm (added)
- lib/Bio/Search/Hit/PhatHit/est2genome.pm (added)
- lib/Bio/Search/Hit/PhatHit/fgenesh.pm (added)
- lib/Bio/Search/Hit/PhatHit/gff3.pm (added)
- lib/Bio/Search/Hit/PhatHit/protein2genome.pm (added)
- lib/Bio/Search/Hit/PhatHit/repeatmasker.pm (added)
- lib/Bio/Search/Hit/PhatHit/snap.pm (added)
- lib/Datastore/docs (deleted)
- lib/Dumper/GFF/GFF3.pm (deleted)
- lib/Dumper/GFF/GFF3_DEF.pm (deleted)
- lib/Dumper/GFF/GFFV3.pm (modified) (18 diffs)
- lib/Dumper/SpaceBase.pm (deleted)
- lib/Error/docs (deleted)
- lib/Fasta.pm (modified) (7 diffs)
- lib/FastaChunker.pm (modified) (2 diffs)
- lib/FastaFile.pm (modified) (1 diff)
- lib/GFFDB.pm (added)
- lib/GI.pm (added)
- lib/Iterator.pm (modified) (1 diff)
- lib/Iterator/Any.pm (added)
- lib/Iterator/Fasta.pm (modified) (6 diffs)
- lib/Iterator/GFF3.pm (added)
- lib/PhatHit.pm (deleted)
- lib/PhatHit_utils.pm (modified) (7 diffs)
- lib/PhatHsp.pm (deleted)
- lib/Process/MakerChunk.pm (deleted)
- lib/Process/MakerTiers.pm (deleted)
- lib/Process/MpiChunk.pm (added)
- lib/Process/MpiTiers.pm (added)
- lib/Shared_Functions.pm (deleted)
- lib/Widget/.snap.pm.swp (deleted)
- lib/Widget/RepeatMasker.pm (modified) (3 diffs)
- lib/Widget/augustus.pm (modified) (8 diffs)
- lib/Widget/blastn.pm (modified) (1 diff)
- lib/Widget/blastx.pm (modified) (1 diff)
- lib/Widget/exonerate/est2genome.pm (modified) (4 diffs)
- lib/Widget/exonerate/est2genome.pm_orig (deleted)
- lib/Widget/exonerate/est2genome.pm_rev (deleted)
- lib/Widget/exonerate/protein2genome.pm (modified) (3 diffs)
- lib/Widget/exonerate/protein2genome.pm_orig (deleted)
- lib/Widget/exonerate/protein2genome.pm_rev (deleted)
- lib/Widget/fgenesh.pm (modified) (16 diffs)
- lib/Widget/formatdb.pm (added)
- lib/Widget/pressdb.pm (added)
- lib/Widget/setdb.pm (added)
- lib/Widget/snap.pm (modified) (7 diffs)
- lib/Widget/tblastx.pm (modified) (1 diff)
- lib/Widget/xdformat.pm (modified) (1 diff)
- lib/augustus (deleted)
- lib/clean.pm (modified) (3 diffs)
- lib/cluster.pm (modified) (12 diffs)
- lib/compare.pm (modified) (1 diff)
- lib/ds_utility.pm (added)
- lib/evaluator/evaluate.pm (modified) (6 diffs)
- lib/evaluator/funs.pm (modified) (3 diffs)
- lib/evaluator/gff3_to_phatHit (added)
- lib/evaluator/gff3_to_phatHit/FlyBase.pm (added)
- lib/evaluator/gff3_to_phatHit/Maker.pm (added)
- lib/evaluator/gff3_to_phatHit/WormBase.pm (added)
- lib/evaluator/gff3_to_phatHit/gff3_classifier.pm (added)
- lib/evaluator/hey_gff3_to_phatHit.pm (added)
- lib/evaluator/maker_gff.pm (added)
- lib/evaluator/master_codes (added)
- lib/evaluator/quality_index.pm (added)
- lib/evaluator/quality_seq.pm (modified) (5 diffs)
- lib/exonerate/PhatHSP (deleted)
- lib/exonerate/PhatHit (deleted)
- lib/exonerate/splice_info.pm (modified) (2 diffs)
- lib/external_gff (added)
- lib/external_gff/PhatHit.pm (added)
- lib/external_gff/PhatHsp.pm (added)
- lib/fgenesh (deleted)
- lib/maker/auto_annotator.pm (modified) (14 diffs)
- lib/maker/join.pm (modified) (4 diffs)
- lib/maker/quality_index.pm (modified) (3 diffs)
- lib/polisher.pm (modified) (2 diffs)
- lib/polisher/exonerate/est.pm (modified) (3 diffs)
- lib/polisher/exonerate/protein.pm (modified) (3 diffs)
- lib/repeat_mask_seq.pm (modified) (4 diffs)
- lib/repeatmasker (deleted)
- lib/runlog.pm (modified) (22 diffs)
- lib/snap (deleted)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
bin/gff3_merge
r116 r127 1 #! /usr/local/ bin/perl -w1 #! /usr/local/perl -w 2 2 3 3 use strict; … … 5 5 use File::Temp qw(tempfile); 6 6 7 my $usage = " 8 Usage: 9 gff3_merge <file1.gff> <file2.gff> -o <outfile.gff> 10 gff3_merge -d <datastore.index> -o <outfile.gff> 11 12 This script merges multiple small gff3 files into a single large gff3 file. 13 14 Options: 15 16 -d <file_name> Give a maker datastore file that gives the location of all gff3 files 17 -o <file_name> Specify the output file for the combined gff3 file (required) 18 19 "; 7 my $usage = ""; 20 8 21 9 my $datastore; … … 23 11 my $outfile; 24 12 25 GetOptions ("datastor|d=s" => \$datastore, 13 GetOptions ("datastor|d" => \$datastore, 14 "i=s" => \@files, 26 15 "o=s" => \$outfile, 27 16 "help|?" => sub{print $usage; exit();} 28 17 ); 29 18 30 @files = @ARGV unless($datastore); 31 32 if(! $datastore && ! @files || ! $outfile){ 19 if(! $datastore && ! @files){ 33 20 print $usage; 34 21 exit(); … … 42 29 @files = grep (/FINISHED/, @files); 43 30 44 my %uniq;45 31 foreach my $file (@files){ 46 chomp($file); 47 $file =~ s/^[^\t]+\t([^\t]+)\tFINISHED/$1/; 48 $file =~ /([^\/]+)\/$/; 32 $file =~ /([^\/]+)$/; 49 33 $file = "$file/$1.gff"; 50 $uniq{$file}++;51 34 } 52 @files = keys %uniq;53 35 } 54 36 bin/maker
r123 r127 6 6 use FindBin; 7 7 use lib "$FindBin::Bin/../lib"; 8 use vars qw($LOG $CMD_ARGS); 9 use File::Temp qw(tempfile tempdir); 10 8 use vars qw($LOG $RANK); 9 use Error qw(:warndie); 11 10 BEGIN{ 12 $ENV{CGL_SO_SOURCE} = "$FindBin::Bin/../lib/CGL/so.obo" if not ($ENV{CGL_SO_SOURCE}); 13 $ENV{CGL_GO_SOURCE} = "$FindBin::Bin/../lib/CGL/gene_ontology.obo" if not ($ENV{CGL_GO_SOURCE}); 14 $CMD_ARGS = join(' ', @ARGV); 15 11 $RANK = 'non_mpi'; #default value of rank 12 13 if (not ($ENV{CGL_SO_SOURCE})) { 14 $ENV{CGL_SO_SOURCE} = "$FindBin::Bin/../lib/CGL/so.obo"; 15 } 16 if (not ($ENV{CGL_GO_SOURCE})) { 17 $ENV{CGL_GO_SOURCE} = "$FindBin::Bin/../lib/CGL/gene_ontology.obo" 18 } 19 20 #what to do on ^C 16 21 $SIG{'INT'} = sub { 17 22 print STDERR "\n\nMaker aborted by user!!\n\n"; … … 19 24 }; 20 25 21 #output to log file of seq that caused rank to die 22 $SIG{'__DIE__'} = 23 sub { 26 #output to log file of seq that caused rank to die 27 $SIG{'__DIE__'} = sub { 24 28 if (defined ($LOG) && defined $_[0]) { 25 29 my $die_count = $LOG->get_die_count(); 26 30 $die_count++; 27 28 $LOG->add_entry("DIED","RANK", "non_mpi");31 32 $LOG->add_entry("DIED","RANK",$RANK); 29 33 $LOG->add_entry("DIED","COUNT",$die_count); 30 34 31 35 } 32 36 die "#----------------------\n", … … 34 38 "#----------------------\n", 35 39 $_[0] . "\n"; 40 36 41 }; 37 42 } 38 43 39 use Error qw(:try); 40 use Error::Simple; 44 use Cwd; 45 use Storable; 46 use FileHandle; 47 use File::Path; 48 use URI::Escape; 49 use Getopt::Long; 50 use File::Temp qw(tempfile tempdir); 51 use Datastore::MD5; 52 use PostData; 53 use Bio::DB::Fasta; 54 use GI; 41 55 use Dumper::GFF::GFFV3; 42 use Dumper::XML::Game; 43 use Datastore::MD5; 44 use URI::Escape; 45 use Storable; 46 use File::Path; 47 use Data::Dumper; 48 use Getopt::Long; 49 use FileHandle; 50 use PostData; 51 use Cwd; 56 use Iterator::Any; 57 use Iterator::Fasta; 58 use Iterator::GFF3; 52 59 use Fasta; 53 use Iterator::Fasta;54 60 use FastaChunker; 55 use Widget::RepeatMasker;56 use Widget::blastx;57 use Widget::tblastx;58 use Widget::blastn;59 use Widget::snap;60 use Widget::augustus;61 use PhatHit_utils;62 use Shadower;63 use Bio::DB::Fasta;64 use polisher::exonerate::protein;65 use polisher::exonerate::est;66 61 use maker::auto_annotator; 67 62 use cluster; 68 63 use repeat_mask_seq; 69 use maker::sens_spec;70 64 use runlog; 71 use Shared_Functions; 65 use ds_utility; 66 use GFFDB; 72 67 73 68 $| = 1; … … 76 71 Usage: 77 72 78 maker [options] <maker_opts.ctl> <maker_bopts.ctl> <maker_exe.ctl> 79 80 The three input arguments are user control files that specify how maker should behave. 81 All input files listed in the control options files must be in fasta format. Please 82 see maker documentation to learn more about control file format. The program will 83 automatically try and locate the user control files in the current working 84 directory if these arguments are not supplied when initializing maker. 85 86 It is important to note that maker does not try and recalculated data that it has 87 already calculated. For example, if you run an analysis twice on the same fasta file 88 you will notice that maker does not rerun any of the blast analyses but instead uses 89 the blast analyses stored from the previous run. To force maker to rerun all 90 analyses, use the -f flag. 73 maker [options] <maker_opts> <maker_bopts> <maker_exe> <evaluator> 74 75 Maker is a program that produces gene annotations in gff3 file format using 76 evidence such as EST alignments and protein homology. Maker can be used to 77 produce gene annotations for new genome as well as update annoations from 78 existing genome databases. 79 80 The four input arguments are user control files that specify how maker 81 should behave. All options for maker should be set in the control files, 82 but a few can also be set on the command line. The evaluator_opts.ctl 83 file contains control options specific for the evaluation of gene 84 annotations. 85 86 Input files listed in the control options files must be in fasta format. 87 Please see maker documentation to learn more about control file format. 88 The program will automatically try and locate the user control files in the 89 current working directory if these arguments are not supplied when 90 initializing maker. 91 92 It is important to note that maker does not try and recalculated data that 93 it has already calculated. For example, if you run an analysis twice on 94 the same fasta file you will notice that maker does not rerun any of the 95 blast analyses but instead uses the blast analyses stored from the previous 96 run. To force maker to rerun all analyses, use the -f flag. 97 91 98 92 99 Options: 93 100 94 -genome|g <file_name> Give MAKER a different genome file (this overrides the 95 control file value) 96 97 -predictor <snap> Selects the gene predictor to use when building annotations (Default 98 <augustus> is 'snap'). The option 'est2genome' builds annotations directly 99 <est2genome> from the EST evidence. This can also be set in the control files. 100 101 -RM_off|R Turns all repeat masking off (* See Warning) 102 103 -force|f Forces maker to rerun all analyses (erases all previous output). 104 105 -datastore|d Causes output to be written using datastore. This option is 106 automatically enabled if there are more than 1000 fasta entries in 107 the input genome file. Output can then be accessed using the 108 master_datastore_index file created by the maker. 109 110 -PREDS Outputs ab-initio predictions that do not overlap maker annotation 111 as gene annotations in the final gff3 output file (based on the 112 -predictor flag ). 113 114 -CTL Generates generic control files in the current working directory. 115 116 -quiet Silences most of the status messages. 117 118 -retry <integer> Re-run failed contigs up to the specified number of re-tries. 119 120 -cpus|c <integer> Tells how many cpus to use for Blast analysis (this overrides 121 contorol file value). 122 123 -help|? Prints this usage statement. 124 125 126 Warning: 127 128 *When using the -R flag, maker expects that the input genome file is already masked. Also 129 if your genome file contains lower case characters, maker will consider those characers 130 to be soft masked. So if your fasta file is all in lower case, nothing will align to it, 131 and there will be no maker output. 101 -genome|g <filename> Give MAKER a different genome file (this overrides 102 the control file value). 103 104 -predictor|p <type> Selects the gene predictor/predictors to use when 105 building annotations (this overrides the control 106 file value). Use a ',' to seperate types (no 107 spaces), i.e. -predictor=snap,augustus,fgenesh 108 109 Types: snap 110 augustus 111 fgenesh 112 twinscan 113 est2genome (Uses EST's directly) 114 gff (Passes through gff3 file annotations) 115 116 -RM_off|R Turns all repeat masking off. When using this 117 flag, maker expects that the input genome file is 118 already masked (This overrides all repeatmasking 119 control file values). 120 121 -datastore|d Causes output to be written using a datastore. This 122 option is automatically enabled if there are more 123 than 1000 fasta entries in the input genome file. 124 Output can then be accessed using the 125 master_datastore_index file created by the maker 126 (this overrides the control file value). 127 128 -retry <integer> Re-run failed contigs up to the specified number of 129 re-tries (This overrides the control file value). 130 131 -cpus|c <integer> Tells how many cpus to use for BLAST analysis (this 132 overrides contorol file value). 133 134 -force|f Forces maker to rerun all analyses (erases all 135 previous output). 136 137 -quiet|q Silences most of the status messages. 138 139 -CTL Generates generic control files in the current 140 working directory. 141 142 -help|? Prints this usage statement. 143 132 144 133 145 "; 134 146 135 #variables that are persistent outside of try block136 147 my %OPT; 137 my %CTL_OPTIONS; 138 139 try{ 140 #--Process arguments and the command line 141 GetOptions("RM_off|R" => \$OPT{R}, 142 "force|f" => \$OPT{f}, 143 "datastore|d" => \$OPT{d}, 144 "genome|g=s" => \$OPT{g}, 145 "cpus|c=i" => \$OPT{c}, 146 "PREDS" => \$OPT{PREDS}, 147 "predictor=s" =>\$OPT{predictor}, 148 "retry=i" =>\$OPT{retry}, 149 "quiet" => \$main::quiet, 150 "CTL" => sub {Shared_Functions::generate_control_files(); exit(0);}, 151 "help|?" => sub {print $usage; exit(0)}, 152 ); 153 154 if (defined($OPT{retry}) && $OPT{retry} <= 0) { 155 print STDERR "WARNING: the retry flag must be set to a value greater than zero\n"; 156 $OPT{retry} = 1; 148 149 #--Process arguments and the command line 150 GetOptions("RM_off|R" => \$OPT{R}, 151 "force|f" => \$OPT{f}, 152 "datastore|d" => \$OPT{d}, 153 "genome|g=s" => \$OPT{g}, 154 "cpus|c=i" => \$OPT{c}, 155 "predictor=s" =>\$OPT{predictor}, 156 "retry=i" =>\$OPT{retry}, 157 "quiet" => \$main::quiet, 158 "CTL" => sub {GI::generate_control_files(); exit(0);}, 159 "help|?" => sub {print $usage; exit(0)}, 160 ); 161 162 #------------------------------------------------------------------------------# 163 #------------------------------------ MAIN ------------------------------------# 164 #------------------------------------------------------------------------------# 165 166 #---get arguments off the command line 167 my $infile; 168 169 my @ctlfiles = @ARGV; 170 171 if (not @ctlfiles) { 172 if (-e "maker_opts.ctl" && 173 -e "maker_bopts.ctl" && 174 -e "maker_exe.ctl" && 175 -e "evaluator.ctl" 176 ) { 177 178 @ctlfiles = ("maker_opts.ctl", 179 "maker_bopts.ctl", 180 "maker_exe.ctl", 181 "evaluator.ctl" 182 ); 183 } 184 else { 185 print STDERR "ERROR: Control files not found\n"; 186 print $usage; 187 exit(0); 157 188 } 158 189 } 159 catch Error::Simple with{ 160 my $E = shift; 161 162 print STDERR $E->{-text}; 163 print STDERR "\n\nMaker failed parsing command line options!!\n\n"; 164 my $code = 2; 165 $code = $E->{-value} if (defined($E->{-value})); 166 167 exit($code); 168 }; 169 170 #----------------------------------------------------------------------------- 171 #----------------------------------- MAIN ------------------------------------ 172 #----------------------------------------------------------------------------- 173 174 #variables that are persistent outside of try block 175 my $fasta_iterator; 176 my $DS_FH; 177 178 try{ 179 #---get arguments off the command line 180 my $infile; 181 182 my @ctlfiles = @ARGV; 183 184 if (not @ctlfiles) { 185 if (-e "maker_opts.ctl" && -e "maker_bopts.ctl" && -e "maker_exe.ctl") { 186 @ctlfiles = ("maker_opts.ctl","maker_bopts.ctl","maker_exe.ctl"); 187 } 188 else { 189 print STDERR "ERROR: Control files not found\n"; 190 print $usage; 191 exit(0); 192 } 193 } 194 195 #---set up control options from control files 196 %CTL_OPTIONS = Shared_Functions::load_control_files(\@ctlfiles, \%OPT); 197 198 #---load genome fasta file 199 $fasta_iterator = new Iterator::Fasta($CTL_OPTIONS{'genome'}); 200 201 if ($fasta_iterator->number_of_entries() == 0) { 202 die "ERROR: The genome file $CTL_OPTIONS{'genome'} contains no fasta sequences\n"; 203 } 204 205 #---decide whether to use datastore 206 if ($fasta_iterator->number_of_entries() > 1000 && ! $OPT{d}) { 207 print STDERR "\n\n". 208 "WARNING: There are more than 1000 entries in the multi-fasta file.\n". 209 "Datastore will be used to avoid overloading the data structure of\n". 210 "the output directory.\n\n"; 211 212 $OPT{d} = 1; 213 } 214 215 #alter control options to use datastore 216 if ($OPT{d}) { 217 %CTL_OPTIONS = Shared_Functions::build_datastore(\%CTL_OPTIONS); 218 219 $DS_FH = new FileHandle(); 220 $DS_FH->open("> $CTL_OPTIONS{'dsindex'}"); 221 $DS_FH->autoflush(1); 222 } 223 224 #---set up blast databases for analyisis 225 Shared_Functions::create_blastdb(\%CTL_OPTIONS, \%OPT); 226 } 227 catch Error::Simple with{ 228 my $E = shift; 229 print STDERR $E->{-text}; 230 print STDERR "\n\nMaker failed while examining startup data\n", 231 "(control files and input fasta files)!!\n\n"; 232 my $code = 2; 233 $code = $E->{-value} if (defined($E->{-value})); 234 235 exit($code); 236 }; 237 238 my @failed; #holds failed contigs; 239 190 191 #---set up control options from control files 192 my %CTL_OPT = GI::load_control_files(\@ctlfiles, \%OPT); 193 194 #--open datastructure controller 195 my $DS_CTL = ds_utility->new(\%CTL_OPT); 196 197 #--set up gff database 198 my $GFF_DB = new GFFDB(\%CTL_OPT); 199 200 #---load genome multifasta/GFF3 file 201 my $iterator = new Iterator::Any( -fasta => $CTL_OPT{'genome'}, 202 -gff => $CTL_OPT{'genome_gff'}, 203 ); 204 240 205 #---iterate over each sequence in the fasta 241 CONTIG: while (my $fasta = $fasta_iterator->nextEntry() || shift @failed) { 242 next if (); 243 my $failed_contig = 0; #set failure flag to false 244 $LOG = undef; 245 246 #variables that are persistent outside of try block 247 my $query_def; 248 my $query_seq; 249 my $seq_id; 250 my $seq_out_name; 251 my $out_dir; 252 my $the_void; 253 254 try{ 255 #get fasta parts 256 $query_def = Fasta::getDef($fasta); #Get fasta header 257 $query_seq = Fasta::getSeq($fasta); #Get fasta sequence 258 ($seq_id) = $query_def =~ /^>(\S+)/; #Get sequence identifier off of fasta header 259 260 #-build a safe name for file names from the sequence identifier 261 $seq_out_name = uri_escape($seq_id, 262 '\*\?\|\\\/\'\"\{\}\<\>\;\,\^\(\)\$\~\:' 263 ); 264 265 #set up base directory for output 266 $out_dir = $CTL_OPTIONS{'out_base'}; 267 268 #use datastore as base if datastore flag is set 269 if ($OPT{d}) { 270 $out_dir = $CTL_OPTIONS{'datastore'}->id_to_dir($seq_out_name); 271 $CTL_OPTIONS{'datastore'}->mkdir($seq_out_name) || 272 die "ERROR: could not make directory $out_dir\n"; 273 } 274 275 #-set up void directory where analysis is stored 276 $the_void = Shared_Functions::build_the_void($seq_out_name, $out_dir); 277 } 278 catch Error::Simple with{ 279 my $E = shift; 280 281 print STDERR $E->{-text}; 282 print STDERR "\n\nMaker failed while examining contents of the fasta file!!\n\n"; 283 my $code = 2; 284 $code = $E->{-value} if (defined($E->{-value})); 285 286 exit($code); 287 }; 288 289 try{ 290 #-build and proccess the run log 291 $LOG = runlog->new(\%CTL_OPTIONS, \%OPT, $the_void, "run.log"); 292 } 293 catch Error::Simple with{ 294 my $E = shift; 295 296 print STDERR $E->{-text}; 297 print STDERR "\n\nMaker failed while trying to building/processing the run.log file!!\n\n"; 298 my $code = 2; 299 $code = $E->{-value} if (defined($E->{-value})); 300 301 exit($code); 302 }; 303 304 #skip contig if too short 305 if (length($$query_seq) < $CTL_OPTIONS{min_contig}){ 306 print STDERR "#---------------------------------------------------------------------\n", 307 "Skipping the contig \'$seq_id\' because it is too small\n", 308 "#---------------------------------------------------------------------\n\n\n"; 309 310 if ($OPT{d}) { 311 print $DS_FH "$seq_id\t$out_dir\tSKIPPED_SMALL\n"; 312 } 313 314 next; 315 } 316 317 #==Decide whether to skip the current contig based on log 318 my $continue_flag = $LOG->get_continue_flag(); 319 if ($continue_flag == 1) { 320 print STDERR "#---------------------------------------------------------------------\n", 321 "Now starting the contig:$seq_id!!\n", 322 "#---------------------------------------------------------------------\n\n\n"; 323 324 if ($OPT{d}) { 325 print $DS_FH "$seq_id\t$out_dir\tSTARTED\n"; 326 } 327 } 328 elsif ($continue_flag == 0) { 329 print STDERR "#---------------------------------------------------------------------\n", 330 "The contig:$seq_id has already been processed!!\n", 331 "Maker will now skip to the next contig.\n", 332 "Run maker with the -f flag to force Maker to recompute all contig data.\n", 333 "#---------------------------------------------------------------------\n\n\n"; 334 335 if ($OPT{d}) { 336 print $DS_FH "$seq_id\t$out_dir\tFINISHED\n"; 337 } 338 339 next; 340 } 341 elsif ($continue_flag == -1) { 342 print STDERR "#---------------------------------------------------------------------\n", 343 "The contig:$seq_id failed on the last run!!\n", 344 "Maker will now skip to the next contig rather than try again.\n", 345 "Run maker with the -f flag to force Maker to recompute all contig data.\n", 346 "Run maker with the -died flag to have Maker retry data that failed.\n", 347 "#---------------------------------------------------------------------\n\n\n"; 348 349 if ($OPT{d}) { 350 print $DS_FH "$seq_id\t$out_dir\tDIED_SKIPPED\n"; 351 } 352 353 next; 354 } 355 elsif ($continue_flag == 2) { 356 print STDERR "#---------------------------------------------------------------------\n", 357 "The failed contig:$seq_id will now run again!!\n", 358 "#---------------------------------------------------------------------\n\n\n"; 359 360 if ($OPT{d}) { 361 print $DS_FH "$seq_id\t$out_dir\tRETRY\n"; 362 } 363 } 364 elsif ($continue_flag == -2) { 365 print STDERR "#---------------------------------------------------------------------\n", 366 "Skipping the contig:$seq_id!!\n", 367 "However this contig is still not finished!!\n", 368 "#---------------------------------------------------------------------\n\n\n"; 369 370 if ($OPT{d}) { 371 print $DS_FH "$seq_id\t$out_dir\tSKIPPED\n"; 372 } 373 374 next; 375 } 376 elsif ($continue_flag == -3) { 377 my $die_count = $LOG->get_die_count(); 378 print STDERR "#---------------------------------------------------------------------\n", 379 "The contig:$seq_id failed $die_count time!!\n", 380 "Maker will not try again!!\n", 381 "The contig will be stored in $out_dir/$seq_out_name.died.fasta\n", 382 "You can use this fasta file to debug and re-run this sequence\n", 383 "#---------------------------------------------------------------------\n\n\n"; 384 385 open (D_FASTA, "> $out_dir/$seq_out_name.died.fasta"); 386 print D_FASTA $fasta; 387 close (D_FASTA); 388 389 if ($OPT{d}) { 390 print $DS_FH "$seq_id\t$out_dir\tDIED_SKIPPED_PERMANENT\n"; 391 } 392 393 next; 394 } 395 206 while (my $fasta = $iterator->nextFasta()) { 207 $LOG = undef; #reset global variable LOG with each contig 208 209 #build uppercase fasta 210 my $fasta = Fasta::ucFasta(\$fasta); 211 212 #get fasta parts 213 my $q_def = Fasta::getDef(\$fasta); #Get fasta header 214 my $q_seq_ref = Fasta::getSeqRef(\$fasta); #Get reference to fasta sequence 215 my $seq_id = Fasta::def2SeqID($q_def); #Get sequence identifier 216 my $safe_seq_id = Fasta::seqID2SafeID($seq_id); #Get safe named identifier 217 218 #set up base and void directories for output 219 my ($out_dir, $the_void) = $DS_CTL->seq_dirs($seq_id); 220 221 #-build and proccess the run log 222 $LOG = runlog->new( \%CTL_OPT, 223 { 'seq_id' => $seq_id, 224 'seq_length' => length($$q_seq_ref), 225 'out_dir' => $out_dir, 226 'the_void' => $the_void, 227 'fasta_ref' => \$fasta 228 }, 229 "$the_void/run.log" 230 ); 231 232 my ($c_flag, $message) = $LOG->get_continue_flag(); 233 $DS_CTL->add_entry($seq_id, $out_dir, $message); 234 235 next if(! $c_flag || $c_flag <= 0); 236 396 237 #==from here on fastas are proccessed as chunks 397 238 398 239 #-set up variables that are heldover from last chunk 399 240 my $holdover_blastn; 400 241 my $holdover_blastx; 401 242 my $holdover_tblastx; 402 my $holdover_preds; 403 243 my $holdover_pred; 244 my $holdover_est_gff; 245 my $holdover_altest_gff; 246 my $holdover_prot_gff; 247 my $holdover_pred_gff; 248 my $holdover_model_gff; 249 404 250 #-set up variables that are the result of chunk accumulation 405 251 my $masked_total_seq; 406 252 my $p_fastas; 407 253 my $t_fastas; 408 409 my $GFF3 = new Dumper::GFF::GFFV3(); 410 $GFF3->seq($query_seq); 411 $GFF3->seq_id($seq_id); 254 255 my $build = $GFF_DB->next_build; 256 my $GFF3 = Dumper::GFF::GFFV3->new("$out_dir/$safe_seq_id.gff", 257 $build, 258 $the_void 259 ); 260 $GFF3->set_current_contig($seq_id, $q_seq_ref); 412 261 413 262 #==REPEAT MASKING HERE 414 415 try{ #coresponds to levels 0-3 in mpi_maker 416 if ($OPT{R}) { 417 print STDERR "Repeatmasking skipped!!\n"; 418 $masked_total_seq = $$query_seq; 419 } 420 elsif ($CTL_OPTIONS{rm_gff}) { 421 $masked_total_seq = repeat_mask_seq::gff(uc($$query_seq), 422 $seq_id, 423 $CTL_OPTIONS{'rm_gff'} 424 ); 425 } 426 else { 427 my $fasta_chunker = new FastaChunker(); 428 $fasta_chunker->parent_fasta($fasta); 429 $fasta_chunker->chunk_size($CTL_OPTIONS{'max_dna_len'}); 430 $fasta_chunker->min_size($CTL_OPTIONS{'split_hit'}); 431 $fasta_chunker->load_chunks(); 263 my $fasta_chunker = new FastaChunker(); 264 $fasta_chunker->parent_fasta($fasta); 265 $fasta_chunker->chunk_size($CTL_OPT{max_dna_len}); 266 $fasta_chunker->min_size($CTL_OPT{split_hit}); 267 $fasta_chunker->load_chunks(); 268 my $chunk_count = 0; 269 270 while (my $chunk = $fasta_chunker->get_chunk($chunk_count++)) { 271 #-- repeatmask with gff3 input 272 my $rm_gff_keepers = []; 273 if ($CTL_OPT{go_gffdb}) { 274 $rm_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, 275 $q_seq_ref, 276 'repeat' 277 ); 278 #mask the chunk 279 $chunk = repeat_mask_seq::mask_chunk($chunk, $rm_gff_keepers); 280 } 281 282 #-- repeatmask with RepeatMasker 283 my $rm_rb_keepers = []; #repeat masker RepBase 284 if ($CTL_OPT{model_org}) { #model organism repeats 285 $rm_rb_keepers = GI::repeatmask($chunk, 286 $the_void, 287 $safe_seq_id, 288 $CTL_OPT{model_org}, 289 $CTL_OPT{RepeatMasker}, 290 '', 291 $CTL_OPT{cpus}, 292 $CTL_OPT{force}, 293 $LOG 294 ); 432 295 433 my $chunk_count = 0; 296 #mask the chunk 297 $chunk = repeat_mask_seq::mask_chunk($chunk, $rm_rb_keepers); 298 } 299 my $rm_sp_keepers = []; #repeat masker species 300 if ($CTL_OPT{rmlib}) { #species specific repeats; 301 $rm_sp_keepers = GI::repeatmask($chunk, 302 $the_void, 303 $safe_seq_id, 304 $CTL_OPT{model_org}, 305 $CTL_OPT{RepeatMasker}, 306 $CTL_OPT{rmlib}, 307 $CTL_OPT{cpus}, 308 $CTL_OPT{force}, 309 $LOG 310 ); 434 311 435 while (my $chunk = $fasta_chunker->get_chunk($chunk_count++)) { 436 $chunk->seq(uc($chunk->seq())); #must be upper case before soft masking 437 438 my $rma_keepers = []; 439 440 #-- repeatmask the input file 441 if(! $CTL_OPTIONS{rmlib_only}){ 442 $chunk->seq(uc($chunk->seq())); #must be upper case before soft masking 443 444 my $rma_keepers1 = Shared_Functions::repeatmask($chunk, 445 $the_void, 446 $seq_out_name, 447 $CTL_OPTIONS{'model_org'}, 448 $CTL_OPTIONS{'RepeatMasker'}, 449 '', 450 $CTL_OPTIONS{'cpus'}, 451 $OPT{f}, 452 $LOG 453 ); 454 push(@{$rma_keepers}, @{$rma_keepers1}); 455 456 #-mask the chunk using repeatmasker hits 457 $chunk = repeat_mask_seq::mask_chunk($chunk, $rma_keepers1); 458 } 459 460 #-mask species specific repeats; 461 if($CTL_OPTIONS{rmlib}){ 462 my $rma_keepers2 = Shared_Functions::repeatmask($chunk, 463 $the_void, 464 $seq_out_name, 465 $CTL_OPTIONS{'model_org'}, 466 $CTL_OPTIONS{'RepeatMasker'}, 467 $CTL_OPTIONS{'rmlib'}, 468 $CTL_OPTIONS{'cpus'}, 469 $OPT{f}, 470 $LOG 471 ); 472 push(@{$rma_keepers}, @{$rma_keepers2}); 473 474 #-mask the chunk using repeatmasker hits 475 $chunk = repeat_mask_seq::mask_chunk($chunk, $rma_keepers2); 476 } 477 478 #-blastx against a repeat library (for better masking) 479 my $repeat_blastx_keepers = []; 480 $repeat_blastx_keepers = Shared_Functions::blastx($chunk, 481 $CTL_OPTIONS{'repeat_protein'}, 482 $the_void, 483 $seq_out_name, 484 $CTL_OPTIONS{blastx}, 485 $CTL_OPTIONS{eval_blastx}, 486 $CTL_OPTIONS{bit_blastx}, 487 $CTL_OPTIONS{percov_blastx}, 488 $CTL_OPTIONS{percid_blastx}, 489 $CTL_OPTIONS{split_hit}, 490 $CTL_OPTIONS{cpus}, 491 $OPT{f}, 492 $LOG 493 ) if($CTL_OPTIONS{'repeat_protein'}); 494 495 #-mask the chunk using blastx hits 496 $chunk = repeat_mask_seq::mask_chunk($chunk, $repeat_blastx_keepers); 497 498 #-combine and cluster blastx and repeatmasker hits 499 #-to get consensus repeat hits for gff3 and XML annotations 500 my $rm_keepers = repeat_mask_seq::process($rma_keepers, 501 $repeat_blastx_keepers, 502 $query_seq 503 ); 504 505 #-add repeats to GFF3 506 $GFF3->repeat_hits($rm_keepers); 507 508 #-build/fill big masked sequence 509 $masked_total_seq .= $chunk->seq(); 312 #mask the chunk 313 $chunk = repeat_mask_seq::mask_chunk($chunk, $rm_sp_keepers); 314 } 315 316 #-- blastx against a repeat library (for better masking) 317 my $rm_blastx_keepers = []; 318 if ($CTL_OPT{_repeat_protein}) { 319 my $res_dir; 320 foreach my $db (@{$CTL_OPT{r_db}}) { 321 $res_dir = 322 GI::blastx_as_chunks($chunk, 323 $db, 324 $the_void, 325 $safe_seq_id, 326 $CTL_OPT{_blastx}, 327 $CTL_OPT{eval_rm_blastx}, 328 $CTL_OPT{split_hit}, 329 $CTL_OPT{cpus}, 330 $CTL_OPT{_repeat_protein}, 331 $CTL_OPT{_formater}, 332 0, 333 $CTL_OPT{force}, 334 $LOG, 335 1 336 ); 510 337 } 511 } 338 $rm_blastx_keepers = GI::collect_blastx($chunk, 339 $res_dir, 340 $CTL_OPT{eval_rm_blastx}, 341 $CTL_OPT{bit_rm_blastx}, 342 $CTL_OPT{pcov_rm_blastx}, 343 $CTL_OPT{pid_rm_blastx}, 344 $CTL_OPT{split_hit}, 345 $CTL_OPT{force}, 346 $LOG 347 ); 348 349 #mask the chunk 350 $chunk = repeat_mask_seq::mask_chunk($chunk, $rm_blastx_keepers); 351 } 352 353 #-combine and cluster repeat hits for consensus 354 my $rm_keepers = repeat_mask_seq::process($rm_gff_keepers, 355 $rm_rb_keepers, 356 $rm_sp_keepers, 357 $rm_blastx_keepers, 358 $q_seq_ref 359 ); 360 361 #-add repeats to GFF3 362 $GFF3->add_repeat_hits($rm_keepers); 363 364 #-build big masked sequence 365 $masked_total_seq .= $chunk->seq(); 512 366 } 513 catch Error::Simple with{ 514 my $E = shift; 515 516 print STDERR $E->{-text}; 517 print STDERR "\n\nMaker failed while repeat masking!!\n"; 518 print STDERR "FAILED CONTIG:$seq_id\n\n"; 519 520 print $DS_FH "$seq_id\t$out_dir\tDIED\n"; 521 522 my $die_count = $LOG->get_die_count(); 523 $die_count++; 524 525 $LOG->add_entry("DIED","RANK","non_mpi"); 526 $LOG->add_entry("DIED","COUNT",$die_count); 527 $failed_contig = 1; 528 push(@failed, $fasta); 529 }; 530 531 next CONTIG if ($failed_contig); 532 533 #variables that are persistent outside of try block 534 my $masked_fasta; 535 my $snaps; 536 my $augus; 537 my $fasta_chunker; 538 my $chunk_count; 539 my $proteins; 540 my $transcripts; 541 my $alt_ests; 542 my $fasta_t_index; 543 my $fasta_p_index; 544 my $fasta_a_index; 545 546 try{ #coresponds to level 4 in mpi-maker 547 $masked_fasta = Fasta::toFasta($query_def.' masked', \$masked_total_seq); 548 FastaFile::writeFile($masked_fasta ,$the_void."/query.masked.fasta"); 549 550 #==SNAP ab initio here 551 $snaps = []; 552 $snaps = Shared_Functions::snap($masked_fasta, 553 $the_void, 554 $seq_out_name, 555 $CTL_OPTIONS{snap}, 556 $CTL_OPTIONS{'snaphmm'}, 557 $OPT{f}, 558 $LOG 559 )if ($CTL_OPTIONS{'snap'}); 560 561 #==AUGUSTUS ab initio here 562 $augus = []; 563 $augus = Shared_Functions::augustus($masked_fasta, 564 $the_void, 565 $seq_out_name, 566 $CTL_OPTIONS{'augustus'}, 567 $CTL_OPTIONS{'augustus_species'}, 568 $OPT{f}, 569 $LOG 570 ) if ($CTL_OPTIONS{'augustus'}); 571 572 #--build an index of the databases 573 $proteins = $CTL_OPTIONS{'protein'}; 574 $transcripts = $CTL_OPTIONS{'est'}; 575 $alt_ests = $CTL_OPTIONS{'alt_est'}; 576 $fasta_t_index = Shared_Functions::build_fasta_index($transcripts); 577 $fasta_p_index = Shared_Functions::build_fasta_index($proteins); 578 $fasta_a_index = Shared_Functions::build_fasta_index($alt_ests) if($alt_ests); 579 580 #--run future blast analysis in these new chunks 581 $fasta_chunker = new FastaChunker(); 582 $fasta_chunker = new FastaChunker(); 583 $fasta_chunker->parent_fasta($$masked_fasta); 584 $fasta_chunker->chunk_size($CTL_OPTIONS{'max_dna_len'}); 585 $fasta_chunker->min_size($CTL_OPTIONS{'split_hit'}); 586 $fasta_chunker->load_chunks(); 587 588 $chunk_count = 0; 589 } 590 catch Error::Simple with{ 591 my $E = shift; 592 593 print STDERR $E->{-text}; 594 print STDERR "\n\nMaker failed at ab-initio gene predictions!!\n"; 595 print STDERR "FAILED CONTIG:$seq_id\n\n"; 596 597 print $DS_FH "$seq_id\t$out_dir\tDIED\n"; 598 599 my $die_count = $LOG->get_die_count(); 600 $die_count++; 601 602 $LOG->add_entry("DIED","RANK","non_mpi"); 603 $LOG->add_entry("DIED","COUNT",$die_count); 604 $failed_contig = 1; 605 push(@failed, $fasta); 606 }; 607 608 next CONTIG if ($failed_contig); 609 367 368 my $masked_fasta = Fasta::toFasta($q_def.' masked', \$masked_total_seq); 369 my $masked_file = $the_void."/query.masked.fasta"; 370 FastaFile::writeFile(\$masked_fasta ,$masked_file); 371 372 #==ab initio predictions here 373 my $preds = GI::abinits($masked_file, 374 $the_void, 375 $safe_seq_id, 376 \%CTL_OPT, 377 $LOG 378 ); 379 380 #==QRNA noncoding RNA prediction here 381 my $qra_preds = []; 382 383 #--build an index of the databases 384 my $proteins = $CTL_OPT{_protein}; 385 my $trans = $CTL_OPT{_est}; 386 my $altests = $CTL_OPT{_altest}; 387 my $fasta_t_index = GI::build_fasta_index($trans) if($trans); 388 my $fasta_p_index = GI::build_fasta_index($proteins) if($proteins); 389 my $fasta_a_index = GI::build_fasta_index($altests) if($altests); 390 391 #--reset fastachunker for masked chunks 392 $fasta_chunker = new FastaChunker(); 393 $fasta_chunker = new FastaChunker(); 394 $fasta_chunker->parent_fasta($masked_fasta); 395 $fasta_chunker->chunk_size($CTL_OPT{max_dna_len}); 396 $fasta_chunker->min_size($CTL_OPT{split_hit}); 397 $fasta_chunker->load_chunks(); 398 399 $chunk_count = 0; 400 610 401 while (my $chunk = $fasta_chunker->get_chunk($chunk_count++)) { 611 402 #==BLAST ANALYSIS HERE 612 613 #variables that are persistent outside of try block 614 my $blastn_keepers; 615 616 try{ #coresponds to levels 5-6 in mpi_maker 617 #-blastn search the file against ESTs 618 $blastn_keepers = Shared_Functions::blastn($chunk, 619 $transcripts, 620 $the_void, 621 $seq_out_name, 622 $CTL_OPTIONS{blastn}, 623 $CTL_OPTIONS{eval_blastn}, 624 $CTL_OPTIONS{bit_blastn}, 625 $CTL_OPTIONS{percov_blastn}, 626 $CTL_OPTIONS{percid_blastn}, 627 $CTL_OPTIONS{split_hit}, 628 $CTL_OPTIONS{cpus}, 629 $OPT{f}, 630 $LOG 631 ); 632 } 633 catch Error::Simple with{ 634 my $E = shift; 635 636 print STDERR $E->{-text}; 637 print STDERR "\n\nMaker failed at blastn of ESTs!!\n"; 638 print STDERR "FAILED CONTIG:$seq_id\n\n"; 403 #-blastn search the file against ESTs 404 my $blastn_keepers = []; 405 if ($CTL_OPT{_est}) { 406 my $res_dir; 407 foreach my $db (@{$CTL_OPT{e_db}}) { 408 $res_dir = GI::blastn_as_chunks($chunk, 409 $db, 410 $the_void, 411 $safe_seq_id, 412 $CTL_OPT{_blastn}, 413 $CTL_OPT{eval_blastn}, 414 $CTL_OPT{split_hit}, 415 $CTL_OPT{cpus}, 416 $CTL_OPT{_est}, 417 $CTL_OPT{_formater}, 418 0, 419 $CTL_OPT{force}, 420 $LOG, 421 1 422 ); 423 } 424 $blastn_keepers = GI::collect_blastn($chunk, 425 $res_dir, 426 $CTL_OPT{eval_blastn}, 427 $CTL_OPT{bit_blastn}, 428 $CTL_OPT{pcov_blastn}, 429 $CTL_OPT{pid_blastn}, 430 $CTL_OPT{split_hit}, 431 $CTL_OPT{force}, 432 $LOG 433 ); 434 } 435 436 #-blastx search the masked input file 437 my $blastx_keepers = []; 438 if ($CTL_OPT{_protein}) { 439 my $res_dir; 440 foreach my $db (@{$CTL_OPT{p_db}}) { 441 $res_dir = GI::blastx_as_chunks($chunk, 442 $db, 443 $the_void, 444 $safe_seq_id, 445 $CTL_OPT{_blastx}, 446 $CTL_OPT{eval_blastx}, 447 $CTL_OPT{split_hit}, 448 $CTL_OPT{cpus}, 449 $CTL_OPT{_protein}, 450 $CTL_OPT{_formater}, 451 0, 452 $CTL_OPT{force}, 453 $LOG, 454 1 455 ); 456 } 457 $blastx_keepers = GI::collect_blastx($chunk, 458 $res_dir, 459 $CTL_OPT{eval_blastx}, 460 $CTL_OPT{bit_blastx}, 461 $CTL_OPT{pcov_blastx}, 462 $CTL_OPT{pid_blastx}, 463 $CTL_OPT{split_hit}, 464 $CTL_OPT{force}, 465 $LOG 466 ); 467 } 468 469 #-tblastx search the masked input file 470 my $tblastx_keepers = []; 471 if ($CTL_OPT{_altest}) { 472 my $res_dir; 473 foreach my $db (@{$CTL_OPT{a_db}}) { 474 $res_dir = GI::tblastx_as_chunks($chunk, 475 $db, 476 $the_void, 477 $safe_seq_id, 478 $CTL_OPT{_tblastx}, 479 $CTL_OPT{eval_tblastx}, 480 $CTL_OPT{split_hit}, 481 $CTL_OPT{cpus}, 482 $CTL_OPT{_altest}, 483 $CTL_OPT{_formater}, 484 0, 485 $CTL_OPT{force}, 486 $LOG, 487 1 488 ); 489 } 490 $tblastx_keepers = GI::collect_tblastx($chunk, 491 $res_dir, 492 $CTL_OPT{eval_tblastx}, 493 $CTL_OPT{bit_tblastx}, 494 $CTL_OPT{pcov_tblastx}, 495 $CTL_OPT{pid_tblastx}, 496 $CTL_OPT{split_hit}, 497 $CTL_OPT{force}, 498 $LOG 499 ); 500 } 501 502 #-get only those predictions on the chunk 503 my $preds_on_chunk = GI::get_preds_on_chunk($preds, 504 $chunk 505 ); 506 507 #==GFF3 passthrough of evidence 508 my $prot_gff_keepers = []; 509 my $est_gff_keepers = []; 510 my $altest_gff_keepers = []; 511 my $model_gff_keepers = []; 512 my $pred_gff_keepers = []; 513 if ($CTL_OPT{go_gffdb}) { 514 #-protein evidence passthraough 515 $prot_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, 516 $q_seq_ref, 517 'protein' 518 ); 519 #-est evidence passthrough 520 $est_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, 521 $q_seq_ref, 522 'est' 523 ); 524 #-altest evidence passthrough 525 $altest_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, 526 $q_seq_ref, 527 'altest' 528 ); 529 #-gff gene annotation passthrough here 530 $model_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, 531 $q_seq_ref, 532 'model' 533 ); 534 #-pred passthrough 535 $pred_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, 536 $q_seq_ref, 537 'repeat' 538 ); 539 } 540 541 #==merge heldover Phathits from last round 542 if ($chunk->number != 0) { #if not first chunk 543 #reviews heldover blast hits, 544 #then merges and reblasts them if they cross the divide 545 ($blastn_keepers, 546 $blastx_keepers, 547 $tblastx_keepers) = GI::merge_resolve_hits(\$masked_fasta, 548 $fasta_t_index, 549 $fasta_p_index, 550 $fasta_a_index, 551 $blastn_keepers, 552 $blastx_keepers, 553 $tblastx_keepers, 554 $holdover_blastn, 555 $holdover_blastx, 556 $holdover_tblastx, 557 $the_void, 558 \%CTL_OPT, 559 $LOG 560 ); 561 #combine remaining holdover types 562 push(@{$preds_on_chunk}, @{$holdover_pred}); 563 push(@{$pred_gff_keepers}, @{$holdover_pred_gff}); 564 push(@{$est_gff_keepers}, @{$holdover_est_gff}); 565 push(@{$altest_gff_keepers}, @{$holdover_altest_gff}); 566 push(@{$prot_gff_keepers}, @{$holdover_prot_gff}); 567 push(@{$model_gff_keepers}, @{$holdover_model_gff}); 568 569 #clear holdovers 570 @{$holdover_pred} = (); 571 @{$holdover_est_gff} = (); 572 @{$holdover_altest_gff} = (); 573 @{$holdover_prot_gff} = (); 574 @{$holdover_pred_gff} = (); 575 @{$holdover_model_gff} = (); 576 } 639 577 640 print $DS_FH "$seq_id\t$out_dir\tDIED\n"; 641 642 my $die_count = $LOG->get_die_count(); 643 $die_count++; 644 645 $LOG->add_entry("DIED","RANK","non_mpi"); 646 $LOG->add_entry("DIED","COUNT",$die_count); 647 $failed_contig = 1; 648 push(@failed, $fasta); 649 }; 650 651 next CONTIG if ($failed_contig); 652 653 #variables that are persistent outside of try block 654 my $blastx_keepers; 655 656 try{ #coresponds to levels 7-8 in mpi_maker 657 #-blastx search the masked input file 658 $blastx_keepers = Shared_Functions::blastx($chunk, 659 $proteins, 660 $the_void, 661 $seq_out_name, 662 $CTL_OPTIONS{blastx}, 663 $CTL_OPTIONS{eval_blastx}, 664 $CTL_OPTIONS{bit_blastx}, 665 $CTL_OPTIONS{percov_blastx}, 666 $CTL_OPTIONS{percid_blastx}, 667 $CTL_OPTIONS{split_hit}, 668 $CTL_OPTIONS{cpus}, 669 $OPT{f}, 670 $LOG 671 ); 672 } 673 catch Error::Simple with{ 674 my $E = shift; 675 676 print STDERR $E->{-text}; 677 print STDERR "\n\nMaker failed at blastx of proteins!!\n"; 678 print STDERR "FAILED CONTIG:$seq_id\n\n"; 679 680 print $DS_FH "$seq_id\t$out_dir\tDIED\n"; 681 682 my $die_count = $LOG->get_die_count(); 683 $die_count++; 684 685 $LOG->add_entry("DIED","RANK","non_mpi"); 686 $LOG->add_entry("DIED","COUNT",$die_count); 687 $failed_contig = 1; 688 push(@failed, $fasta); 689 }; 690 691 next CONTIG if ($failed_contig); 692 693 #variables that are persistent outside of try block 694 my $tblastx_keepers = []; 695 696 try{ #coresponds to levels 9-10 in mpi_maker 697 #-tblastx search the masked input file 698 $tblastx_keepers = Shared_Functions::tblastx($chunk, 699 $alt_ests, 700 $the_void, 701 $seq_out_name, 702 $CTL_OPTIONS{tblastx}, 703 $CTL_OPTIONS{eval_tblastx}, 704 $CTL_OPTIONS{bit_tblastx}, 705 $CTL_OPTIONS{percov_tblastx}, 706 $CTL_OPTIONS{percid_tblastx}, 707 $CTL_OPTIONS{split_hit}, 708 $CTL_OPTIONS{cpus}, 709 $OPT{f}, 710 $LOG 711 ) if $alt_ests; 712 } 713 catch Error::Simple with{ 714 my $E = shift; 715 716 print STDERR $E->{-text}; 717 print STDERR "\n\nMaker failed at tblastx of alternate ests!!\n"; 718 print STDERR "FAILED CONTIG:$seq_id\n\n"; 719 720 print $DS_FH "$seq_id\t$out_dir\tDIED\n"; 721 722 my $die_count = $LOG->get_die_count(); 723 $die_count++; 724 725 $LOG->add_entry("DIED","RANK","non_mpi"); 726 $LOG->add_entry("DIED","COUNT",$die_count); 727 $failed_contig = 1; 728 push(@failed, $fasta); 729 }; 730 731 next CONTIG if ($failed_contig); 732 733 #variables that are persistent outside of try block 734 my $pred_command; 735 my $preds_on_chunk; 736 737 try{ #coresponds to level 11 in mpi_maker 738 #-- decide which gene finder to use to build annotations 739 740 if ($CTL_OPTIONS{predictor} eq 'augustus') { 741 $pred_command = $CTL_OPTIONS{augustus} .' --species='.$CTL_OPTIONS{augustus_species}; 742 $preds_on_chunk = Shared_Functions::get_preds_on_chunk($augus, 743 $chunk 744 ); 745 } 746 elsif ($CTL_OPTIONS{predictor} eq 'snap') { 747 $pred_command = $CTL_OPTIONS{snap}.' '.$CTL_OPTIONS{snaphmm}; 748 $preds_on_chunk = Shared_Functions::get_preds_on_chunk($snaps, 749 $chunk 750 ); 751 } 752 elsif ($CTL_OPTIONS{predictor} eq 'est2genome') { 753 $pred_command = ''; 754 $preds_on_chunk = []; 755 } 756 else { 757 die "ERROR: invalid predictor type: $CTL_OPTIONS{predictor}\n"; 758 } 759 760 #==merge heldover Phathits from last round 761 #reviews heldover hits, then merges and reblasts them if they cross the divide 762 if ($chunk->number != 0) { #if not first chunk 763 ($blastn_keepers, 764 $blastx_keepers, 765 $tblastx_keepers) = Shared_Functions::merge_and_resolve_hits($masked_fasta, 766 $fasta_t_index, 767 $fasta_p_index, 768 $fasta_a_index, 769 $blastn_keepers, 770 $blastx_keepers, 771 $tblastx_keepers, 772 $holdover_blastn, 773 $holdover_blastx, 774 $holdover_tblastx, 775 $the_void, 776 \%CTL_OPTIONS, 777 $OPT{f}, 778 $LOG 779 ); 780 } 781 782 #==PROCESS HITS CLOSE TOO CHUNK DIVISIONS 783 #holdover hits that are too close to the divide for review with next chunk 784 if (not $chunk->is_last) { #if not last chunk 785 ($holdover_blastn, 786 $holdover_blastx, 787 $holdover_tblastx, 788 $holdover_preds, 789 $blastn_keepers, 790 $blastx_keepers, 791 $tblastx_keepers, 792 $preds_on_chunk) = Shared_Functions::process_the_chunk_divide($chunk, 793 $CTL_OPTIONS{'split_hit'}, 794 $blastn_keepers, 795 $blastx_keepers, 796 $tblastx_keepers, 797 $preds_on_chunk 798 ); 799 } 800 } 801 catch Error::Simple with{ 802 my $E = shift; 803 804 print STDERR $E->{-text}; 805 print STDERR "\n\nMaker failed while proccessing the chunk divide!!\n"; 806 print STDERR "FAILED CONTIG:$seq_id\n\n"; 807 808 print $DS_FH "$seq_id\t$out_dir\tDIED\n"; 809 810 my $die_count = $LOG->get_die_count(); 811 $die_count++; 812 813 $LOG->add_entry("DIED","RANK","non_mpi"); 814 $LOG->add_entry("DIED","COUNT",$die_count); 815 $failed_contig = 1; 816 push(@failed, $fasta); 817 }; 818 819 next CONTIG if ($failed_contig); 578 #==PROCESS HITS CLOSE TO CHUNK DIVISIONS 579 #holdover hits that are too close to the divide for review with next chunk 580 if (not $chunk->is_last) { #if not last chunk 581 ($holdover_blastn, 582 $holdover_blastx, 583 $holdover_tblastx, 584 $holdover_pred, 585 $holdover_est_gff, 586 $holdover_altest_gff, 587 $holdover_prot_gff, 588 $holdover_pred_gff, 589 $holdover_model_gff, 590 $blastn_keepers, 591 $blastx_keepers, 592 $tblastx_keepers, 593 $preds_on_chunk, 594 $est_gff_keepers, 595 $altest_gff_keepers, 596 $prot_gff_keepers, 597 $pred_gff_keepers, 598 $model_gff_keepers 599 ) = GI::process_the_chunk_divide($chunk, 600 $CTL_OPT{'split_hit'}, 601 $blastn_keepers, 602 $blastx_keepers, 603 $tblastx_keepers, 604 $preds_on_chunk, 605 $est_gff_keepers, 606 $altest_gff_keepers, 607 $prot_gff_keepers, 608 $pred_gff_keepers, 609 $model_gff_keepers 610 ); 611 } 820 612 821 613 #==EXONERATE HERE 822 614 823 615 #variables that are persistent outside of try block 824 616 my $blastx_data; 825 617 my $exonerate_p_data; 826 827 try{ #coresponds to level 12 in mpi_maker 828 #-cluster the blastx hits 829 print STDERR "cleaning blastx...\n" unless $main::quiet; 830 831 my $blastx_clusters = cluster::clean_and_cluster($blastx_keepers, 832 $query_seq, 833 10 834 ); 835 836 undef $blastx_keepers; #free up memory 837 838 #-make a multi-fasta of the seqs in the blastx_clusters 839 #-polish the blastx hits with exonerate 840 841 my $exonerate_p_clusters = Shared_Functions::polish_exonerate($fasta, 842 $blastx_clusters, 843 $fasta_p_index, 844 $the_void, 845 5, 846 'p', 847 $CTL_OPTIONS{exonerate}, 848 $CTL_OPTIONS{percov_blastx}, 849 $CTL_OPTIONS{percid_blastx}, 850 $CTL_OPTIONS{ep_score_limit}, 851 $CTL_OPTIONS{ep_matrix}, 852 $OPT{f}, 853 $LOG 854 ); 855 856 $blastx_data = Shared_Functions::flatten($blastx_clusters); 857 $exonerate_p_data = Shared_Functions::flatten($exonerate_p_clusters, 'exonerate:p'); 858 } 859 catch Error::Simple with{ 860 my $E = shift; 861 862 print STDERR $E->{-text}; 863 print STDERR "\n\nMaker failed at exonerate against proteins!!\n"; 864 print STDERR "FAILED CONTIG:$seq_id\n\n"; 865 866 print $DS_FH "$seq_id\t$out_dir\tDIED\n"; 867 868 my $die_count = $LOG->get_die_count(); 869 $die_count++; 870 871 $LOG->add_entry("DIED","RANK","non_mpi"); 872 $LOG->add_entry("DIED","COUNT",$die_count); 873 $failed_contig = 1; 874 push(@failed, $fasta); 875 }; 876 877 next CONTIG if ($failed_contig); 878 879 #variables that are persistent outside of try block 880 my $blastn_data; 881 my $tblastx_data; 882 my $exonerate_e_data; 883 884 try{ #coresponds to levels 13 in mpi_maker 885 #-cluster the tblastx hits 886 print STDERR "cleaning tblastx...\n" unless $main::quiet; 887 my $tblastx_clusters = cluster::clean_and_cluster($tblastx_keepers, 888 $query_seq, 889 10 890 ); 891 892 undef $tblastx_keepers; #free up memory 893 $tblastx_data = Shared_Functions::flatten($tblastx_clusters); 894 895 896 #-cluster the blastn hits 897 print STDERR "cleaning blastn...\n" unless $main::quiet; 898 my $blastn_clusters = cluster::clean_and_cluster($blastn_keepers, 899 $query_seq, 900 10 901 ); 902 903 undef $blastn_keepers; #free up memory 904 905 #-polish blastn hits with exonerate 906 my $exonerate_e_clusters = Shared_Functions::polish_exonerate($fasta, 907 $blastn_clusters, 908 $fasta_t_index, 909 $the_void, 910 5, 911 'e', 912 $CTL_OPTIONS{exonerate}, 913 $CTL_OPTIONS{percov_blastn}, 914 $CTL_OPTIONS{percid_blastn}, 915 $CTL_OPTIONS{en_score_limit}, 916 $CTL_OPTIONS{en_matrix}, 917 $OPT{f}, 918 $LOG 919 ); 920 921 $blastn_data = Shared_Functions::flatten($blastn_clusters); 922 $exonerate_e_data = Shared_Functions::flatten($exonerate_e_clusters, 'exonerate:e'); 923 } 924 catch Error::Simple with{ 925 my $E = shift; 926 927 print STDERR $E->{-text}; 928 print STDERR "\n\nMaker failed at exonerate against transcripts!!\n"; 929 print STDERR "FAILED CONTIG:$seq_id\n\n"; 930 931 print $DS_FH "$seq_id\t$out_dir\tDIED\n"; 932 933 my $die_count = $LOG->get_die_count(); 934 $die_count++; 935 936 $LOG->add_entry("DIED","RANK","non_mpi"); 937 $LOG->add_entry("DIED","COUNT",$die_count); 938 $failed_contig = 1; 939 push(@failed, $fasta); 940 }; 941 942 next CONTIG if ($failed_contig); 618 619 #-cluster the blastx hits 620 print STDERR "cleaning blastx...\n" unless $main::quiet; 621 622 my $blastx_clusters = cluster::clean_and_cluster($blastx_keepers, 623 $q_seq_ref, 624 10 625 ); 626 undef $blastx_keepers; #free up memory 627 628 #-make a multi-fasta of the seqs in the blastx_clusters 629 #-polish the blastx hits with exonerate 630 631 my $exoner_p_clust = GI::polish_exonerate($fasta, 632 $blastx_clusters, 633 $fasta_p_index, 634 $the_void, 635 5, 636 'p', 637 $CTL_OPT{exonerate}, 638 $CTL_OPT{pcov_blastx}, 639 $CTL_OPT{pid_blastx}, 640 $CTL_OPT{ep_score_limit}, 641 $CTL_OPT{ep_matrix}, 642 $CTL_OPT{force}, 643 $LOG 644 ); 645 646 #flatten clusters 647 $blastx_data = GI::flatten($blastx_clusters); 648 $exonerate_p_data = GI::flatten($exoner_p_clust, 'exonerate:p'); 649 650 #-cluster the tblastx hits 651 print STDERR "cleaning tblastx...\n" unless $main::quiet; 652 my $tblastx_clusters = cluster::clean_and_cluster($tblastx_keepers, 653 $q_seq_ref, 654 10 655 ); 656 undef $tblastx_keepers; #free up memory 657 658 #flatten the clusters 659 my $tblastx_data = GI::flatten($tblastx_clusters); 660 661 662 #-cluster the blastn hits 663 print STDERR "cleaning blastn...\n" unless $main::quiet; 664 my $blastn_clusters = cluster::clean_and_cluster($blastn_keepers, 665 $q_seq_ref, 666 10 667 ); 668 undef $blastn_keepers; #free up memory 669 670 #-polish blastn hits with exonerate 671 my $exoner_e_clust = GI::polish_exonerate($fasta, 672 $blastn_clusters, 673 $fasta_t_index, 674 $the_void, 675 5, 676 'e', 677 $CTL_OPT{exonerate}, 678 $CTL_OPT{pcov_blastn}, 679 $CTL_OPT{pid_blastn}, 680 $CTL_OPT{en_score_limit}, 681 $CTL_OPT{en_matrix}, 682 $CTL_OPT{force}, 683 $LOG 684 ); 685 686 #flatten clusters 687 my $blastn_data = GI::flatten($blastn_clusters); 688 my $exonerate_e_data = GI::flatten($exoner_e_clust, 689 'exonerate:e' 690 ); 691 692 #combine final data sets 693 my $final_est = GI::combine($exonerate_e_data, 694 $est_gff_keepers 695 ); 696 my $final_altest = GI::combine($tblastx_data, 697 $altest_gff_keepers 698 ); 699 my $final_prot = GI::combine($blastx_data, 700 $exonerate_p_data, 701 $prot_gff_keepers 702 ); 703 my $final_pred = GI::combine($preds_on_chunk, 704 $pred_gff_keepers 705 ); 943 706 944 707 #####working here########### … … 946 709 947 710 #variables that are persistent outside of try block 948 my $annotations; 949 950 try{ #coresponds to level 14 in mpi_maker 951 #-auto-annotate the input file 952 $annotations = maker::auto_annotator::annotate($fasta, 953 $$masked_fasta, 954 $chunk->number(), 955 $exonerate_p_data, 956 $exonerate_e_data, 957 $blastx_data, 958 $preds_on_chunk, 959 $the_void, 960 $pred_command, 961 $CTL_OPTIONS{snap_flank}, 962 $CTL_OPTIONS{'single_exon'}, 963 $OPT{f}, 964 $OPT{PREDS}, 965 $CTL_OPTIONS{predictor}, 966 $LOG, 967 \%CTL_OPTIONS, 968 ); 969 } 970 catch Error::Simple with{ 971 my $E = shift; 972 973 print STDERR $E->{-text}; 974 print STDERR "\n\nMaker failed at annotation generation!!\n"; 975 print STDERR "FAILED CONTIG:$seq_id\n\n"; 976 977 print $DS_FH "$seq_id\t$out_dir\tDIED\n"; 978 979 my $die_count = $LOG->get_die_count(); 980 $die_count++; 981 982 $LOG->add_entry("DIED","RANK","non_mpi"); 983 $LOG->add_entry("DIED","COUNT",$die_count); 984 $failed_contig = 1; 985 push(@failed, $fasta); 986 }; 987 988 next CONTIG if ($failed_contig); 989 990 try{ #coresponds to levels 15 in mpi_maker 991 #==OUTPUT DATA HERE 992 993 #--- GFF3 994 $GFF3->genes($annotations); 995 $GFF3->phat_hits($blastx_data); 996 $GFF3->phat_hits($blastn_data); 997 $GFF3->phat_hits($tblastx_data); 998 $GFF3->phat_hits($exonerate_p_data); 999 $GFF3->phat_hits($exonerate_e_data); 1000 1001 #--- GAME XML output was removed and the module is broken 1002 1003 1004 #---quality indices 1005 1006 #my @quality_indices; 1007 #foreach my $an (@$annotations) { 1008 # my $g_name = $an->{g_name}; 1009 # my $g_s = $an->{g_start}; 1010 # my $g_e = $an->{g_end}; 1011 # my $g_strand = $an->{g_strand}; 1012 # 1013 # my @temp_ant; 1014 # foreach my $a (@{$an->{t_structs}}) { 1015 # push(@quality_indices, [$a->{t_name}, $a->{t_qi}]); 1016 # 1017 # my ($p_fasta, $t_fasta) = get_p_and_t_fastas($a); 1018 # 1019 # $t_fastas .= $$t_fasta; 1020 # $p_fastas .= $$p_fasta; 1021 # 1022 # push (@temp_ant,$a) if defined($a->{hit}); 1023 # } 1024 #} 1025 #Write the quality index of the mRNAs to a separate file. 1026 #write_quality_data(\@quality_indices, $seq_id); 1027 1028 #--- building fastas for annotations (grows with itteration) 1029 my ($p_fasta, $t_fasta) = Shared_Functions::get_maker_p_and_t_fastas($annotations); 1030 $p_fastas .= $p_fasta; 1031 $t_fastas .= $t_fasta; 1032 } 1033 catch Error::Simple with{ 1034 my $E = shift; 1035 1036 print STDERR $E->{-text}; 1037 print STDERR "\n\nMaker failed at processing annotations on chunk!!\n"; 1038 print STDERR "FAILED CONTIG:$seq_id\n\n"; 1039 1040 print $DS_FH "$seq_id\t$out_dir\tDIED\n"; 1041 1042 my $die_count = $LOG->get_die_count(); 1043 $die_count++; 1044 1045 $LOG->add_entry("DIED","RANK","non_mpi"); 1046 $LOG->add_entry("DIED","COUNT",$die_count); 1047 $failed_contig = 1; 1048 push(@failed, $fasta); 1049 }; 1050 1051 next CONTIG if ($failed_contig); 1052 } #END CONTIG 1053 1054 try{ #coresponds to levels 16 in mpi_maker 1055 #--- building fastas of predictions 1056 my ($p_snap_fastas, $t_snap_fastas) = 1057 Shared_Functions::get_snap_p_and_t_fastas($query_seq, $snaps); 1058 my ($p_augus_fastas, $t_augus_fastas) = 1059 Shared_Functions::get_snap_p_and_t_fastas($query_seq, $augus); 1060 1061 #--Write fasta files now that all chunks are finished 1062 FastaFile::writeFile(\$p_fastas ,"$out_dir/$seq_out_name.maker.proteins.fasta"); 1063 FastaFile::writeFile(\$t_fastas ,"$out_dir/$seq_out_name.maker.transcripts.fasta"); 1064 if ($CTL_OPTIONS{'snap'}) { 1065 FastaFile::writeFile(\$p_snap_fastas ,"$out_dir/$seq_out_name.maker.snap.proteins.fasta"); 1066 FastaFile::writeFile(\$t_snap_fastas ,"$out_dir/$seq_out_name.maker.snap.transcript.fasta"); 1067 } 1068 if ($CTL_OPTIONS{'augustus'}) { 1069 FastaFile::writeFile(\$p_augus_fastas,"$out_dir/$seq_out_name.maker.augus.proteins.fasta"); 1070 FastaFile::writeFile(\$t_augus_fastas,"$out_dir/$seq_out_name.maker.augus.transcript.fasta"); 1071 } 1072 1073 #--- add predictions to GFF and write 1074 $GFF3->predictions($snaps); 1075 $GFF3->predictions($augus); 1076 $GFF3->print($out_dir."/".$seq_out_name.".gff"); 1077 1078 #--cleanup maker files created with each fasta sequence 1079 File::Path::rmtree ($the_void) if $CTL_OPTIONS{clean_up}; #rm temp directory 711 my $annotations = []; 712 713 #-auto-annotate the input file 714 GI::combine($blastx_data, ); 715 $annotations = maker::auto_annotator::annotate($fasta, 716 $masked_fasta, 717 $chunk->number(), 718 $final_prot, 719 $final_est, 720 $final_altest, 721 $final_pred, 722 $model_gff_keepers, 723 $the_void, 724 $build, 725 \%CTL_OPT, 726 $LOG 727 ); 728 729 my $maker_anno = maker::auto_annotator::best_annotations($annotations, 730 \%CTL_OPT 731 ); 732 733 #==OUTPUT DATA HERE 734 735 #--- GFF3 736 $GFF3->add_genes($maker_anno); 737 $GFF3->add_phathits($blastx_data); 738 $GFF3->add_phathits($blastn_data); 739 $GFF3->add_phathits($tblastx_data); 740 $GFF3->add_phathits($exonerate_p_data); 741 $GFF3->add_phathits($exonerate_e_data); 742 $GFF3->add_phathits($est_gff_keepers); 743 $GFF3->add_phathits($altest_gff_keepers); 744 $GFF3->add_phathits($prot_gff_keepers); 745 $GFF3->add_phathits($preds_on_chunk); 746 $GFF3->add_phathits($pred_gff_keepers); 747 $GFF3->resolved_flag if (not $chunk->is_last); #adds ### between contigs 748 749 #--- building fastas for annotations (grows with itteration) 750 my ($p_fasta, $t_fasta) = GI::maker_p_and_t_fastas($maker_anno); 751 $p_fastas .= $p_fasta; 752 $t_fastas .= $t_fasta; 1080 753 } 1081 catch Error::Simple with{ 1082 my $E = shift; 1083 1084 print STDERR $E->{-text}; 1085 print STDERR "\n\nMaker failed while writing final data!!\n"; 1086 print STDERR "FAILED CONTIG:$seq_id\n\n"; 1087 1088 print $DS_FH "$seq_id\t$out_dir\tDIED\n"; 1089 1090 my $die_count = $LOG->get_die_count(); 1091 $die_count++; 1092 1093 $LOG->add_entry("DIED","RANK","non_mpi"); 1094 $LOG->add_entry("DIED","COUNT",$die_count); 1095 $failed_contig = 1; 1096 push(@failed, $fasta); 1097 }; 1098 1099 next CONTIG if ($failed_contig); 1100 1101 #-- write to dsindex the finished files 1102 if($OPT{d}){ 1103 print $DS_FH "$seq_id\t$out_dir\tFINISHED\n";; 1104 } 1105 754 #END CONTIG 755 756 #--- write fastas for ab-initio predictions 757 my ($p_snap_fastas, 758 $t_snap_fastas) = GI::abinit_p_and_t_fastas($preds, 759 $safe_seq_id, 760 $q_seq_ref, 761 $out_dir 762 ); 763 764 #--Write fasta files now that all chunks are finished 765 FastaFile::writeFile(\$p_fastas, 766 "$out_dir/$safe_seq_id.maker.proteins.fasta" 767 ); 768 FastaFile::writeFile(\$t_fastas, 769 "$out_dir/$safe_seq_id.maker.transcripts.fasta" 770 ); 771 772 #--- write GFF3 file 773 $GFF3->finalize(); 774 775 #--cleanup maker files created with each fasta sequence 776 File::Path::rmtree ($the_void) if $CTL_OPT{clean_up}; #rm temp directory 777 778 #-- write to DS log the finished files 779 $DS_CTL->add_entry($seq_id, $out_dir, 'FINISHED'); 780 1106 781 #--- clear the log variable 1107 782 $LOG = undef; … … 1113 788 exit(0); 1114 789 1115 #----------------------------------------------------------------------------- 1116 #----------------------------------- SUBS ------------------------------------1117 #----------------------------------------------------------------------------- 790 #------------------------------------------------------------------------------# 791 #------------------------------------ SUBS ------------------------------------# 792 #------------------------------------------------------------------------------# lib/Bio/Search/HSP/PhatHSP/Base.pm
r89 r127 508 508 return $self->hit->seq_id(); 509 509 } 510 ################################################ subroutine header begin ## 511 512 =head2 strand 513 514 Usage : How to use this function/method 515 516 =for example 517 use Bio::Search::HSP::PhatHSP::Base; 518 my $hsps = Bio::Search::HSP::PhatHSP::Base::_getTestHSPs('blastn', 519 'sample_data/blastn.sample.report'); 520 521 =for example begin 522 523 my $hsp = $hsps->[0]; # $hits is filled in by test harness 524 my $strand = $hsp->strand(); 525 526 =for example end 527 528 =for example_testing 529 is($name, "3197985", "Check the name of the hit."); 530 531 Purpose : What the subroutine does. 532 Returns : The types and values it returns. 533 Argument : Required and optional input. 534 Throws : Exceptions and other anomolies 535 Comments : This is a sample subroutine header. 536 : It is polite to include more pod and fewer comments. 537 See Also : Other things that might be useful. 538 539 =cut 540 541 ################################################## subroutine header end ## 542 543 sub strand 544 { 545 my $self = shift; 546 my $what = shift; 547 548 if ($what =~ /subject|sbjct/i){ 549 $what = 'hit'; 550 } 551 552 if($what eq 'hit'){ 553 return $self->{_strand_hack}{hit} if(exists $self->{_strand_hack}); 554 return $self->{STRAND_HIT} if(exists $self->{STRAND_HIT}); 555 return $self->SUPER::strand('hit'); 556 } 557 elsif($what eq 'query'){ 558 return $self->{_strand_hack}{query} if(exists $self->{_strand_hack}); 559 return $self->{STRAND_QUERY} if(exists $self->{STRAND_QUERY}); 560 return $self->SUPER::strand('query'); 561 } 562 else{ 563 return $self->SUPER::strand($what); 564 } 565 } 510 566 511 567 ################################################ subroutine header begin ## lib/Bio/Search/Hit/PhatHit/Base.pm
r89 r127 88 88 ################################################ subroutine header begin ## 89 89 90 =head 2new90 =head1 new 91 91 92 92 Usage : How to use this function/method … … 133 133 ################################################ subroutine header begin ## 134 134 135 =head 2nB135 =head1 nB 136 136 137 137 Usage : How to use this function/method … … 167 167 sub nB { 168 168 my $self = shift; 169 my $w = shift; 170 171 if ($self->strand($w) == 1 || $self->strand($w) == 0) { 172 my $sorted = $self->sortFeatures($w); 173 return $sorted->[0]->start($w); 174 } 175 elsif ($self->strand($w) == -1) { 176 my $sorted = $self->revSortFeatures($w); 177 return $sorted->[0]->end($w); 178 } 179 else { 180 die "dead in PhatHit::Base::nB\n"; 181 } 182 } 183 184 ################################################ subroutine header begin ## 185 186 =head2 nE 169 my $w = shift || 'query'; 170 171 if ($w eq 'sbjct' || $w eq 'subject'){ 172 $w = 'hit'; 173 } 174 175 #set both nB for query and hit on first time through 176 if(! defined $self->{nB}){ 177 $self->{nB} = {}; 178 $self->nB('query'); 179 $self->nB('hit'); 180 } 181 182 if(! defined $self->{nB}{$w}){ 183 if ($self->strand($w) == 1 || $self->strand($w) == 0) { 184 my $sorted = $self->sortFeatures($w); 185 $self->{nB}{$w} = $sorted->[0]->start($w); 186 } 187 elsif ($self->strand($w) == -1) { 188 my $sorted = $self->revSortFeatures($w); 189 $self->{nB}{$w} = $sorted->[0]->end($w); 190 } 191 else { 192 die "dead in PhatHit::Base::nB\n"; 193 } 194 } 195 196 return $self->{nB}{$w}; 197 } 198 199 ################################################ subroutine header begin ## 200 201 =head1 nE 187 202 188 203 Usage : How to use this function/method … … 218 233 sub nE { 219 234 my $self = shift; 220 my $w = shift; 221 222 if ($self->strand($w) == -1) { 223 my $sorted = $self->sortFeatures($w); 224 return $sorted->[0]->start($w); 225 } 226 elsif ($self->strand($w) == 1 || $self->strand($w) == 0) { 227 my $sorted = $self->revSortFeatures($w); 228 return $sorted->[0]->end($w); 229 } 230 else { 231 die "dead in blastn::PhatHit::nE\n"; 232 } 233 } 234 235 ################################################ subroutine header begin ## 236 237 =head2 nB 235 my $w = shift || 'query'; 236 237 if ($w eq 'sbjct' || $w eq 'subject'){ 238 $w = 'hit'; 239 } 240 241 #set both nB for query and hit on first time through 242 if(! defined $self->{nE}){ 243 $self->{nE} = {}; 244 $self->nE('query'); 245 $self->nE('hit'); 246 } 247 248 if(! defined $self->{nE}{$w}){ 249 if ($self->strand($w) == -1) { 250 my $sorted = $self->sortFeatures($w); 251 $self->{nE}{$w} = $sorted->[0]->start($w); 252 } 253 elsif ($self->strand($w) == 1 || $self->strand($w) == 0) { 254 my $sorted = $self->revSortFeatures($w); 255 $self->{nE}{$w} = $sorted->[0]->end($w); 256 } 257 else { 258 die "dead in blastn::PhatHit::nE\n"; 259 } 260 } 261 262 return $self->{nE}{$w}; 263 } 264 265 ################################################ subroutine header begin ## 266 267 =head1 nB 238 268 239 269 Usage : How to use this function/method … … 281 311 ################################################ subroutine header begin ## 282 312 283 =head 2end313 =head1 end 284 314 285 315 Usage : How to use this function/method … … 328 358 ################################################ subroutine header begin ## 329 359 330 =head 2id360 =head1 id 331 361 332 362 Usage : How to use this function/method … … 377 407 ################################################ subroutine header begin ## 378 408 379 =head 2strand409 =head1 strand 380 410 381 411 Usage : How to use this function/method … … 414 444 ################################################## subroutine header end ## 415 445 416 sub strand 417 { 446 sub strand { 418 447 my ($self, $seqType) = @_; 419 448 … … 447 476 ################################################ subroutine header begin ## 448 477 449 =head 2hsps478 =head1 hsps 450 479 451 480 Usage : How to use this function/method … … 483 512 # think about changing this to return a reference, and to be in 484 513 # cjm-normal-form. 485 sub hsps 486 { 514 sub hsps { 487 515 my $self = shift; 488 516 my $hsps = shift; … … 503 531 # XXXX percent aligned query 504 532 505 =head 2pAq533 =head1 pAq 506 534 507 535 Usage : How to use this function/method … … 534 562 ################################################## subroutine header end ## 535 563 536 sub pAq 537 { 564 sub pAq { 538 565 my $self = shift; 539 566 my $ql = shift; … … 554 581 555 582 # XXXX percent aligned hit 556 =head 2pAh583 =head1 pAh 557 584 558 585 Usage : How to use this function/method … … 585 612 ################################################## subroutine header end ## 586 613 587 sub pAh 588 { 614 sub pAh { 589 615 my $self = shift; 590 616 … … 597 623 ################################################ subroutine header begin ## 598 624 599 =head 2getLengths625 =head1 getLengths 600 626 601 627 Usage : How to use this function/method … … 629 655 ################################################## subroutine header end ## 630 656 631 sub getLengths 632 { 657 sub getLengths { 633 658 my $self = shift; 634 659 … … 651 676 my $hOffset = $h_b - 1; 652 677 653 my $q_seq = ''; 654 my $h_seq = ''; 655 $q_seq .= 'nnnnnnnnnnnnnnnnnnnn' while (length($q_seq) <= $qLen); 656 $h_seq .= 'nnnnnnnnnnnnnnnnnnnn' while (length($h_seq) <= $hLen); 678 my $q_seq = 'n'x$qLen; 679 my $h_seq = 'n'x$hLen; 657 680 658 681 foreach my $s ($self->hsps) { … … 687 710 ################################################ subroutine header begin ## 688 711 689 =head 2show712 =head1 show 690 713 691 714 Usage : How to use this function/method … … 731 754 ################################################ subroutine header begin ## 732 755 733 =head 2revSortFeatures756 =head1 revSortFeatures 734 757 735 758 Usage : How to use this function/method … … 776 799 ################################################ subroutine header begin ## 777 800 778 =head 2sortFeatures801 =head1 sortFeatures 779 802 780 803 Usage : How to use this function/method … … 814 837 my $who = shift; 815 838 816 my @hsps = $self->hsps; 817 818 my @subfeatures = sort {$a->start($who) <=> $b->start($who)} @hsps; 839 my @subfeatures = sort {$a->start($who) <=> $b->start($who)} $self->hsps; 840 819 841 return \@subfeatures; 820 842 } … … 822 844 ################################################ subroutine header begin ## 823 845 824 =head 2queryLength846 =head1 queryLength 825 847 826 848 Usage : How to use this function/method … … 866 888 ################################################ subroutine header begin ## 867 889 868 =head 2_getTestHits890 =head1 _getTestHits 869 891 870 892 Usage : *private* … … 920 942 ################################################ subroutine header begin ## 921 943 922 =head 2AUTOLOAD944 =head1 AUTOLOAD 923 945 924 946 Usage : *private* lib/Dumper/GFF/GFFV3.pm
r89 r127 17 17 #------------------------------------------------------------------------ 18 18 sub new { 19 my $self = {}; 20 bless $self; 21 return $self; 19 my $class = shift; 20 my $filename = shift; 21 my $build = shift || "Build1";; 22 my $t_dir = shift || "/tmp"; 23 24 my $self = {}; 25 26 bless ($self, $class); 27 28 $self->_initialize($filename, $build, $t_dir); 29 30 return $self; 31 } 32 #------------------------------------------------------------------------ 33 sub _initialize { 34 my $self = shift; 35 my $filename = shift; 36 my $build = shift; 37 my $t_dir = shift; 38 39 $self->{build} = $build; 40 41 my $gff_file = "$filename"; 42 my ($name) = $filename =~ /([^\/]+)$/; 43 my $ann_file = "$t_dir/$name.ann"; 44 my $seq_file = "$t_dir/$name.seq"; 45 46 open(my $ANN, "> $ann_file") || die "ERROR: Could not open file: $ann_file\n"; 47 print_txt($ANN, $self->header."\n"); 48 close($ANN); 49 50 open(my $SEQ, "> $seq_file") || die "ERROR: Could not open file: $seq_file\n"; 51 print_txt($SEQ, "##FASTA\n"); 52 close($SEQ); 53 54 $self->{gff_file} = $gff_file; 55 $self->{ann_file} = $ann_file; 56 $self->{seq_file} = $seq_file; 22 57 } 23 58 #------------------------------------------------------------------------ 24 59 sub fasta { 25 60 my $self = shift; 26 my $fasta = Fasta::toFasta('>'.$self->seq_id, \(uc(${$self->seq}))); 27 return $$fasta; 28 } 61 62 my $fasta_ref = Fasta::toFastaRef('>'.$self->{seq_id}, \uc(${$self->seq})); 63 64 return $fasta_ref; 65 } 66 #------------------------------------------------------------------------ 67 sub resolved_flag { 68 my $self = shift; 69 70 open(my $ANN, ">>", $self->{ann_file}); 71 print_txt($ANN, "###\n"); 72 close($ANN); 73 } 74 #------------------------------------------------------------------------ 75 sub set_current_contig { 76 my $self = shift; 77 78 my $flag = 0; 79 $flag = 1 if (defined $self->{seq_id}); 80 $self->{seq_id} = shift; 81 $self->{seq} = shift; 82 83 open(my $ANN, ">>", $self->{ann_file}); 84 print_txt($ANN, "###\n") if($flag); 85 print_txt($ANN, $self->contig_comment."\n"); 86 print_txt($ANN, $self->contig_line."\n"); 87 close($ANN); 88 89 open(my $SEQ, ">>", $self->{seq_file}); 90 print_txt($SEQ, ${$self->fasta}); 91 close($SEQ); 92 } 93 #------------------------------------------------------------------------ 94 sub seq { 95 my $self = shift; 96 97 return $self->{seq} || undef; 98 } 99 29 100 #------------------------------------------------------------------------ 30 101 sub seq_id { 31 102 my $self = shift; 32 my $seq_id = shift; 33 if (defined($seq_id)) { 34 $self->{seq_id} = $seq_id; 35 } 36 else { 37 return $self->{seq_id}; 38 } 39 } 40 #------------------------------------------------------------------------ 41 sub seq { 42 my $self = shift; 43 my $seq = shift; 44 45 if (defined($seq)) { 46 $self->{seq} = $seq; 47 } 48 else { 49 return $self->{seq}; 50 } 51 } 52 #------------------------------------------------------------------------ 53 sub print { 103 104 return $self->{seq_id} || undef; 105 } 106 #------------------------------------------------------------------------ 107 sub finalize { 54 108 my $self = shift; 55 my $file = shift; 56 my $fh; 57 if (defined($file)){ 58 $fh = new FileHandle(); 59 $fh->open(">$file"); 60 } 61 print_txt($fh, $self->header."\n"); 62 print_txt($fh, $self->contig_comment."\n"); 63 print_txt($fh, $self->contig_line."\n"); 64 65 print_txt($fh, $self->genes); 66 print_txt($fh, $self->predictions); 67 print_txt($fh, $self->repeat_hits); 68 print_txt($fh, $self->phat_hits); 69 70 print_txt($fh, "##FASTA\n"); 71 print_txt($fh, $self->fasta); 72 $fh->close() if defined($file); 109 110 my $gff_f = $self->{gff_file}; 111 my $ann_f = $self->{ann_file}; 112 my $seq_f = $self->{seq_file}; 113 114 system("cat $seq_f >> $ann_f"); 115 unlink("$seq_f"); 116 system("mv $ann_f $gff_f"); 117 118 die "ERROR: GFF3 file not created\n" if(! -e $gff_f); 73 119 } 74 120 #------------------------------------------------------------------------ … … 101 147 unless defined($self->seq); 102 148 103 my $seq = $self->seq ();149 my $seq = $self->seq; 104 150 105 151 my $length = length($$seq); 106 my $id = $self->seq_id ();152 my $id = $self->seq_id; 107 153 my $name = $id; 108 154 if ($id =~ m/gnl\%7Cv3\%7C(Contig\d+)/) { … … 116 162 } 117 163 #------------------------------------------------------------------------ 118 sub print_fld {119 my $data = shift;120 my $fh = shift;121 122 die "LLLLL\n";123 }124 #------------------------------------------------------------------------125 164 sub print_txt { 126 165 my $fh = shift; … … 137 176 sub header { 138 177 my $self = shift; 139 178 179 my $build = $self->{build}; 140 180 my $h = "##gff-version 3"; 141 181 $h .= "\n##genome-build maker $build" if(defined $build); 182 142 183 return $h; 143 184 } 144 185 #------------------------------------------------------------------------ 145 sub predictions {186 sub add_predictions { 146 187 my $self = shift; 147 188 my $hits = shift; 148 189 149 if (defined($hits) && defined($hits->[0])){ 150 foreach my $p (@{$hits}){ 151 #$self->{predictions} .= pred_data($p, $self->seq_id); 152 $self->{predictions} .= hit_data($p, $self->seq_id); 153 } 154 } 155 else { 156 return $self->{predictions} || ''; 157 } 158 } 159 #------------------------------------------------------------------------ 160 sub phat_hits { 190 open(my $ANN, '>>', $self->{ann_file}); 191 foreach my $p (@{$hits}){ 192 print_txt($ANN, hit_data($p, $self->seq_id)); 193 } 194 close($ANN); 195 } 196 #------------------------------------------------------------------------ 197 sub add_phathits { 161 198 my $self = shift; 162 199 my $hits = shift; 163 200 164 if (defined($hits) && defined($hits->[0])){ 165 foreach my $hit (@{$hits}){ 166 $self->{hits} .= hit_data($hit, $self->seq_id); 167 } 168 } 169 else { 170 return $self->{hits} || ''; 171 } 172 } 173 #------------------------------------------------------------------------ 174 sub repeat_hits { 201 open(my $ANN, '>>', $self->{ann_file}); 202 foreach my $h (@{$hits}){ 203 print_txt($ANN, hit_data($h, $self->seq_id)); 204 } 205 close($ANN); 206 } 207 #------------------------------------------------------------------------ 208 sub add_repeat_hits { 175 209 my $self = shift; 176 210 my $hits = shift; 177 211 178 if (defined($hits) && defined($hits->[0])){ 179 foreach my $hit (@{$hits}){ 180 $self->{repeats} .= repeat_data($hit, $self->seq_id); 181 } 182 } 183 else { 184 return $self->{repeats} || ''; 185 } 186 } 187 #------------------------------------------------------------------------ 188 sub genes { 212 open(my $ANN, '>>', $self->{ann_file}); 213 foreach my $r (@{$hits}){ 214 print_txt($ANN, repeat_data($r, $self->seq_id)); 215 } 216 close($ANN); 217 } 218 #------------------------------------------------------------------------ 219 sub add_genes { 189 220 my $self = shift; 190 221 my $genes = shift; 191 222 192 if (defined($genes) && defined($genes->[0])){ 193 foreach my $g (@{$genes}){ 194 $self->{genes} .= gene_data($g, $self->seq_id); 195 } 196 } 197 else { 198 return $self->{genes} || ''; 199 } 223 open(my $ANN, '>>', $self->{ann_file}); 224 foreach my $g (@{$genes}){ 225 print_txt($ANN, gene_data($g, $self->seq_id)); 226 } 227 close($ANN); 200 228 } 201 229 #------------------------------------------------------------------------ … … 253 281 } 254 282 #------------------------------------------------------------------------ 255 sub pred_data {256 my $g = shift;257 my $seq_id = shift;258 259 my $g_name = $g->name();260 my $g_s = $g->nB('query');261 my $g_e = $g->nE('query');262 my $g_strand = $g->strand('query') == 1 ? '+' : '-';263 264 $g_name =~ s/\s/-/g;265 my $g_id = get_id_gene();266 267 my $source;268 if (ref($g) =~ /snap/){269 $source = 'snap';270 $g_id = join("-", $source, $seq_id, $g_id);271 }272 elsif (ref($g) =~ /augustus/){273 $source = 'augustus';274 $g_id = join("-", $source, $seq_id, $g_id);275 }276 else {277 die "unknown class:".ref($g)." in GFFV3::pred_data\n";278 }279 280 my $score = $g->score();281 282 ($g_s, $g_e) = ($g_e, $g_s) if $g_s > $g_e;283 284 285 my ($class, $type) = get_class_and_type($g, 'hit');286 287 my @g_data;288 push(@g_data, $seq_id, $class, 'gene', $g_s, $g_e, $score, $g_strand);289 push(@g_data, '.', 'ID='.$g_id.';Name='.$g_name);290 291 my $g_l = join("\t", @g_data)."\n";292 293 my @transcripts = $g;294 295 my %epl;296 foreach my $t (@transcripts){297 #my $t_id = get_id_mRNA();298 my $t_id = join(":", $t->name, get_id_mRNA());299 #-------300 my $t_s = $t->strand('query') == 1 ? '+' : '-';301 302 my ($t_b, $t_e) = PhatHit_utils::get_span_of_hit($t, 'query');303 304 ($t_b, $t_e) = ($t_e, $t_b) if $t_b > $t_e;305 306 my ($p_b, $p_e) = PhatHit_utils::get_span_of_hit($t, 'hit');307 308 my $nine = 'ID='.$t_id.';Parent='.$g_id.';Name='.$t->name;309 $nine .= ';Target='.$t->name.' '.$p_b.' '.$p_e.' '.'+';310 311 my ($class, $type) = get_class_and_type($t, 'hsp');312 313 my @t_data;314 push(@t_data, $seq_id, $class, $type, $t_b, $t_e, '.', $t_s, '.');315 push(@t_data, $nine);316 317 my $t_l = join("\t", @t_data)."\n";318 #--------319 $g_l .= $t_l;320 321 grow_exon_data_lookup($t, $t_id, \%epl);322 }323 my $e_l = get_exon_data($seq_id, \%epl, $source);324 325 $g_l .= $e_l;326 327 return $g_l;328 }329 #------------------------------------------------------------------------330 283 sub gene_data { 331 284 my $g = shift; … … 335 288 #my $g_name = join("-", "maker", $seq_id, (split("-", $g->{g_name}))[1..2]); 336 289 my $g_s = $g->{g_start}; 337 my $g_e = $g->{g_end};338 my $g_strand = $g->{g_strand} ==1 ? '+' : '-';290 my $g_e = $g->{g_end}; 291 my $g_strand = $g->{g_strand} ==1 ? '+' : '-'; 339 292 my $t_structs = $g->{t_structs}; 340 293 341 294 #my $g_id = get_id_gene(); 342 295 my $g_id = $g_name; 296 343 297 my @g_data; 344 push(@g_data, $seq_id, 'maker', 'gene', $g_s, $g_e, '.', $g_strand); 345 push(@g_data, '.', 'ID='.$g_id.';Name='.$g_name); 298 push(@g_data, $seq_id, 'maker', 'gene', $g_s, $g_e, '.', $g_strand, '.'); 299 my $attributes = 'ID='.$g_id.';Name='.$g_name; 300 $attributes .= ';'.$g->{-attrib} if($g->{attrib}); 301 push(@g_data, $attributes); 346 302 347 303 my $g_l = join("\t", @g_data)."\n"; … … 354 310 #my $t_id = get_id_mRNA(); 355 311 my $t_id = (split(/\s+/, $t->{t_name}))[0]; 356 my $t_l = 357 get_transcript_data($t, $t_id, $seq_id, $g_id, $g_name); 358 312 my $t_l = get_transcript_data($t, $t_id, $seq_id, $g_id, $g_name); 359 313 $g_l .= $t_l."\n"; 360 314 … … 398 352 my $h_id = get_id_hit(); 399 353 $h_id = join(":", $seq_id, $h_id); 400 my $score = $h->s ignificance() || 'NA';354 my $score = $h->score() || '.'; 401 355 $score .= 0 if $score eq '0.'; 402 $score = '.' if $score eq 'NA'; 403 404 my $alt_score = $h->score() || '.'; 405 $score = $alt_score 406 if ($score eq '.' && $alt_score =~ /\d/); 407 356 408 357 my @h_data; 409 push(@h_data, $seq_id, $class, $type, $h_s, $h_e, $score, $h_str); 410 push(@h_data, '.', 'ID='.$h_id.';Name='.$h_n.';Target='.$h_n.' '.$t_s.' '.$t_e.' '.$t_strand); 411 my $h_l = join("\t", @h_data)."\n"; 358 push(@h_data, $seq_id, $class, $type, $h_s, $h_e, $score, $h_str, '.'); 359 my $attributes = 'ID='.$h_id.';Name='.$h_n.';Target='.$h_n.' '.$t_s.' '.$t_e.' '.$t_strand; 360 $attributes .= $h->{-attrib} if($h->{-attrib}); 361 my $h_l = join("\t", @h_data, $attributes)."\n"; 412 362 413 363 my $sorted = PhatHit_utils::sort_hits($h); … … 456 406 my $h_id = get_id_hit(); 457 407 $h_id = join(":", $seq_id, $h_id); 458 my $score = $h->s ignificance() || 'NA';408 my $score = $h->score() || '.'; 459 409 $score .= 0 if $score eq '0.'; 460 $score = '.' if $score eq 'NA';461 462 my $alt_score = $h->score() || '.';463 $score = $alt_score464 if ($score eq '.' && $alt_score =~ /\d/);465 410 466 411 my @h_data; 467 push(@h_data, $seq_id, $class, $type, $h_s, $h_e, $score, $h_str); 468 push(@h_data, '.', 'ID='.$h_id.';Name='.$h_n.';Target='.$h_n.' '.$t_s.' '.$t_e.' '.$t_strand); 469 my $h_l = join("\t", @h_data)."\n"; 412 push(@h_data, $seq_id, $class, $type, $h_s, $h_e, $score, $h_str, '.'); 413 my $attributes = 'ID='.$h_id.';Name='.$h_n.';Target='.$h_n.' '.$t_s.' '.$t_e.' '.$t_strand; 414 $attributes .= ';'.$h->{-attrib} if($h->{-attrib}); 415 my $h_l = join("\t", @h_data, $attributes)."\n"; 470 416 471 417 my $sorted = PhatHit_utils::sort_hits($h); … … 489 435 my $k = shift; 490 436 491 my ($class) = ref($h) =~ /.*::(\S+)$/; 437 my ($class) = ref($h) =~ /.*::([^\:\s\t\n]+)$/; 438 $class = $h->algorithm if($class eq 'gff3'); 492 439 493 440 my $type; 494 if ($class eq 'blastx'){441 if ($class =~ /^blastx$/i){ 495 442 $type = $k eq 'hit' ? 'protein_match' : 'match_part'; 496 }elsif ($class eq 'tblastx'){443 }elsif ($class =~ /^tblastx$/i){ 497 444 $type = $k eq 'hit' ? 'translated_nucleotide_match' : 'match_part'; 498 445 } 499 elsif ($class eq 'protein2genome'){446 elsif ($class =~ /protein2genome/i){ 500 447 $type = $k eq 'hit' ? 'protein_match' : 'match_part'; 501 448 } 502 elsif ($class eq 'est2genome'){449 elsif ($class =~ /est2genome/i){ 503 450 $type = $k eq 'hit' ? 'expressed_sequence_match' : 'match_part'; 504 451 } 505 elsif ($class eq 'blastn'){452 elsif ($class =~ /^blastn$/i){ 506 453 $type = $k eq 'hit' ? 'expressed_sequence_match' : 'match_part' ; 507 454 } 508 elsif ( ref($h) =~ /snap/){455 elsif ($class =~ /snap/i){ 509 456 $type = $k eq 'hit' ? 'match' : 'match_part' ; 510 457 $class= 'snap'; 511 458 } 512 elsif ( ref($h) =~ /augustus/){459 elsif ($class =~ /augustus/i){ 513 460 $type = $k eq 'hit' ? 'match' : 'match_part' ; 514 461 $class= 'augustus'; 515 462 } 516 elsif (ref($h) =~ /repeatmasker/){ 463 elsif ($class =~ /fgenesh/i){ 464 $type = $k eq 'hit' ? 'match' : 'match_part' ; 465 $class= 'fgenesh'; 466 } 467 elsif ($class =~ /twinscan/i){ 468 $type = $k eq 'hit' ? 'match' : 'match_part' ; 469 $class= 'twinscan'; 470 } 471 elsif ($class =~ /repeat/i){ 517 472 $type = $k eq 'hit' ? 'match' : 'match_part'; 518 473 $class= 'repeatmasker'; 519 474 } 520 475 else { 521 die "unknown class in GFFV3::get_class_and_type :".ref($h)."\n";476 die "unknown class in GFFV3::get_class_and_type $class ".ref($h)."\n"; 522 477 } 523 478 … … 556 511 my $strand = $e->strand('query') == 1 ? '+': '-'; 557 512 558 my $score = $source eq 'maker' ? '.' : $e->score(); 513 my $score = $e->score() || '.'; 514 $score .= 0 if $score eq '0.'; 559 515 560 516 my @data; 561 push(@data, $seq_id, $source, 'exon', $nB, $nE, $score); 562 push(@data, $strand, '.','ID='.$e_id.';Parent='.$p); 517 push(@data, $seq_id, $source, 'exon', $nB, $nE, $score, $strand, '.'); 518 my $nine = 'ID='.$e_id.';Parent='.$p; 519 $nine .= ';'.$e->{-attrib} if($e->{-attrib}); 520 push(@data, $nine); 563 521 564 522 $e_l .= join("\t", @data)."\n"; … … 604 562 605 563 my @data; 606 push(@data, $seq_id, $source, 'CDS', $nB, $nE, $score); 607 push(@data, $strand, $phase,'ID='.$e_id.';Parent='.$p); 564 push(@data, $seq_id, $source, 'CDS', $nB, $nE, $score, $strand, $phase); 565 my $nine = 'ID='.$e_id.';Parent='.$p; 566 #$nine .= ';'.$e->{-attrib} if($e->{-attrib}); 567 push(@data, $nine); 608 568 609 569 $phase = (3 - (($nE - $nB + 1) % 3)) % 3; … … 690 650 $e, 691 651 $hsp->strand('query'), 692 $hsp->name() ,652 $hsp->name() 693 653 ]); 694 654 … … 723 683 my @data; 724 684 push(@data, $seq_id, 'maker', 'mRNA', $t_b, $t_e, '.', $t_s, '.'); 725 push(@data, 'ID='.$t_id.';Parent='.$g_id.';Name='.$t_name); 685 my $nine = 'ID='.$t_id.';Parent='.$g_id.';Name='.$t_name;; 686 $nine .= ';'.$t_hit->{-attrib} if($t_hit->{-attrib}); 687 push(@data, $nine); 726 688 727 689 … … 740 702 my $t_strand = $hsp->strand('hit') == -1 ? '-' : '+'; 741 703 742 my $score = $hsp->s ignificance() || 'NA';704 my $score = $hsp->score() || '.'; 743 705 $score .= 0 if $score eq '0.'; 744 $score = '.' if $score eq 'NA';745 746 my $alt_score = $hsp->score() || '.';747 $score = $alt_score748 if ( $score eq '.' && $alt_score =~ /\d/);749 750 706 751 707 my $nB = $hsp->nB('query'); … … 767 723 my $nine = 'ID='.$hsp_id.';Parent='.$hit_id.';Name='.$hsp_name; 768 724 $nine .= ';Target='.$hsp_name.' '.$tB.' '.$tE.' '.$t_strand; 769 725 $nine .= ';'.$hsp->{-attrib} if($hsp->{-attrib}); 770 726 my @data; 771 727 push(@data, $seq_id, $class, $type, $nB, $nE, $score, $hsp_str, '.'); … … 787 743 my $t_strand = $hsp->strand('hit') == -1 ? '-' : '+'; 788 744 789 my $score = $hsp->s ignificance() || 'NA';745 my $score = $hsp->score() || '.'; 790 746 $score .= 0 if $score eq '0.'; 791 $score = '.' if $score eq 'NA';792 793 my $alt_score = $hsp->score() || '.';794 $score = $alt_score795 if ( $score eq '.' && $alt_score =~ /\d/);796 797 747 798 748 my $nB = $hsp->nB('query'); … … 816 766 push(@data, $seq_id, $class, $type, $nB, $nE, $score, $hsp_str, '.'); 817 767 my $nine = 'ID='.$hsp_id.';Parent='.$hit_id.';Name='.$hsp_name; 818 $nine .= ';Target='.$hsp_name.' '.$tB.' '.$tE.' '.$t_strand; 768 $nine .= ';Target='.$hsp_name.' '.$tB.' '.$tE.' '.$t_strand; 769 $nine .= ';'.$hsp->{-attrib} if($hsp->{-attrib}); 819 770 push(@data, $nine); 820 771 lib/Fasta.pm
r89 r127 8 8 use PostData; 9 9 use FileHandle; 10 use URI::Escape; 10 11 11 12 @ISA = qw( … … 103 104 #------------------------------- SUBS ------------------------------------------ 104 105 #------------------------------------------------------------------------------- 105 sub getWantedFromMulti {106 sub getWantedFromMulti { 106 107 my $multiFasta = shift; 107 108 my $wanted = shift; 108 109 109 $/ = "\n>";110 local $/ = "\n>"; 110 111 111 112 my $fh = new FileHandle; … … 120 121 } 121 122 } 122 $/ = "\n";123 local $/ = "\n"; 123 124 $fh->close; 124 125 … … 129 130 my $fasta = shift; 130 131 131 my @fasta = split(/\n/, $fasta); 132 my $seq = ''; 132 #always work with references 133 $fasta = $$fasta while(ref($fasta) eq 'REF'); 134 my $fasta_ref = (ref($fasta) eq '') ? \$fasta : $fasta; 135 136 my @fasta = split(/\n/, $$fasta_ref); 133 137 while(my $l = shift(@fasta)){ 134 138 chomp($l); 135 139 return $l if $l =~ /^>/; 136 140 } 137 141 } 142 #----------------------------------------------------------------------------- 143 sub getSeqID { 144 my $fasta = shift; 145 146 #always work with references 147 $fasta = $$fasta while(ref($fasta) eq 'REF'); 148 my $fasta_ref = (ref($fasta) eq '') ? \$fasta : $fasta; 149 150 my $def = Fasta::getDef($fasta_ref); 151 my $seq_id = Def2SeqID($def); 152 153 return $seq_id; 154 } 155 #----------------------------------------------------------------------------- 156 sub def2SeqID { 157 my $def = shift; 158 159 my ($seq_id) = $def =~ /^>(\S+)/; 160 161 return $seq_id; 162 } 163 #----------------------------------------------------------------------------- 164 sub getSafeID { 165 my $fasta = shift; 166 167 #always work with references 168 $fasta = $$fasta while(ref($fasta) eq 'REF'); 169 my $fasta_ref = (ref($fasta) eq '') ? \$fasta : $fasta; 170 171 my $seq_id = getSeqID($fasta_ref); 172 my $safe_id = SeqID2SafeID($seq_id); 173 174 return $safe_id; 175 } 176 #----------------------------------------------------------------------------- 177 sub seqID2SafeID { 178 my $seq_id = shift; 179 180 my $safe_id = uri_escape($seq_id, 181 '\*\?\|\\\/\'\"\{\}\<\>\;\,\^\(\)\$\~\:' 182 ); 183 184 return $safe_id; 185 } 186 #----------------------------------------------------------------------------- 187 sub safeID2SeqID { 188 my $seq_id = shift; 189 190 my $safe_id = uri_unescape($seq_id); 191 192 return $safe_id; 138 193 } 139 194 #------------------------------------------------------------------------------- 140 195 sub getSeq { 196 my $fasta = shift; 197 198 return ${getSeqRef(\$fasta)}; 199 } 200 #------------------------------------------------------------------------------- 201 sub getSeqRef { 202 my $fasta = shift; 203 204 #always work with references 205 $fasta = $$fasta while(ref($fasta) eq 'REF'); 206 my $fasta_ref = (ref($fasta) eq '') ? \$fasta : $fasta; 207 208 my @fasta = split(/\n/, $$fasta_ref); 209 210 my $seq = ''; 211 while(my $l = shift(@fasta)){ 212 chomp($l); 213 next if $l =~ /^>/; 214 $seq .= $l; 215 } 216 217 return \$seq; 218 } 219 #------------------------------------------------------------------------------- 220 sub ucFasta{ 221 my $fasta = shift; 222 223 return ${ucFastaRef(\$fasta)}; 224 } 225 #------------------------------------------------------------------------------- 226 sub ucFastaRef{ 141 227 my $fasta = shift; 142 228 143 my @fasta = split(/\n/, $fasta); 144 145 my $seq = ''; 146 while(my $l = shift(@fasta)){ 147 chomp($l); 148 next if $l =~ /^>/; 149 $seq .= $l; 150 } 151 152 return \$seq; 153 229 #always work with references 230 $fasta = $$fasta while(ref($fasta) eq 'REF'); 231 my $fasta_ref = (ref($fasta) eq '') ? \$fasta : $fasta; 232 233 my $def = getDef($fasta_ref); 234 my $seq = uc(getSeq($fasta_ref)); 235 236 return toFastaRef($def, \$seq); 154 237 } 155 238 #------------------------------------------------------------------------------- 156 239 sub revComp { 240 my $seq = shift; 241 242 return ${revCompRef(\$seq)}; 243 } 244 #------------------------------------------------------------------------------- 245 sub revCompRef { 157 246 my $seq = shift; 158 247 248 #always work with references 249 $seq = $$seq while(ref($seq) eq 'REF'); 250 my $seq_ref = (ref($seq) eq '') ? \$seq : $seq; 251 159 252 my($rc); 160 253 161 $rc = $ seq;254 $rc = $$seq_ref; 162 255 $rc =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX 163 256 /tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/; 164 257 $rc = reverse scalar ($rc); 165 258 166 return($rc); 167 259 return \$rc; 168 260 } 169 261 #------------------------------------------------------------------------------- 170 262 sub toFasta { 263 my $def = shift; 264 my $seq = shift; 265 266 return ${toFastaRef($def, \$seq)}; 267 } 268 #------------------------------------------------------------------------------- 269 sub toFastaRef { 171 270 my $def = shift; 172 271 my $seq = shift; 173 272 273 #always work with references 274 $seq = $$seq while(ref($seq) eq 'REF'); 275 my $seq_ref = (ref($seq) eq '') ? \$seq : $seq; 276 174 277 my $fasta = $def."\n"; 175 278 176 $fasta .= formatSeq($seq, 60);279 $fasta .= ${_formatSeq($seq_ref, 60)}; 177 280 return \$fasta; 178 179 } 180 #------------------------------------------------------------------------------- 181 sub formatSeq { 281 } 282 #------------------------------------------------------------------------------- 283 sub _formatSeq { 182 284 my $seq = shift; 183 285 my $l = shift; 184 286 185 my $fasta = ''; 186 for (my $i=0; $i< length($$seq);$i+=$l){ 187 $fasta .= substr($$seq, $i, $l)."\n"; 188 } 189 return $fasta; 190 287 #always work with references 288 $seq = $$seq while(ref($seq) eq 'REF'); 289 my $seq_ref = (ref($seq) eq '') ? \$seq : $seq; 290 291 my $f_seq = ''; 292 for (my $i=0; $i< length($$seq_ref);$i+=$l){ 293 $f_seq .= substr($$seq_ref, $i, $l)."\n"; 294 } 295 296 return \$f_seq; 191 297 } 192 298 #------------------------------------------------------------------------------- … … 195 301 my $seq = shift; 196 302 303 #always work with references 304 $seq = $$seq while(ref($seq) eq 'REF'); 305 my $seq_ref = (ref($seq) eq '') ? \$seq : $seq; 306 197 307 my $fasta = $def."\n"; 198 308 199 my $bpos = "0 " x length($$seq );309 my $bpos = "0 " x length($$seq_ref); 200 310 201 311 for (my $i=0; $i< length($bpos);$i+=60){ 202 312 $fasta .= substr($bpos, $i, 60)."\n"; 203 313 } 204 return \$fasta; 205 314 315 return $fasta; 206 316 } 207 317 … … 211 321 my $seq = shift; 212 322 213 $$seq =~ s/^\s+//; 214 215 my @values = split(/\s+/, $$seq); 323 #always work with references 324 $seq = $$seq while(ref($seq) eq 'REF'); 325 my $seq_ref = (ref($seq) eq '') ? \$seq : $seq; 326 327 $$seq_ref =~ s/^\s+//; 328 329 my @values = split(/\s+/, $$seq_ref); 216 330 my $fasta = $def."\n"; 217 331 … … 233 347 } 234 348 } 235 $fasta .= "\n" unless $fasta =~ /\n$/; 236 237 return \$fasta; 238 349 $fasta .= "\n" unless $fasta =~ /\n$/; 350 351 return $fasta; 239 352 } 240 353 #------------------------------------------------------------------------------- lib/FastaChunker.pm
r89 r127 45 45 my $parent_seq = Fasta::getSeq($self->parent_fasta); 46 46 47 $self->parent_seq_length(length($ $parent_seq));47 $self->parent_seq_length(length($parent_seq)); 48 48 my $fasta = ''; 49 49 … … 60 60 61 61 my $chunk = new FastaChunk(); 62 $chunk->seq(substr($ $parent_seq, $i, $l));62 $chunk->seq(substr($parent_seq, $i, $l)); 63 63 $chunk->def($def); 64 64 $chunk->parent_def($parent_def); 65 $chunk->seqid(Fasta::def2SeqID($parent_def)); 65 66 $chunk->size($l); #the max size of a chunk 66 67 $chunk->length(length($chunk->seq())); #the actual size of a chunk lib/FastaFile.pm
r89 r127 48 48 #----------------------------------------------------------------------------- 49 49 sub writeFile { 50 my $f asta= shift;50 my $f = shift; 51 51 my $loc = shift; 52 53 my $fasta = (ref($f) eq '') ? \$f : $f; 52 54 53 55 my $fh = new FileHandle(); lib/Iterator.pm
r89 r127 30 30 31 31 my $fh = $self->fileHandle(); 32 while(my $line = <$fh>){ 32 my $line; 33 while($line = <$fh>){ 33 34 $self->offsetInFile(1); 34 35 return $line; 35 36 } 36 37 $fh->close(); 37 return 0;38 return undef; 38 39 } 39 40 #------------------------------------------------------------------------------- lib/Iterator/Fasta.pm
r89 r127 26 26 bless $self; 27 27 28 $self-> set_number_of_entries($arg);28 $self->_set_number_of_entries($arg); 29 29 30 30 $self->fileHandle($arg); … … 33 33 } 34 34 #------------------------------------------------------------------------------- 35 sub set_number_of_entries {35 sub _set_number_of_entries { 36 36 my $self = shift; 37 37 my $arg = shift; … … 41 41 42 42 my $i = 0; 43 while (my $line = <$fh>){ 43 my $line; 44 while ($line = <$fh>){ 44 45 $i++ if $line =~ /^>/; 45 46 } … … 58 59 59 60 while (my $query = $self->nextEntry()){ 60 my $query_def = Fasta::getDef($query); 61 62 my ($id) = $query_def =~ />(\S+)/; 63 61 my ($id) = Fasta::getSeqID(\$query); 64 62 $hash{$id} = $query; 65 63 } … … 79 77 return undef; 80 78 } 81 82 while( my$line = <$fh>){79 my $line; 80 while($line = <$fh>){ 83 81 $line =~ s/>//; 84 82 $line =~ s/>$//; … … 91 89 $fh->close(); 92 90 return undef; 91 } 92 #------------------------------------------------------------------------------- 93 sub nextFasta {#alias to nextEntry 94 my $self = shift; 95 return $self->nextEntry; 93 96 } 94 97 #------------------------------------------------------------------------------- lib/PhatHit_utils.pm
r108 r127 255 255 '-algorithm' => $hit->algorithm, 256 256 '-length' => $hit->length, 257 '-score' => $hsp->score, 258 '-bits' => $hsp->bits, 259 '-significance' => $hsp->significance 257 260 ); 258 261 … … 307 310 } 308 311 elsif ($action eq 'revq' || $action eq 'both'){ 309 push(@args, Fasta::revComp($q_s)) 310 unless ref($hsp) =~ /blastp/ 311 || ref($hsp) =~ /tblast/; 312 if (ref($hsp) =~ /blastp|tblastn|blastx/ && ref($hsp) !~ /tblastx/){ 313 push(@args, $q_s); 314 } 315 else{ 316 push(@args, Fasta::revComp($q_s)); 317 } 312 318 } 313 319 … … 330 336 } 331 337 elsif ($action eq 'revh' || $action eq 'both'){ 332 push(@args, Fasta::revComp($h_s)) 333 unless ref($hsp) =~ /blastp/ 334 || ref($hsp) =~ /tblast/ 335 || ref($hsp) =~ /protein/; 338 if(ref($hsp) =~ /blastp|tblastn|blastx/ && ref($hsp) !~ /tblastx/){ 339 push(@args, $h_s); 340 } 341 else{ 342 push(@args, Fasta::revComp($h_s)) 343 } 336 344 } 337 345 338 346 push(@args, '-hsp_length'); 339 347 push(@args, $hsp->query->length); 340 341 348 342 my $spaces = $hsp->homology_string =~ tr/ / /;343 my $midd = length($hsp->homology_string) - $spaces;349 #my $spaces = $hsp->homology_string =~ tr/ / /; 350 #my $midd = length($hsp->homology_string) - $spaces; 344 351 345 352 push(@args, '-identical'); 346 push(@args, $ midd);353 push(@args, $hsp->num_identical); 347 354 348 355 push(@args, '-hit_length'); … … 371 378 372 379 push(@args, '-conserved'); 373 push(@args, $ midd);380 push(@args, $hsp->num_conserved); 374 381 375 382 push(@args, '-hit_name'); … … 383 390 384 391 push(@args, '-hit_gaps'); 385 push(@args, $hsp->gaps('hit')); 386 392 push(@args, $hsp->gaps('hit')); 393 387 394 return \@args; 388 395 … … 421 428 if defined($hsp->queryName); 422 429 423 424 430 my $n_q_s = $hsp->strand('query'); 425 431 my $n_h_s = $hsp->strand('hit'); 426 432 427 433 if ($what eq 'both') { 428 $n_q_s = -1*$n_q_s;429 $n_h_s = -1*$n_h_s;434 $n_q_s = -1*$n_q_s; 435 $n_h_s = -1*$n_h_s; 430 436 } 431 437 elsif ($what eq 'revq'){ 432 $n_q_s = -1*$n_q_s;438 $n_q_s = -1*$n_q_s; 433 439 } 434 440 elsif ($what eq 'revh'){ 435 $n_h_s = -1*$n_h_s;441 $n_h_s = -1*$n_h_s; 436 442 } 437 443 … … 586 592 $hsp->{'_sequenceschanged'} = 1; 587 593 } 594 $f->{nB}{query} += $offset if (defined($f->{nB}) && defined($f->{nB}{query})); 595 $f->{nE}{query} += $offset if (defined($f->{nE}) && defined($f->{nE}{query})); 588 596 $f->{'_sequenceschanged'} = 1; 589 597 } lib/Widget/RepeatMasker.pm
r109 r127 10 10 use Widget; 11 11 use Bio::DB::Fasta; 12 use repeatmasker::PhatHit;13 use repeatmasker::PhatHsp;12 use Bio::Search::Hit::PhatHit::repeatmasker; 13 use Bio::Search::HSP::PhatHSP::repeatmasker; 14 14 use IPC::Open3; 15 15 … … 221 221 push(@args, $h_gaps); 222 222 223 my $hsp = new repeatmasker::PhatHsp(@args);223 my $hsp = Bio::Search::HSP::PhatHSP::repeatmasker->new(@args); 224 224 $hsp->queryName($q_name); 225 225 #------------------------------------------------- … … 250 250 foreach my $key (keys %hsps){ 251 251 my $f = 252 new repeatmasker::PhatHit('-name' => $q_name,253 '-description' => 'NA',254 '-algorithm' => 'repeat_masker',255 '-length' => $q_length,256 );252 Bio::Search::Hit::PhatHit::repeatmasker->new('-name' => $q_name, 253 '-description' => 'NA', 254 '-algorithm' => 'repeat_masker', 255 '-length' => $q_length, 256 ); 257 257 258 258 $f->queryLength($q_length); lib/Widget/augustus.pm
r109 r127 12 12 use FastaFile; 13 13 use Iterator::Fasta; 14 use augustus::PhatHit;15 use augustus::PhatHsp;14 use Bio::Search::Hit::PhatHit::augustus; 15 use Bio::Search::HSP::PhatHSP::augustus; 16 16 use PhatHit_utils; 17 17 use IPC::Open3; … … 37 37 } 38 38 #------------------------------------------------------------------------ 39 sub get_ aug_shot {39 sub get_pred_shot { 40 40 my $seq = shift; 41 41 my $def = shift; … … 45 45 my $pred_flank = shift; 46 46 my $pred_command = shift; 47 my $q_id = shift;48 47 $OPT_F = shift; 49 48 $LOG = shift; 49 50 my $q_id = Fasta::def2SeqID($def); 50 51 51 52 my ($shadow_seq, $strand, $offset, $xdef) = … … 65 66 } 66 67 67 my $gene_preds = augustus($ $shadow_fasta,68 my $gene_preds = augustus($shadow_fasta, 68 69 $the_void, 69 70 $id, … … 87 88 my $gomiph = $set->{gomiph}; 88 89 my $mia = $set->{mia}; 89 my $augs = $set->{preds}; 90 my $models = $set->{gomod}; 91 my $alts = $set->{alt_ests}; 92 my $preds = $set->{preds}; 90 93 my $ests = $set->{ests}; 91 94 my @t_data; 92 95 93 96 push(@t_data, @{$gomiph}) if defined($gomiph); 94 push(@t_data, @{$ augs}) if defined($augs);97 push(@t_data, @{$preds}) if defined($preds); 95 98 push(@t_data, $mia) if defined($mia); 99 push(@t_data, @{$alts}) if defined($alts); 100 push(@t_data, @{$models}) if defined($models); 96 101 push(@t_data, @{$ests}) if defined($ests); 97 102 … … 434 439 435 440 my $def = Fasta::getDef($fasta); 436 my $q_seq = Fasta::getSeq ($fasta);441 my $q_seq = Fasta::getSeqRef($fasta); 437 442 438 443 my ($q_name) = $def =~ /^>(.+)/; … … 509 514 my %hsps; 510 515 my $i = 0; 511 my $f = new augustus::PhatHit('-name' => $g->{name},512 '-description' => 'NA',513 '-algorithm' => 'augustus',514 '-length' => $q_len,515 '-score' => $total_score,516 );517 516 my $f = new Bio::Search::Hit::PhatHit::augustus('-name' => $g->{name}, 517 '-description' => 'NA', 518 '-algorithm' => 'augustus', 519 '-length' => $q_len, 520 '-score' => $total_score, 521 ); 522 518 523 $f->queryLength($q_len); 519 524 … … 588 593 push(@args, 0); 589 594 590 my $hsp = new augustus::PhatHsp(@args);595 my $hsp = new Bio::Search::HSP::PhatHSP::augustus(@args); 591 596 $hsp->queryName($q_name); 592 597 #------------------------------------------------- lib/Widget/blastn.pm
r109 r127 111 111 next unless PhatHit_utils::is_contigous($hit); 112 112 113 #fix strand for messed up ncbi blast 114 $hit = PhatHit_utils::copy($hit, 'both') if ($hit->strand('hit') < 0); 115 113 116 push(@keepers, $hit) if $hit->hsps(); 114 117 } lib/Widget/blastx.pm
r109 r127 107 107 $significance = 0 if $significance =~ /0\./; 108 108 next unless $significance < $params->{significance}; 109 110 109 #next unless $hit->pAh > $params->{percov}; 111 110 #next unless $hit->hsp('best')->frac_identical() > $params->{percid}; 112 111 #next unless PhatHit_utils::is_contigous($hit); 113 112 113 #fix strand for messed up ncbi blast 114 $hit = PhatHit_utils::copy($hit, 'both') if ($hit->strand('hit') < 0); 115 114 116 push(@keepers, $hit) if $hit->hsps(); 115 117 } lib/Widget/exonerate/est2genome.pm
r89 r127 9 9 use FileHandle; 10 10 use Widget::exonerate; 11 use exonerate::PhatHit::est2genome;12 use exonerate::PhatHSP::est2genome;11 use Bio::Search::Hit::PhatHit::est2genome; 12 use Bio::Search::HSP::PhatHSP::est2genome; 13 13 use PhatHit_utils; 14 14 use exonerate::splice_info; … … 593 593 594 594 my $phat_hit = 595 new exonerate::PhatHit::est2genome( 596 '-name' => $v->{q_id}, 597 '-description' => $v->{q_id}, 598 '-algorithm' => 'exonerate::est2genome', 599 '-length' => $q_seq_len, 600 ); 595 new Bio::Search::Hit::PhatHit::est2genome('-name' => $v->{q_id}, 596 '-description' => $v->{q_id}, 597 '-algorithm' => 'exonerate::est2genome', 598 '-length' => $q_seq_len, 599 ); 601 600 602 601 $phat_hit->queryLength($t_seq_len); … … 608 607 #next unless $i ==2; 609 608 my $args = load_args($exon, $v); 610 my $hsp = new exonerate::PhatHSP::est2genome(@{$args});609 my $hsp = new Bio::Search::HSP::PhatHSP::est2genome(@{$args}); 611 610 $hsp->queryName($v->{q_id}); 612 611 #------------------------------------------------- … … 872 871 push(@args, '-hit_gaps'); 873 872 push(@args, $exon->{q_gaps}); 874 875 873 876 874 return \@args; lib/Widget/exonerate/protein2genome.pm
r89 r127 9 9 use FileHandle; 10 10 use Widget::exonerate; 11 use exonerate::PhatHit::protein2genome;12 use exonerate::PhatHSP::protein2genome;11 use Bio::Search::Hit::PhatHit::protein2genome; 12 use Bio::Search::HSP::PhatHSP::protein2genome; 13 13 @ISA = qw( 14 14 Widget::exonerate … … 343 343 344 344 my $phat_hit = 345 new exonerate::PhatHit::protein2genome( 346 '-name' => $v->{q_id}, 347 '-description' => $v->{q_id}, 348 '-algorithm' => 'exonerate::protein2genome', 349 '-length' => $q_seq_len, 350 ); 345 new Bio::Search::Hit::PhatHit::protein2genome('-name' => $v->{q_id}, 346 '-description' => $v->{q_id}, 347 '-algorithm' => 'exonerate::protein2genome', 348 '-length' => $q_seq_len, 349 ); 351 350 352 351 $phat_hit->queryLength($t_seq_len); … … 358 357 #next unless $i ==3; 359 358 my $args = load_args($exon, $v); 360 my $hsp = new exonerate::PhatHSP::protein2genome(@{$args});359 my $hsp = new Bio::Search::HSP::PhatHSP::protein2genome(@{$args}); 361 360 $hsp->queryName($v->{q_id}); 362 361 #------------------------------------------------- lib/Widget/fgenesh.pm
r118 r127 4 4 package Widget::fgenesh; 5 5 use strict; 6 use lib '~/maker/lib'; 7 use lib '/data1/hao/projects/MAKER-fgenesh/lib'; 8 6 9 use vars qw/@ISA/; 7 10 use PostData; … … 12 15 use Iterator::Fasta; 13 16 use PhatHit_utils; 14 use fgenesh::PhatHit;15 use fgenesh::PhatHsp;16 17 use IPC::Open3; 18 use Bio::Search::Hit::PhatHit::fgenesh; 19 use Bio::Search::HSP::PhatHSP::fgenesh; 17 20 18 21 @ISA = qw( 19 22 Widget 20 23 ); 21 my $OPT_F; 24 my $OPT_F; 25 my $LOG; 22 26 #------------------------------------------------------------------------------ 23 27 #--------------------------------- METHODS ------------------------------------ … … 34 38 35 39 #------------------------------------------------------------------------------- 40 41 42 43 #------------------------------------------------------------------------------- 36 44 #------------------------------ FUNCTIONS -------------------------------------- 37 45 #------------------------------------------------------------------------------- … … 47 55 } 48 56 $fh->close(); 57 49 58 } 50 59 #------------------------------------------------------------------------------- … … 56 65 my $gomiph = $set->{gomiph}; 57 66 my $mia = $set->{mia}; 58 my $augs = $set->{preds}; 67 my $models = $set->{gomod}; 68 my $alts = $set->{alt_ests}; 69 my $preds = $set->{preds}; 59 70 my $ests = $set->{ests}; 60 71 my @t_data; 61 72 62 73 push(@t_data, @{$gomiph}) if defined($gomiph); 63 push(@t_data, @{$ augs}) if defined($augs);74 push(@t_data, @{$preds}) if defined($preds); 64 75 push(@t_data, $mia) if defined($mia); 76 push(@t_data, @{$alts}) if defined($alts); 77 push(@t_data, @{$models}) if defined($models); 65 78 push(@t_data, @{$ests}) if defined($ests); 66 79 … … 122 135 my $self = shift; 123 136 my $command = shift; 124 125 if (defined($command)){126 $self->print_command($command);127 my $pid = open3(\*CHLD_IN, \*CHLD_OUT, \*CHLD_ERR, $command);128 local $/ = \1;129 while (my $line = <CHLD_ERR>){130 print STDERR $line unless($main::quiet);131 }132 waitpid $pid, 0;133 die "ERROR: Augustusfailed\n" if $? > 0;134 }135 else {136 die "you must give Widget::fgenesh a command to run!\n";137 }137 138 if (defined($command)){ 139 $self->print_command($command); 140 my $pid = open3(\*CHLD_IN, \*CHLD_OUT, \*CHLD_ERR, $command); 141 local $/ = \1; #read in everytime a byte becomes available 142 while (my $line = <CHLD_ERR>){ 143 print STDERR $line unless($main::quiet); 144 } 145 waitpid $pid, 0; 146 die "ERROR: FgenesH failed\n" if $? > 0; 147 } 148 else { 149 die "you must give Widget::fgenesh a command to run!\n"; 150 } 138 151 } 139 152 #------------------------------------------------------------------------------- … … 147 160 148 161 my $def = Fasta::getDef($fasta); 149 my $q_seq = Fasta::getSeq ($fasta);162 my $q_seq = Fasta::getSeqRef($fasta); 150 163  
