Changeset 277
- Timestamp:
- 11/04/09 09:39:38 (3 weeks ago)
- Files:
-
- MPI/mpi_maker (modified) (1 diff)
- bin/maker (modified) (1 diff)
- lib/GI.pm (modified) (10 diffs)
- lib/PhatHit_utils.pm (modified) (3 diffs)
- lib/Process/MpiChunk.pm (modified) (26 diffs)
- lib/Widget/exonerate/est2genome.pm (modified) (1 diff)
- lib/runlog.pm (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
MPI/mpi_maker
r256 r277 161 161 -RM_off|R Turns all repeat masking off. 162 162 163 -retry <integer> Rerun failed contigs up to the specified count.163 -retry|r <integer> Rerun failed contigs up to the specified count. 164 164 165 165 -cpus|c <integer> Tells how many cpus to use for BLAST analysis. bin/maker
r256 r277 131 131 -RM_off|R Turns all repeat masking off. 132 132 133 -retry <integer> Rerun failed contigs up to the specified count.133 -retry|r <integer> Rerun failed contigs up to the specified count. 134 134 135 135 -cpus|c <integer> Tells how many cpus to use for BLAST analysis. lib/GI.pm
r275 r277 89 89 sub merge_resolve_hits{ 90 90 my $fasta = shift @_; 91 my $fasta_t_index = shift @_; 92 my $fasta_p_index = shift @_; 93 my $fasta_a_index = shift @_; 94 my $blastn_keepers = shift @_; 95 my $blastx_keepers = shift @_; 96 my $tblastx_keepers = shift @_; 97 my $blastn_holdovers = shift @_; 98 my $blastx_holdovers = shift @_; 99 my $tblastx_holdovers = shift @_; 91 my $fasta_index = shift @_; 92 my $blast_keepers = shift @_; 93 my $blast_holdovers = shift @_; 100 94 my $the_void = shift @_; 101 95 my %CTL_OPT = %{shift @_}; 96 my $type = shift @_; #blastn, blastx, or tblastx 102 97 my $LOG = shift @_; 103 98 104 PhatHit_utils::merge_hits($blast n_keepers,105 $blast n_holdovers,99 PhatHit_utils::merge_hits($blast_keepers, 100 $blast_holdovers, 106 101 $CTL_OPT{split_hit}, 107 102 ); 108 @{$blastn_holdovers} = (); 109 110 PhatHit_utils::merge_hits($blastx_keepers, 111 $blastx_holdovers, 112 $CTL_OPT{split_hit}, 113 ); 114 @{$blastx_holdovers} = (); 115 116 PhatHit_utils::merge_hits($tblastx_keepers, 117 $tblastx_holdovers, 118 $CTL_OPT{split_hit}, 119 ); 120 @{$tblastx_holdovers} = (); 121 122 $blastn_keepers = reblast_merged_hits($fasta, 123 $blastn_keepers, 124 $fasta_t_index, 125 $the_void, 126 'blastn', 127 \%CTL_OPT, 128 $LOG 103 @{$blast_holdovers} = (); 104 105 $blast_keepers = reblast_merged_hits($fasta, 106 $blast_keepers, 107 $fasta_index, 108 $the_void, 109 $type, 110 \%CTL_OPT, 111 $LOG 129 112 ); 130 113 131 $blastx_keepers = reblast_merged_hits($fasta, 132 $blastx_keepers, 133 $fasta_p_index, 134 $the_void, 135 'blastx', 136 \%CTL_OPT, 137 $LOG 138 ); 139 140 $tblastx_keepers = reblast_merged_hits($fasta, 141 $tblastx_keepers, 142 $fasta_a_index, 143 $the_void, 144 'tblastx', 145 \%CTL_OPT, 146 $LOG 147 ); 148 149 return ($blastn_keepers, $blastx_keepers, $tblastx_keepers); 114 return $blast_keepers; 150 115 } 151 116 #----------------------------------------------------------------------------- … … 300 265 #----------------------------------------------------------------------------- 301 266 sub process_the_chunk_divide{ 302 my $chunk = shift @_; 303 my $split_hit = shift @_; 304 my $hit_groups = \@_; #processed and returned in order given by user 305 306 my $phat_hits; 307 308 foreach my $group (@{$hit_groups}) { 309 push(@{$phat_hits}, @{$group}); 310 } 311 312 my ($p_hits, $m_hits) = PhatHit_utils::seperate_by_strand('query', $phat_hits); 313 my $p_coors = PhatHit_utils::to_begin_and_end_coors($p_hits, 'query'); 314 my $m_coors = PhatHit_utils::to_begin_and_end_coors($m_hits, 'query'); 315 316 foreach my $p_coor (@{$p_coors}) { 317 $p_coor->[0] -= $chunk->offset(); 318 $p_coor->[1] -= $chunk->offset(); 319 #fix coordinates for hits outside of chunk end 320 $p_coor->[0] = $chunk->length if($p_coor->[0] > $chunk->length); 321 $p_coor->[1] = $chunk->length if($p_coor->[1] > $chunk->length); 322 #fix coordinates for hits outside of chunk begin 323 $p_coor->[0] = 0 if($p_coor->[0] < 0); 324 $p_coor->[1] = 0 if($p_coor->[1] < 0); 325 } 326 foreach my $m_coor (@{$m_coors}) { 327 $m_coor->[0] -= $chunk->offset(); 328 $m_coor->[1] -= $chunk->offset(); 329 #fix coordinates for hits outside of chunk end 330 $m_coor->[0] = $chunk->length if($m_coor->[0] > $chunk->length); 331 $m_coor->[1] = $chunk->length if($m_coor->[1] > $chunk->length); 332 #fix coordinates for hits outside of chunk begin 333 $m_coor->[0] = 0 if($m_coor->[0] < 0); 334 $m_coor->[1] = 0 if($m_coor->[1] < 0); 335 } 336 337 my $p_pieces = Shadower::getPieces(\($chunk->seq), $p_coors, 10); 338 $p_pieces = [sort {$b->{e} <=> $a->{e}} @{$p_pieces}]; 339 my $m_pieces = Shadower::getPieces(\($chunk->seq), $m_coors, 10); 340 $m_pieces = [sort {$b->{e} <=> $a->{e}} @{$m_pieces}]; 341 342 my @keepers; 343 my @holdovers; 344 345 my $cutoff = $chunk->length + $chunk->offset - $split_hit; 346 my $p_cutoff = $chunk->length + $chunk->offset + 1; 347 my $m_cutoff = $chunk->length + $chunk->offset + 1; 348 349 foreach my $p_piece (@{$p_pieces}) { 350 if ($p_piece->{e} + $chunk->offset >= $cutoff) { 351 $p_cutoff = $p_piece->{b} + $chunk->offset; 352 } 353 } 354 foreach my $m_piece (@{$m_pieces}) { 355 if ($m_piece->{e} + $chunk->offset >= $cutoff) { 356 $m_cutoff = $m_piece->{b} + $chunk->offset; 357 } 358 } 359 360 #too small, all are heldover for next round 361 if ($p_cutoff <= 1 + $chunk->offset && 362 $m_cutoff <= 1 + $chunk->offset) { 363 foreach my $g (@{$hit_groups}){ 364 push (@holdovers, $g); 365 push (@keepers, []); 366 } 367 return @holdovers, @keepers; 368 } 369 370 foreach my $group (@{$hit_groups}) { 371 my $group_keepers = []; 372 my $group_holdovers = []; 373 374 foreach my $hit (@{$group}) { 375 my $b = $hit->nB('query'); 376 my $e = $hit->nE('query'); 377 my $strand = $hit->strand; 378 379 ($b, $e) = ($e, $b) if $b > $e; 380 381 if (($e < $p_cutoff && $strand eq '1' && $p_cutoff > $chunk->offset +1) || 382 ($e < $m_cutoff && $strand eq '-1' && $m_cutoff > $chunk->offset +1) 383 ) { 384 push(@{$group_keepers}, $hit); 385 } 386 else { 387 push(@{$group_holdovers}, $hit); 388 } 389 } 390 391 push(@keepers, $group_keepers); 392 push(@holdovers, $group_holdovers); 393 } 394 395 #hit holdovers and keepers are returned in same order given by user 396 return @holdovers, @keepers; 397 } 398 #----------------------------------------------------------------------------- 399 sub process_the_chunk_divide_temp{ 400 my $chunk = shift @_; 401 my $split_hit = shift @_; 402 my $hit_groups = \@_; #processed and returned in order given by user 403 404 my $p_hits; 405 406 foreach my $group (@{$hit_groups}) { 407 push(@{$p_hits}, @{$group}); 408 } 409 410 my $coors = PhatHit_utils::to_begin_and_end_coors($p_hits, 'query'); 411 412 foreach my $coor (@{$coors}) { 413 $coor->[0] -= $chunk->offset(); 414 $coor->[1] -= $chunk->offset(); 415 #fix coordinates for hits outside of chunk end 416 $coor->[0] = $chunk->length if($coor->[0] > $chunk->length); 417 $coor->[1] = $chunk->length if($coor->[1] > $chunk->length); 418 #fix coordinates for hits outside of chunk begin 419 $coor->[0] = 0 if($coor->[0] < 0); 420 $coor->[1] = 0 if($coor->[1] < 0); 421 } 422 423 my $pieces = Shadower::getPieces(\($chunk->seq), $coors, 10); 424 $pieces = [sort {$b->{e} <=> $a->{e}} @{$pieces}]; 425 426 my @keepers; 427 my @holdovers; 428 429 my $cutoff = $chunk->length + $chunk->offset - $split_hit; 430 my $p_cutoff = $chunk->length + $chunk->offset + 1; 431 432 foreach my $piece (@{$pieces}) { 433 if ($piece->{e} + $chunk->offset >= $cutoff) { 434 $p_cutoff = $piece->{b} + $chunk->offset; 435 } 436 } 437 438 #too small, all are heldover for next round 439 if ($p_cutoff <= 1 + $chunk->offset) { 440 foreach my $g (@{$hit_groups}){ 441 push (@holdovers, $g); 442 push (@keepers, []); 443 } 444 return @holdovers, @keepers; 445 } 446 447 foreach my $group (@{$hit_groups}) { 448 my $group_keepers = []; 449 my $group_holdovers = []; 450 451 foreach my $hit (@{$group}) { 452 my $b = $hit->nB('query'); 453 my $e = $hit->nE('query'); 454 my $strand = $hit->strand; 455 456 ($b, $e) = ($e, $b) if $b > $e; 457 458 if (($e < $p_cutoff && $p_cutoff > $chunk->offset +1)) { 459 push(@{$group_keepers}, $hit); 460 } 461 else { 462 push(@{$group_holdovers}, $hit); 463 } 464 } 465 466 push(@keepers, $group_keepers); 467 push(@holdovers, $group_holdovers); 468 } 469 470 #hit holdovers and keepers are returned in same order given by user 471 return @holdovers, @keepers; 472 } 473 474 #----------------------------------------------------------------------------- 267 my $chunk = shift @_; 268 my $split_hit = shift @_; 269 my $s_flag = shift @_; #indicates whether to treat strands independantly 270 my $groups_cfh = shift @_; #group to cluster and find holdovers 271 272 my $phat_hits; 273 274 foreach my $group (@{$groups_cfh}) { 275 push(@{$phat_hits}, @{$group}); 276 } 277 278 my $p_hits = []; 279 my $m_hits = []; 280 281 #seperate by strand or not (this makes chunk cutoffs strand independant) 282 if($s_flag){ 283 ($p_hits, $m_hits) = PhatHit_utils::seperate_by_strand('query', $phat_hits, 1); #exonerate flag set 284 } 285 else{ 286 $p_hits = $phat_hits; 287 } 288 289 my $p_coors = PhatHit_utils::to_begin_and_end_coors($p_hits, 'query'); 290 my $m_coors = PhatHit_utils::to_begin_and_end_coors($m_hits, 'query'); 291 292 foreach my $p_coor (@{$p_coors}) { 293 $p_coor->[0] -= $chunk->offset(); 294 $p_coor->[1] -= $chunk->offset(); 295 #fix coordinates for hits outside of chunk end 296 $p_coor->[0] = $chunk->length if($p_coor->[0] > $chunk->length); 297 $p_coor->[1] = $chunk->length if($p_coor->[1] > $chunk->length); 298 #fix coordinates for hits outside of chunk begin 299 $p_coor->[0] = 0 if($p_coor->[0] < 0); 300 $p_coor->[1] = 0 if($p_coor->[1] < 0); 301 } 302 foreach my $m_coor (@{$m_coors}) { 303 $m_coor->[0] -= $chunk->offset(); 304 $m_coor->[1] -= $chunk->offset(); 305 #fix coordinates for hits outside of chunk end 306 $m_coor->[0] = $chunk->length if($m_coor->[0] > $chunk->length); 307 $m_coor->[1] = $chunk->length if($m_coor->[1] > $chunk->length); 308 #fix coordinates for hits outside of chunk begin 309 $m_coor->[0] = 0 if($m_coor->[0] < 0); 310 $m_coor->[1] = 0 if($m_coor->[1] < 0); 311 } 312 313 my $p_pieces = Shadower::getPieces(\$chunk->seq, $p_coors, 10); 314 $p_pieces = [sort {$b->{e} <=> $a->{e}} @{$p_pieces}]; 315 my $m_pieces = Shadower::getPieces(\$chunk->seq, $m_coors, 10); 316 $m_pieces = [sort {$b->{e} <=> $a->{e}} @{$m_pieces}]; 317 318 my $cutoff = $chunk->length + $chunk->offset - $split_hit; 319 my $p_cutoff = $chunk->length + $chunk->offset + 1; 320 my $m_cutoff = $chunk->length + $chunk->offset + 1; 321 322 my @keepers; 323 my @holdovers; 324 325 #no internal cutoff if this is the last contig 326 $cutoff = $chunk->length + $chunk->offset + 1 if($chunk->is_last); 327 328 #adjust cutoff to overlapping hits 329 foreach my $p_piece (@{$p_pieces}) { 330 if ($p_piece->{e} + $chunk->offset >= $cutoff) { 331 $p_cutoff = $p_piece->{b} + $chunk->offset; 332 } 333 } 334 foreach my $m_piece (@{$m_pieces}) { 335 if ($m_piece->{e} + $chunk->offset >= $cutoff) { 336 $m_cutoff = $m_piece->{b} + $chunk->offset; 337 } 338 } 339 340 #too small, all are heldover for next round 341 if ($p_cutoff <= 1 + $chunk->offset && 342 $m_cutoff <= 1 + $chunk->offset) { 343 foreach my $g (@{$groups_cfh}){ 344 push (@holdovers, $g); 345 push (@keepers, []); 346 } 347 return @keepers, @holdovers; 348 } 349 350 #seperate holdovers and keepers 351 foreach my $group (@{$groups_cfh}) { 352 my $group_keepers = []; 353 my $group_holdovers = []; 354 355 foreach my $hit (@{$group}) { 356 my $b = $hit->nB('query'); 357 my $e = $hit->nE('query'); 358 my $strand = $hit->strand; 359 360 #exonerate counterpart check (blastn with flipped exonerate) 361 $strand *= -1 if ($hit->{_exonerate_flipped}); 362 363 #if stands are not being treated independantly, treat all as plus strand 364 $strand = 1 if (!$s_flag); 365 366 ($b, $e) = ($e, $b) if $b > $e; 367 368 if (($strand eq '1' && $e < $p_cutoff && $p_cutoff > $chunk->offset +1) || 369 ($strand eq '-1' && $e < $m_cutoff && $m_cutoff > $chunk->offset +1) 370 ) { 371 $hit->{_holdover} = 0; 372 push(@{$group_keepers}, $hit); 373 } 374 else { 375 $hit->{_holdover} = 1; 376 push(@{$group_holdovers}, $hit); 377 } 378 } 379 380 push(@keepers, $group_keepers); 381 push(@holdovers, $group_holdovers); 382 } 383 384 #keepers and hit holdovers are returned in same order given by user 385 return @keepers, @holdovers; 386 } 387 #----------------------------------------------------------------------------- 388 475 389 # sub write_quality_data { 476 390 # my $quality_indices = shift; … … 1099 1013 #----------------------------------------------------------------------------- 1100 1014 sub polish_exonerate { 1101 my $g_fasta = shift; 1102 my $phat_hit_clusters = shift; 1103 my $db_index = shift; 1104 my $the_void = shift; 1105 my $depth = shift; 1106 my $type = shift; 1107 my $exonerate = shift; 1108 my $pcov = shift; 1109 my $pid = shift; 1110 my $score_limit = shift; 1111 my $matrix = shift; 1112 my $LOG = shift; 1113 1114 my $def = Fasta::getDef($g_fasta); 1115 my $seq = Fasta::getSeqRef($g_fasta); 1116 1117 my $exe = $exonerate; 1118 1119 my @exonerate_clusters; 1120 my $i = 0; 1121 foreach my $c (@{$phat_hit_clusters}) { 1122 my $n = 0; 1123 my $got_some = 0; 1124 1125 foreach my $hit (@{$c}) { 1126 last if $n == $depth; 1127 1128 next if $hit->pAh < $pcov; 1129 next if $hit->hsp('best')->frac_identical < $pid; 1130 1131 my ($nB, $nE) = PhatHit_utils::get_span_of_hit($hit,'query'); 1132 1133 my @coors = [$nB, $nE]; 1134 my $p = Shadower::getPieces($seq, \@coors, 50); 1135 my $p_def = $def." ".$p->[0]->{b}." ".$p->[0]->{e}; 1136 my $p_fasta = Fasta::toFasta($p_def, \$p->[0]->{piece}); 1137 my $name = Fasta::def2SeqID($p_def); 1138 my $safe_name = Fasta::seqID2SafeID($name); 1139 1140 my $d_file = $the_void."/".$safe_name.'.'.$p->[0]->{b}.'.'.$p->[0]->{e}.".fasta"; 1141 1142 FastaFile::writeFile($p_fasta, $d_file); 1143 1144 my $offset = $p->[0]->{b} - 1; 1145 my $id = $hit->name(); 1146 1147 my $fastaObj = $db_index->get_Seq_for_hit($hit); 1148 1149 #still no sequence? try rebuilding the index and try again 1150 if (not $fastaObj) { 1015 my $g_fasta = shift; 1016 my $phat_hits = shift; 1017 my $db_index = shift; 1018 my $the_void = shift; 1019 my $type = shift; 1020 my $exonerate = shift; 1021 my $pcov = shift; 1022 my $pid = shift; 1023 my $score_limit = shift; 1024 my $matrix = shift; 1025 my $LOG = shift; 1026 1027 my $def = Fasta::getDef($g_fasta); 1028 my $seq = Fasta::getSeqRef($g_fasta); 1029 1030 my $exe = $exonerate; 1031 1032 my @exonerate_data; 1033 1034 foreach my $hit (@{$phat_hits}) { 1035 next if $hit->pAh < $pcov; 1036 next if $hit->hsp('best')->frac_identical < $pid; 1037 1038 my ($nB, $nE) = PhatHit_utils::get_span_of_hit($hit,'query'); 1039 1040 my @coors = [$nB, $nE]; 1041 my $p = Shadower::getPieces($seq, \@coors, 50); 1042 my $p_def = $def." ".$p->[0]->{b}." ".$p->[0]->{e}; 1043 my $p_fasta = Fasta::toFasta($p_def, \$p->[0]->{piece}); 1044 my $name = Fasta::def2SeqID($p_def); 1045 my $safe_name = Fasta::seqID2SafeID($name); 1046 1047 my $d_file = $the_void."/".$safe_name.'.'.$p->[0]->{b}.'.'.$p->[0]->{e}.".fasta"; 1048 1049 FastaFile::writeFile($p_fasta, $d_file); 1050 1051 my $offset = $p->[0]->{b} - 1; 1052 my $id = $hit->name(); 1053 1054 my $fastaObj = $db_index->get_Seq_for_hit($hit); 1055 1056 #still no sequence? try rebuilding the index and try again 1057 if (not $fastaObj) { 1151 1058 print STDERR "WARNING: Cannot find> ".$hit->name.", trying to re-index the fasta.\n"; 1152 1059 $db_index->reindex(); 1153 1060 $fastaObj = $db_index->get_Seq_for_hit($hit); 1154 1061 1155 1062 if (not $fastaObj) { 1156 print STDERR "stop here:".$hit->name."\n";1157 die "ERROR: Fasta index error\n";1063 print STDERR "stop here:".$hit->name."\n"; 1064 die "ERROR: Fasta index error\n"; 1158 1065 } 1159 }1160 1161 my $seq = $fastaObj->seq();1162 my $def = $db_index->header_for_hit($hit);1163 1164 my $fasta = Fasta::toFasta('>'.$def, \$seq);1165 1166 #build a safe name for file names from the sequence identifier1167 my $safe_id = Fasta::seqID2SafeID($id);1168 1169 my $t_file = $the_void."/".$safe_id.'.fasta';1170 FastaFile::writeFile($fasta, $t_file);1171 1172 my $exonerate_hits = to_polisher($d_file,1173 $t_file,1174 $the_void,1175 $offset,1176 $type,1177 $exe,1178 $score_limit,1179 $matrix,1180 $LOG1066 } 1067 1068 my $seq = $fastaObj->seq(); 1069 my $def = $db_index->header_for_hit($hit); 1070 1071 my $fasta = Fasta::toFasta('>'.$def, \$seq); 1072 1073 #build a safe name for file names from the sequence identifier 1074 my $safe_id = Fasta::seqID2SafeID($id); 1075 1076 my $t_file = $the_void."/".$safe_id.'.fasta'; 1077 FastaFile::writeFile($fasta, $t_file); 1078 1079 my $exonerate_hits = to_polisher($d_file, 1080 $t_file, 1081 $the_void, 1082 $offset, 1083 $type, 1084 $exe, 1085 $score_limit, 1086 $matrix, 1087 $LOG 1181 1088 ); 1182 1183 foreach my $exonerate_hit (@{$exonerate_hits}) {1089 1090 foreach my $exonerate_hit (@{$exonerate_hits}) { 1184 1091 if (defined($exonerate_hit) && exonerate_okay($exonerate_hit)) { 1185 $n++; 1186 push(@{$exonerate_clusters[$i]}, $exonerate_hit); 1187 $got_some = 1; 1092 #tag the source blastn hit to let you know the counterpart 1093 #exonerate hit was flipped to the other strand 1094 $hit->{_exonerate_flipped} = 1 if($exonerate_hit->{_was_flipped}); 1095 $hit->type("exonerate:$type"); #set hit type (exonerate only) 1096 1097 push(@exonerate_data, $exonerate_hit); 1188 1098 } 1189 } 1190 } 1191 $i++ if $got_some; 1192 } 1193 return \@exonerate_clusters; 1099 } 1100 } 1101 1102 return \@exonerate_data; 1194 1103 } 1195 1104 #----------------------------------------------------------------------------- 1196 1105 sub exonerate_okay { 1197 my $hit = shift; 1198 1199 my $i = 0; 1200 foreach my $hsp ($hit->hsps()) { 1201 return 0 unless defined($hsp->nB('query')); 1202 return 0 unless defined($hsp->nE('query')); 1203 return 0 unless defined($hsp->nB('hit')); 1204 return 0 unless defined($hsp->nE('hit')); 1205 return 0 unless defined($hsp->strand('query')); 1206 return 0 unless defined($hsp->strand('query')); 1207 return 0 unless defined($hsp->strand('hit')); 1208 return 0 unless defined($hsp->strand('hit')); 1209 1210 my $q_str = $hsp->query_string(); 1211 my $h_str = $hsp->hit_string(); 1212 1213 if ($h_str =~ /Target Intron/) { 1214 print STDERR "BADDD EXONERATE!\n"; 1215 sleep 4; 1216 return 0; 1217 } 1218 elsif ($q_str =~ /Target Intron/) { 1219 print STDERR "BADDD EXONERATE!\n"; 1220 sleep 4; 1221 return 0; 1222 } 1223 $i++; 1224 } 1225 1226 return 1 1227 } 1106 my $hit = shift; 1107 1108 my $i = 0; 1109 foreach my $hsp ($hit->hsps()) { 1110 return 0 unless defined($hsp->nB('query')); 1111 return 0 unless defined($hsp->nE('query')); 1112 return 0 unless defined($hsp->nB('hit')); 1113 return 0 unless defined($hsp->nE('hit')); 1114 return 0 unless defined($hsp->strand('query')); 1115 return 0 unless defined($hsp->strand('query')); 1116 return 0 unless defined($hsp->strand('hit')); 1117 return 0 unless defined($hsp->strand('hit')); 1118 1119 my $q_str = $hsp->query_string(); 1120 my $h_str = $hsp->hit_string(); 1121 1122 if ($h_str =~ /Target Intron/) { 1123 print STDERR "BADDD EXONERATE!\n"; 1124 sleep 4; 1125 return 0; 1126 } 1127 elsif ($q_str =~ /Target Intron/) { 1128 print STDERR "BADDD EXONERATE!\n"; 1129 sleep 4; 1130 return 0; 1131 } 1132 $i++; 1133 } 1134 1135 return 1; 1136 } 1137 1228 1138 #----------------------------------------------------------------------------- 1229 1139 sub to_polisher { … … 1417 1327 } 1418 1328 elsif (-e $blast_finished) { 1419 print STDERR "re reading blast report.\n" unless $main::quiet;1420 print STDERR "$blast_finished\n" unless $main::quiet;1329 print STDERR "re reading blast report.\n" unless ($main::quiet || !$LOG_FLAG); 1330 print STDERR "$blast_finished\n" unless ($main::quiet || !$LOG_FLAG); 1421 1331 return $blast_dir; 1422 1332 } … … 1590 1500 $command .= " lcmask"; 1591 1501 $command .= " gi"; 1502 $command .= " warnings"; #suppress certain warnings 1503 $command .= " novalidctxok"; #fixes failure related to short and masked sequence 1504 $command .= " shortqueryok"; #fixes failure related to very short sequence 1592 1505 $command .= ($org_type eq 'eukaryotic') ? "" : " kap"; 1593 1506 #$command .= " mformat=2"; # remove for full report … … 1685 1598 } 1686 1599 elsif (-e $blast_finished) { 1687 print STDERR "re reading blast report.\n" unless $main::quiet;1688 print STDERR "$blast_finished\n" unless $main::quiet;1600 print STDERR "re reading blast report.\n" unless ($main::quiet || !$LOG_FLAG); 1601 print STDERR "$blast_finished\n" unless ($main::quiet || !$LOG_FLAG); 1689 1602 return $blast_dir; 1690 1603 } … … 1877 1790 $command .= " kap"; 1878 1791 $command .= " gi"; 1792 $command .= " warnings"; #suppress certain warnings 1793 $command .= " novalidctxok"; #fixes failure related to short and masked sequence 1794 $command .= " shortqueryok"; #fixes failure related to very short sequence 1879 1795 #$command .= " mformat=2"; # remove for full report 1880 1796 $command .= " -o $out_file"; … … 1965 1881 } 1966 1882 elsif (-e $blast_finished) { 1967 print STDERR "re reading blast report.\n" unless $main::quiet;1968 print STDERR "$blast_finished\n" unless $main::quiet;1883 print STDERR "re reading blast report.\n" unless ($main::quiet || !$LOG_FLAG); 1884 print STDERR "$blast_finished\n" unless ($main::quiet || !$LOG_FLAG); 1969 1885 return $blast_dir; 1970 1886 } … … 2106 2022 $chunk->erase_fasta_file(); 2107 2023 2108 return $chunk_keepers 2024 return $chunk_keepers; 2109 2025 } 2110 2026 #----------------------------------------------------------------------------- … … 2135 2051 $command .= " lcmask"; 2136 2052 $command .= " gi"; 2053 $command .= " warnings"; #suppress certain warnings 2054 $command .= " novalidctxok"; #fixes failure related to short and masked sequence 2055 $command .= " shortqueryok"; #fixes failure related to very short sequence 2137 2056 $command .= ($org_type eq 'eukaryotic') ? "" : " kap"; 2138 2057 #$command .= " mformat=2"; # remove for full report lib/PhatHit_utils.pm
r260 r277 42 42 my $what = shift; 43 43 my $hits = shift; 44 my $exonerate_flag = shift || 0; 45 46 #The exonerate flag specifies whether to take into account 47 #exonerate realignment alignment flip. In other words should 48 #a blastn hit be considered to belong to the opposite strand 49 #if its exonerate counterpart was flipped 44 50 45 51 my @p; … … 48 54 my @z; 49 55 foreach my $hit (@{$hits}){ 50 51 if ($hit->strand($what) eq '-1/1'){ 52 push(@x, $hit); 53 } 54 elsif ($hit->strand($what) eq '1/-1'){ 55 push(@x, $hit); 56 } 57 elsif ($hit->strand($what) == 1) { 58 push(@p, $hit); 59 } 60 elsif ($hit->strand($what) == -1 ){ 61 push(@m, $hit); 62 } 63 elsif ($hit->strand($what) == 0 ){ 64 push(@z, $hit); 65 } 56 my $strand = $hit->strand($what); 57 58 unless($strand =~ /^\-?\d$/){ 59 die "FATAL: Could not get stand correctly. Perhaps". 60 "your using the wrong version of BioPerl.\n\n"; 61 } 62 63 $strand *= -1 if($exonerate_flag && $hit->{_exonerate_flipped}); 64 65 if ($strand == 1) { 66 push(@p, $hit); 67 } 68 elsif ($strand == -1 ){ 69 push(@m, $hit); 70 } 71 elsif ($strand == 0 ){ 72 push(@z, $hit); 73 } 74 66 75 } 67 76 return (\@p, \@m, \@x, \@z); … … 813 822 if ($what eq 'both' || $what eq 'rev'){ 814 823 @sorted = sort {$b->nB('query') <=> $a->nB('query') } @new_hsps; 824 $new_hit->{_was_flipped} = 1; 815 825 } 816 826 elsif ($what eq 'revh'){ lib/Process/MpiChunk.pm
r271 r277 316 316 $the_void 317 317 ); 318 318 319 $GFF3->set_current_contig($seq_id, $q_seq_ref); 319 320 … … 638 639 my $the_void = $VARS->{the_void}; 639 640 my $safe_seq_id = $VARS->{safe_seq_id}; 640 my $LOG = $VARS->{LOG}; 641 641 my $LOG = $VARS->{LOG}; 642 642 643 643 my $masked_fasta = Fasta::toFasta($q_def.' masked', \$masked_total_seq); … … 698 698 } 699 699 elsif ($level == 6) { #prep new fasta chunks 700 $level_status = 'preparing new fas a chunks';700 $level_status = 'preparing new fasta chunks'; 701 701 if ($flag eq 'load') { 702 702 #-------------------------CHUNKER … … 815 815 @args = (qw{chunk 816 816 res_dir 817 masked_fasta 818 fasta_t_index 819 holdover_blastn 820 the_void 821 q_seq_ref 817 822 LOG 818 823 CTL_OPT} … … 824 829 my %CTL_OPT = %{$VARS->{CTL_OPT}}; 825 830 my $res_dir = $VARS->{res_dir}; 831 my $masked_fasta = $VARS->{masked_fasta}; 832 my $fasta_t_index = $VARS->{fasta_t_index}; 833 my $holdover_blastn = $VARS->{holdover_blastn}; 834 my $the_void = $VARS->{the_void}; 835 my $q_seq_ref = $VARS->{q_seq_ref}; 826 836 my $LOG = $VARS->{LOG}; 827 837 my $chunk = $VARS->{chunk}; … … 837 847 } 838 848 $res_dir = undef; 839 #-------------------------CODE 840 841 #------------------------RESULTS 842 %results = (blastn_keepers => $blastn_keepers, 849 850 #==merge in heldover Phathits from last round 851 if ($chunk->number != 0) { #if not first chunk 852 #reviews heldover blast hits, 853 #then merges and reblasts them if they cross the divide 854 $blastn_keepers = GI::merge_resolve_hits(\$masked_fasta, 855 $fasta_t_index, 856 $blastn_keepers, 857 $holdover_blastn, 858 $the_void, 859 \%CTL_OPT, 860 'blastn', 861 $LOG 862 ); 863 } 864 865 #seperate out hits too close to chunk divide to be run with exonerate 866 my $holdovers = []; 867 if (not $chunk->is_last) { 868 ($blastn_keepers, $holdovers) = GI::process_the_chunk_divide($chunk, 869 $CTL_OPT{'split_hit'}, 870 1, #treat strands independently 871 [$blastn_keepers] 872 ); 873 } 874 875 #-cluster the blastn hits 876 print STDERR "cleaning blastn...\n" unless $main::quiet; 877 my $blastn_clusters = cluster::clean_and_cluster($blastn_keepers, 878 $q_seq_ref, 879 10 880 ); 881 882 #flatten clusters 883 my $blastn_data = GI::flatten($blastn_clusters); 884 885 #new filtered keepers for later chunk divide processing 886 $blastn_keepers = GI::combine($blastn_data, $holdovers); 887 #-------------------------CODE 888 889 #------------------------RESULTS 890 %results = (blastn_data => $blastn_data, 891 blastn_keepers => $blastn_keepers, 843 892 res_dir => $res_dir 844 893 ); … … 921 970 @args = (qw{chunk 922 971 res_dir 972 masked_fasta 973 fasta_p_index 974 holdover_blastx 975 the_void 976 q_seq_ref 923 977 LOG 924 978 CTL_OPT} … … 930 984 my %CTL_OPT = %{$VARS->{CTL_OPT}}; 931 985 my $res_dir = $VARS->{res_dir}; 986 my $masked_fasta = $VARS->{masked_fasta}; 987 my $fasta_p_index = $VARS->{fasta_p_index}; 988 my $holdover_blastx = $VARS->{holdover_blastx}; 989 my $the_void = $VARS->{the_void}; 990 my $q_seq_ref = $VARS->{q_seq_ref}; 932 991 my $LOG = $VARS->{LOG}; 933 992 my $chunk = $VARS->{chunk}; 934 935 993 936 994 my $blastx_keepers = []; … … 943 1001 } 944 1002 $res_dir = undef; 945 #-------------------------CODE 946 947 #------------------------RESULTS 948 %results = (blastx_keepers => $blastx_keepers, 1003 1004 #==merge in heldover Phathits from last round 1005 if ($chunk->number != 0) { #if not first chunk 1006 #reviews heldover blast hits, 1007 #then merges and reblasts them if they cross the divide 1008 $blastx_keepers = GI::merge_resolve_hits(\$masked_fasta, 1009 $fasta_p_index, 1010 $blastx_keepers, 1011 $holdover_blastx, 1012 $the_void, 1013 \%CTL_OPT, 1014 'blastx', 1015 $LOG 1016 ); 1017 } 1018 1019 #seperate out hits too close to chunk divide to be run with exonerate 1020 my $holdovers = []; 1021 if (not $chunk->is_last) { 1022 ($blastx_keepers, $holdovers) = GI::process_the_chunk_divide($chunk, 1023 $CTL_OPT{'split_hit'}, 1024 1, #treat strands independently 1025 [$blastx_keepers] 1026 ); 1027 } 1028 1029 #-cluster the blastx hits 1030 print STDERR "cleaning blastx...\n" unless $main::quiet; 1031 my $blastx_clusters = cluster::clean_and_cluster($blastx_keepers, 1032 $q_seq_ref, 1033 10 1034 ); 1035 1036 #flatten clusters 1037 my $blastx_data = GI::flatten($blastx_clusters); 1038 1039 #new filtered keepers for later chunk divide processing 1040 $blastx_keepers = GI::combine($blastx_data, $holdovers); 1041 #-------------------------CODE 1042 1043 #------------------------RESULTS 1044 %results = (blastx_data => $blastx_data, 1045 blastx_keepers => $blastx_keepers, 949 1046 res_dir => $res_dir 950 1047 ); … … 1026 1123 @args = (qw{chunk 1027 1124 res_dir 1125 masked_fasta 1126 fasta_a_index 1127 holdover_tblastx 1128 the_void 1129 q_seq_ref 1028 1130 LOG 1029 1131 CTL_OPT} … … 1035 1137 my %CTL_OPT = %{$VARS->{CTL_OPT}}; 1036 1138 my $res_dir = $VARS->{res_dir}; 1139 my $masked_fasta = $VARS->{masked_fasta}; 1140 my $fasta_a_index = $VARS->{fasta_a_index}; 1141 my $holdover_tblastx = $VARS->{holdover_tblastx}; 1142 my $the_void = $VARS->{the_void}; 1143 my $q_seq_ref = $VARS->{q_seq_ref}; 1037 1144 my $LOG = $VARS->{LOG}; 1038 1145 my $chunk = $VARS->{chunk}; 1039 1040 1146 1041 1147 my $tblastx_keepers = []; … … 1048 1154 } 1049 1155 $res_dir = undef; 1050 #-------------------------CODE 1051 1052 #------------------------RESULTS 1053 %results = (tblastx_keepers => $tblastx_keepers, 1156 1157 #==merge in heldover Phathits from last round 1158 if ($chunk->number != 0) { #if not first chunk 1159 #reviews heldover blast hits, 1160 #then merges and reblasts them if they cross the divide 1161 $tblastx_keepers = GI::merge_resolve_hits(\$masked_fasta, 1162 $fasta_a_index, 1163 $tblastx_keepers, 1164 $holdover_tblastx, 1165 $the_void, 1166 \%CTL_OPT, 1167 'tblastx', 1168 $LOG 1169 ); 1170 } 1171 1172 #seperate out hits too close to chunk divide to be run with exonerate 1173 my $holdovers = []; 1174 if (not $chunk->is_last) { 1175 ($tblastx_keepers, $holdovers) = GI::process_the_chunk_divide($chunk, 1176 $CTL_OPT{'split_hit'}, 1177 1, #treat strands independently 1178 [$tblastx_keepers] 1179 ); 1180 } 1181 1182 #-cluster the tblastx hits 1183 print STDERR "cleaning tblastx...\n" unless $main::quiet; 1184 my $tblastx_clusters = cluster::clean_and_cluster($tblastx_keepers, 1185 $q_seq_ref, 1186 10 1187 ); 1188 1189 #flatten clusters 1190 my $tblastx_data = GI::flatten($tblastx_clusters); 1191 1192 #new filtered keepers for later chunk divide processing 1193 $tblastx_keepers = GI::combine($tblastx_data, $holdovers); 1194 1195 #temp destroy this for now because we don't use tblastx data with exonerate 1196 $tblastx_data = []; 1197 #-------------------------CODE 1198 1199 #------------------------RESULTS 1200 %results = (tblastx_data => $tblastx_data, 1201 tblastx_keepers => $tblastx_keepers, 1054 1202 res_dir => $res_dir 1055 1203 ); … … 1061 1209 } 1062 1210 } 1063 elsif ($level == 13) { #process chunk divide 1211 elsif ($level == 13) { #exonerate proteins 1212 $level_status = 'doing exonerate of proteins'; 1213 if ($flag eq 'load') { 1214 #-------------------------CHUNKER 1215 my $chunk = new Process::MpiChunk($level, $VARS); 1216 push(@chunks, $chunk); 1217 #-------------------------CHUNKER 1218 } 1219 elsif ($flag eq 'init') { 1220 #------------------------ARGS_IN 1221 @args = (qw{blastx_data 1222 the_void 1223 q_seq_ref 1224 fasta 1225 fasta_p_index 1226 LOG 1227 CTL_OPT} 1228 ); 1229 #------------------------ARGS_IN 1230 } 1231 elsif ($flag eq 'run') { 1232 #-------------------------CODE 1233 my %CTL_OPT = %{$VARS->{CTL_OPT}};; 1234 my $blastx_data = $VARS->{blastx_data}; 1235 my $the_void = $VARS->{the_void}; 1236 my $q_seq_ref = $VARS->{q_seq_ref}; 1237 my $fasta = $VARS->{fasta}; 1238 my $fasta_p_index = $VARS->{fasta_p_index}; 1239 my $LOG = $VARS->{LOG}; 1240 1241 #-make a multi-fasta of the seqs in the blastx_clusters 1242 #-polish the blastx hits with exonerate 1243 my $exonerate_p_data =[]; 1244 if($CTL_OPT{organism_type} eq 'eukaryotic'){ 1245 $exonerate_p_data = GI::polish_exonerate($fasta, 1246 $blastx_data, 1247 $fasta_p_index, 1248 $the_void, 1249 'p', 1250 $CTL_OPT{exonerate}, 1251 $CTL_OPT{pcov_blastx}, 1252 $CTL_OPT{pid_blastx}, 1253 $CTL_OPT{ep_score_limit}, 1254 $CTL_OPT{ep_matrix}, 1255 $LOG 1256 ); 1257 } 1258 1259 #free memmory 1260 @{$blastx_data} = (); 1261 #-------------------------CODE 1262 1263 #------------------------RESULTS 1264 %results = (exonerate_p_data => $exonerate_p_data); 1265 #------------------------RESULTS 1266 } 1267 elsif ($flag eq 'flow') { 1268 #-------------------------NEXT_LEVEL 1269 #-------------------------NEXT_LEVEL 1270 } 1271 } 1272 elsif ($level == 14) { #exonerate ESTs 1273 $level_status = 'doing exonerate of ESTs'; 1274 if ($flag eq 'load') { 1275 #-------------------------CHUNKER 1276 my $chunk = new Process::MpiChunk($level, $VARS); 1277 push(@chunks, $chunk); 1278 #-------------------------CHUNKER 1279 } 1280 elsif ($flag eq 'init') { 1281 #------------------------ARGS_IN 1282 @args = (qw{blastn_data 1283 the_void 1284 q_seq_ref 1285 fasta 1286 fasta_t_index 1287 LOG 1288 CTL_OPT} 1289 ); 1290 #------------------------ARGS_IN 1291 } 1292 elsif ($flag eq 'run') { 1293 #-------------------------CODE 1294 my %CTL_OPT = %{$VARS->{CTL_OPT}}; 1295 my $blastn_data = $VARS->{blastn_data}; 1296 my $the_void = $VARS->{the_void}; 1297 my $q_seq_ref = $VARS->{q_seq_ref}; 1298 my $fasta = $VARS->{fasta}; 1299 my $fasta_t_index = $VARS->{fasta_t_index}; 1300 my $LOG = $VARS->{LOG}; 1301 1302 #-polish blastn hits with exonerate 1303 my $exonerate_e_data = []; 1304 if($CTL_OPT{organism_type} eq 'eukaryotic'){ 1305 $exonerate_e_data = GI::polish_exonerate($fasta, 1306 $blastn_data, 1307 $fasta_t_index, 1308 $the_void, 1309 'e', 1310 $CTL_OPT{exonerate}, 1311 $CTL_OPT{pcov_blastn}, 1312 $CTL_OPT{pid_blastn}, 1313 $CTL_OPT{en_score_limit}, 1314 $CTL_OPT{en_matrix}, 1315 $LOG 1316 ); 1317 } 1318 1319 #free memmory 1320 @{$blastn_data} = (); 1321 #-------------------------CODE 1322 1323 #------------------------RESULTS 1324 %results = (exonerate_e_data => $exonerate_e_data); 1325 #------------------------RESULTS 1326 } 1327 elsif ($flag eq 'flow') { 1328 #-------------------------NEXT_LEVEL 1329 #-------------------------NEXT_LEVEL 1330 } 1331 } 1332 elsif ($level == 15) { #process chunk divide 1064 1333 $level_status = 'processing the chunk divide'; 1065 1334 if ($flag eq 'load') { … … 1079 1348 blastx_keepers 1080 1349 tblastx_keepers 1350 exonerate_e_data 1351 exonerate_p_data 1081 1352 holdover_blastn 1082 1353 holdover_blastx … … 1108 1379 my $blastx_keepers = $VARS->{blastx_keepers}; 1109 1380 my $tblastx_keepers = $VARS->{tblastx_keepers}; 1381 my $exonerate_e_data = $VARS->{exonerate_e_data}; 1382 my $exonerate_p_data = $VARS->{exonerate_p_data}; 1110 1383 my $holdover_blastn = $VARS->{holdover_blastn}; 1111 1384 my $holdover_blastx = $VARS->{holdover_blastx}; … … 1123 1396 my $LOG = $VARS->{LOG}; 1124 1397 1125 1126 1398 #-get only those predictions on the chunk 1127 1399 my $preds_on_chunk = GI::get_preds_on_chunk($preds, … … 1140 1412 $q_seq_ref, 1141 1413 'protein' 1142 );1414 ); 1143 1415 #-est evidence passthrough 1144 1416 $est_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, 1145 1417 $q_seq_ref, 1146 1418 'est' 1147 );1419 ); 1148 1420 #-altest evidence passthrough 1149 1421 $altest_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, 1150 1422 $q_seq_ref, 1151 1423 'altest' 1152 );1424 ); 1153 1425 #-gff gene annotation passthrough here 1154 1426 $model_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, 1155 1427 $q_seq_ref, 1156 1428 'model' 1157 );1429 ); 1158 1430 #-pred passthrough 1159 1431 $pred_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, 1160 1432 $q_seq_ref, 1161 1433 'pred' 1162 ); 1163 } 1164 1165 #==merge heldover Phathits from last round 1166 if ($chunk->number != 0) { #if not first chunk 1167 #reviews heldover blast hits, 1168 #then merges and reblasts them if they cross the divide 1169 ($blastn_keepers, 1170 $blastx_keepers, 1171 $tblastx_keepers) = GI::merge_resolve_hits(\$masked_fasta, 1172 $fasta_t_index, 1173 $fasta_p_index, 1174 $fasta_a_index, 1175 $blastn_keepers, 1176 $blastx_keepers, 1177 $tblastx_keepers, 1178 $holdover_blastn, 1179 $holdover_blastx, 1180 $holdover_tblastx, 1181 $the_void, 1182 \%CTL_OPT, 1183 $LOG 1184 ); 1185 #combine remaining holdover types 1186 push(@{$preds_on_chunk}, @{$holdover_pred}); 1187 push(@{$pred_gff_keepers}, @{$holdover_pred_gff}); 1188 push(@{$est_gff_keepers}, @{$holdover_est_gff}); 1189 push(@{$altest_gff_keepers}, @{$holdover_altest_gff}); 1190 push(@{$prot_gff_keepers}, @{$holdover_prot_gff}); 1191 push(@{$model_gff_keepers}, @{$holdover_model_gff}); 1192 1193 #clear holdovers 1194 @{$holdover_pred} = (); 1195 @{$holdover_est_gff} = (); 1196 @{$holdover_altest_gff} = (); 1197 @{$holdover_prot_gff} = (); 1198 @{$holdover_pred_gff} = (); 1199 @{$holdover_model_gff} = (); 1200 } 1434 ); 1435 } 1436 1437 #combine remaining holdover types 1438 push(@{$preds_on_chunk}, @{$holdover_pred}); 1439 push(@{$pred_gff_keepers}, @{$holdover_pred_gff}); 1440 push(@{$est_gff_keepers}, @{$holdover_est_gff}); 1441 push(@{$altest_gff_keepers}, @{$holdover_altest_gff}); 1442 push(@{$prot_gff_keepers}, @{$holdover_prot_gff}); 1443 push(@{$model_gff_keepers}, @{$holdover_model_gff}); 1444 1445 #clear holdovers 1446 @{$holdover_pred} = (); 1447 @{$holdover_est_gff} = (); 1448 @{$holdover_altest_gff} = (); 1449 @{$holdover_prot_gff} = (); 1450 @{$holdover_pred_gff} = (); 1451 @{$holdover_model_gff} = (); 1201 1452 1202 1453 #==PROCESS HITS CLOSE TO CODE DIVISIONS 1203 1454 #holdover hits that are too close to the divide for review with next chunk 1204 1455 if (not $chunk->is_last) { #if not last chunk 1205 ($holdover_blastn, 1206 $holdover_blastx, 1207 $holdover_tblastx, 1208 $holdover_pred, 1209 $holdover_est_gff, 1210 $holdover_altest_gff, 1211 $holdover_prot_gff, 1212 $holdover_pred_gff, 1213 $holdover_model_gff, 1214 $blastn_keepers, 1215 $blastx_keepers, 1216 $tblastx_keepers, 1217 $preds_on_chunk, 1218 $est_gff_keepers, 1219 $altest_gff_keepers, 1220 $prot_gff_keepers, 1221 $pred_gff_keepers, 1222 $model_gff_keepers 1223 ) = GI::process_the_chunk_divide_temp($chunk, 1224 $CTL_OPT{'split_hit'}, 1225 $blastn_keepers, 1226 $blastx_keepers, 1227 $tblastx_keepers, 1228 $preds_on_chunk, 1229 $est_gff_keepers, 1230 $altest_gff_keepers, 1231 $prot_gff_keepers, 1232 $pred_gff_keepers, 1233 $model_gff_keepers 1234 ); 1235 } 1236 1237 #shatter hits and flip strand of blastn where appropriate. 1456 ($blastn_keepers, 1457 $blastx_keepers, 1458 $tblastx_keepers, 1459 $preds_on_chunk, 1460 $est_gff_keepers, 1461 $altest_gff_keepers, 1462 $prot_gff_keepers, 1463 $pred_gff_keepers, 1464 $model_gff_keepers, 1465 $exonerate_e_data, 1466 $exonerate_p_data, 1467 $holdover_blastn, 1468 $holdover_blastx, 1469 $holdover_tblastx, 1470 $holdover_pred, 1471 $holdover_est_gff, 1472 $holdover_altest_gff, 1473 $holdover_prot_gff, 1474 $holdover_pred_gff, 1475 $holdover_model_gff 1476 ) = GI::process_the_chunk_divide($chunk, 1477 $CTL_OPT{'split_hit'}, 1478 1, 1479 [$blastn_keepers, 1480 $blastx_keepers, 1481 $tblastx_keepers, 1482 $preds_on_chunk, 1483 $est_gff_keepers, 1484 $altest_gff_keepers, 1485 $prot_gff_keepers, 1486 $pred_gff_keepers, 1487 $model_gff_keepers, 1488 $exonerate_e_data, 1489 $exonerate_p_data] 1490 ); 1491 } 1492 1493 #Shatter hits. This is only for prokaryotic organisms. 1494 #Flip strand of blastn where appropriate. 1495 #This is done on blastn hits because exonerate is skipped. 1238 1496 #I shatter after processing the chunk divide to avoid weird 1239 1497 #complications from flipping on only one side of a split HSP … … 1248 1506 } 1249 1507 } 1250 1251 1508 #-------------------------CODE 1252 1509 … … 1261 1518 blastx_keepers => $blastx_keepers, 1262 1519 tblastx_keepers => $tblastx_keepers, 1520 exonerate_e_data => $exonerate_e_data, 1521 exonerate_p_data => $exonerate_p_data, 1263 1522 holdover_est_gff => $holdover_est_gff, 1264 1523 holdover_altest_gff => $holdover_altest_gff, … … 1278 1537 } 1279 1538 } 1280 elsif ($level == 14) { #exonerate proteins1281 $level_status = 'doing exonerate of proteins';1282 if ($flag eq 'load') {1283 #-------------------------CHUNKER1284 my $chunk = new Process::MpiChunk($level, $VARS);1285 push(@chunks, $chunk);1286 #-------------------------CHUNKER1287 }1288 elsif ($flag eq 'init') {1289 #------------------------ARGS_IN1290 @args = (qw{blastx_keepers1291 the_void1292 q_seq_ref1293 fasta1294 fasta_p_index1295 LOG1296 CTL_OPT}1297 );1298 #------------------------ARGS_IN1299 }1300 elsif ($flag eq 'run') {1301 #-------------------------CODE1302 my %CTL_OPT = %{$VARS->{CTL_OPT}};;1303 my $blastx_keepers = $VARS->{blastx_keepers};1304 my $the_void = $VARS->{the_void};1305 my $q_seq_ref = $VARS->{q_seq_ref};1306 my $fasta = $VARS->{fasta};1307 my $fasta_p_index = $VARS->{fasta_p_index};1308 my $LOG = $VARS->{LOG};1309 1310 #variables that are persistent outside of try block1311 my $blastx_data;1312 my $exonerate_p_data;1313 1314 #-cluster the blastx hits1315 print STDERR "cleaning blastx...\n" unless $main::quiet;1316 1317 my $blastx_clusters = cluster::clean_and_cluster($blastx_keepers,1318 $q_seq_ref,1319 101320 );1321 undef $blastx_keepers; #free up memory1322 1323 #-make a multi-fasta of the seqs in the blastx_clusters1324 #-polish the blastx hits with exonerate1325 my $exoner_p_clust =[];1326 if($CTL_OPT{organism_type} eq 'eukaryotic'){1327 $exoner_p_clust = GI::polish_exonerate($fasta,1328 $blastx_clusters,1329 $fasta_p_index,1330 $the_void,1331 5,1332 'p',1333 $CTL_OPT{exonerate},1334 $CTL_OPT{pcov_blastx},1335 $CTL_OPT{pid_blastx},1336 $CTL_OPT{ep_score_limit},1337 $CTL_OPT{ep_matrix},1338 $LOG1339 );1340 }1341 1342 #flatten clusters1343 $blastx_data = GI::flatten($blastx_clusters);1344 $exonerate_p_data = GI::flatten($exoner_p_clust, 'exonerate:p');1345 #-------------------------CODE1346 1347 #------------------------RESULTS1348 %results = (blastx_data => $blastx_data,1349 exonerate_p_data => $exonerate_p_data1350 );1351 #------------------------RESULTS1352 }1353 elsif ($flag eq 'flow') {1354 #-------------------------NEXT_LEVEL1355 #-------------------------NEXT_LEVEL1356 }1357 }1358 elsif ($level == 15) { #exonerate ESTs1359 $level_status = 'doing exonerate of ESTs';1360 if ($flag eq 'load') {1361 #-------------------------CHUNKER1362 my $chunk = new Process::MpiChunk($level, $VARS);1363 push(@chunks, $chunk);1364 #-------------------------CHUNKER1365 }1366 elsif ($flag eq 'init') {1367 #------------------------ARGS_IN1368 @args = (qw{tblastx_keepers1369 blastn_keepers1370 the_void1371 q_seq_ref1372 fasta1373 fasta_t_index1374 LOG1375 CTL_OPT}1376 );1377 #------------------------ARGS_IN1378 }1379 elsif ($flag eq 'run') {1380 #-------------------------CODE1381 my %CTL_OPT = %{$VARS->{CTL_OPT}};1382 my $tblastx_keepers = $VARS->{tblastx_keepers};1383 my $blastn_keepers = $VARS->{blastn_keepers};1384 my $the_void = $VARS->{the_void};1385 my $q_seq_ref = $VARS->{q_seq_ref};1386 my $fasta = $VARS->{fasta};1387 my $fasta_t_index = $VARS->{fasta_t_index};1388 my $LOG = $VARS->{LOG};1389 1390 1391 #-cluster the tblastx hits1392 print STDERR "cleaning tblastx...\n" unless $main::quiet;1393 my $tblastx_clusters = cluster::clean_and_cluster($tblastx_keepers,1394 $q_seq_ref,1395 101396 );1397 undef $tblastx_keepers; #free up memory1398 1399 #flatten the clusters1400 my $tblastx_data = GI::flatten($tblastx_clusters);1401 1402 #-cluster the blastn hits1403 print STDERR "cleaning blastn...\n" unless $main::quiet;1404 my $blastn_clusters = cluster::clean_and_cluster($blastn_keepers,1405 $q_seq_ref,1406 101407 );1408 undef $blastn_keepers; #free up memory1409 1410 #-polish blastn hits with exonerate1411 my $exoner_e_clust = [];1412 if($CTL_OPT{organism_type} eq 'eukaryotic'){1413 $exoner_e_clust = GI::polish_exonerate($fasta,1414 $blastn_clusters,1415 $fasta_t_index,1416 $the_void,1417 5,1418 'e',1419 $CTL_OPT{exonerate},1420 $CTL_OPT{pcov_blastn},1421 $CTL_OPT{pid_blastn},1422 $CTL_OPT{en_score_limit},1423 $CTL_OPT{en_matrix},1424 $LOG1425 );1426 }1427 1428 #flatten clusters1429 my $blastn_data = GI::flatten($blastn_clusters);1430 my $exonerate_e_data = GI::flatten($exoner_e_clust,1431 'exonerate:e'1432 );1433 #-------------------------CODE1434 1435 #------------------------RESULTS1436 %results = (blastn_data => $blastn_data,1437 exonerate_e_data => $exonerate_e_data,1438 tblastx_data => $tblastx_data1439 );1440 #------------------------RESULTS1441 }1442 elsif ($flag eq 'flow') {1443 #-------------------------NEXT_LEVEL1444 #-------------------------NEXT_LEVEL1445 }1446 }1447 1539 elsif ($level == 16) { #annotations 1448 1540 $level_status = 'calculating annotations'; … … 1462 1554 fasta 1463 1555 masked_fasta 1464 tblastx_ data1465 blastx_ data1466 blastn_ data1556 tblastx_keepers 1557 blastx_keepers 1558 blastn_keepers 1467 1559 exonerate_e_data 1468 1560 exonerate_p_data … … 1488 1580 my $fasta = $VARS->{fasta}; 1489 1581 my $masked_fasta = $VARS->{masked_fasta}; 1490 my $tblastx_ data = $VARS->{tblastx_data};1491 my $blastx_ data = $VARS->{blastx_data};1492 my $blastn_ data = $VARS->{blastn_data};1582 my $tblastx_keepers = $VARS->{tblastx_keepers}; 1583 my $blastx_keepers = $VARS->{blastx_keepers}; 1584 my $blastn_keepers = $VARS->{blastn_keepers}; 1493 1585 my $exonerate_e_data = $VARS->{exonerate_e_data}; 1494 1586 my $exonerate_p_data = $VARS->{exonerate_p_data}; … … 1507 1599 1508 1600 if($CTL_OPT{organism_type} eq 'prokaryotic'){ 1509 $final_est = GI::combine($blastn_ data,1601 $final_est = GI::combine($blastn_keepers, 1510 1602 $final_est 1511 1603 ); 1512 1604 } 1513 1605 1514 my $final_altest = GI::combine($tblastx_ data,1606 my $final_altest = GI::combine($tblastx_keepers, 1515 1607 $altest_gff_keepers 1516 1608 ); 1517 my $final_prot = GI::combine($blastx_ data,1609 my $final_prot = GI::combine($blastx_keepers, 1518 1610 $exonerate_p_data, 1519 1611 $prot_gff_keepers … … 1601 1693 non_over 1602 1694 annotations 1603 blastx_ data1604 blastn_ data1605 tblastx_ data1695 blastx_keepers 1696 blastn_keepers 1697 tblastx_keepers 1606 1698 exonerate_p_data 1607 1699 exonerate_e_data … … 1623 1715 my $non_over = $VARS->{non_over}; 1624 1716 my $annotations = $VARS->{annotations}; 1625 my $blastx_ data = $VARS->{blastx_data};1626 my $blastn_ data = $VARS->{blastn_data};1627 my $tblastx_ data = $VARS->{tblastx_data};1717 my $blastx_keepers = $VARS->{blastx_keepers}; 1718 my $blastn_keepers = $VARS->{blastn_keepers}; 1719 my $tblastx_keepers = $VARS->{tblastx_keepers}; 1628 1720 my $exonerate_p_data = $VARS->{exonerate_p_data}; 1629 1721 my $exonerate_e_data = $VARS->{exonerate_e_data}; … … 1641 1733 #--- GFF3 1642 1734 $GFF3->add_genes($maker_anno); 1643 $GFF3->add_phathits($blastx_ data);1644 $GFF3->add_phathits($blastn_ data);1645 $GFF3->add_phathits($tblastx_ data);1735 $GFF3->add_phathits($blastx_keepers); 1736 $GFF3->add_phathits($blastn_keepers); 1737 $GFF3->add_phathits($tblastx_keepers); 1646 1738 $GFF3->add_phathits($exonerate_p_data); 1647 1739 $GFF3->add_phathits($exonerate_e_data); lib/Widget/exonerate/est2genome.pm
r273 r277 646 646 } 647 647 648 $phat_hit = PhatHit_utils::copy($phat_hit, 'both') 649 if exonerate::splice_info::needs_to_be_revcomped($phat_hit); 648 if (exonerate::splice_info::needs_to_be_revcomped($phat_hit)){ 649 $phat_hit = PhatHit_utils::copy($phat_hit, 'both'); 650 } 651 650 652 651 653 return $phat_hit; lib/runlog.pm
r275 r277 263 263 } 264 264 265 #temp lamprey266 $log_val =~ s/^\/*scratch\/serial[^\/]*\/u0045039\///;267 $ctl_val =~ s/^\/*scratch\/serial[^\/]*\/u0045039\///;268 269 265 #if previous log options are not the same as current control file options 270 266 if ($log_val ne $ctl_val) { … … 377 373 print STDERR "MAKER WARNING: The file $key\n". 378 374 "did not finish on the last run and must be erased\n"; 379 #temp lamprey380 $key =~ s/^\/*scratch\/serial[^\/]*\/.*\/([^\/]*.maker.output)/$1/;381 375 382 376 push(@files, $key);
