| 111 | | my $vars = $self->{VARS}; |
|---|
| 112 | | my @results; |
|---|
| 113 | | |
|---|
| 114 | | if ($level == 0) { |
|---|
| 115 | | #------------------------ARGS_IN |
|---|
| 116 | | my $chunk = shift @{$vars}; |
|---|
| 117 | | my $the_void = shift @{$vars}; |
|---|
| 118 | | my $seq_out_name = shift @{$vars}; |
|---|
| 119 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 120 | | my $opt_f = shift @{$vars}; |
|---|
| 121 | | #------------------------ARGS_IN |
|---|
| 122 | | |
|---|
| 123 | | #-------------------------CHUNK |
|---|
| 124 | | my $rma_keepers = []; |
|---|
| 125 | | |
|---|
| 126 | | #-- repeatmask the input file |
|---|
| 127 | | if(! $CTL_OPTIONS{rmlib_only}){ |
|---|
| 128 | | $chunk->seq(uc($chunk->seq())); #must be upper case before soft masking |
|---|
| 129 | | |
|---|
| 130 | | my $rma_keepers1 = Shared_Functions::repeatmask($chunk, |
|---|
| 131 | | $the_void, |
|---|
| 132 | | $seq_out_name, |
|---|
| 133 | | $CTL_OPTIONS{'model_org'}, |
|---|
| 134 | | $CTL_OPTIONS{'RepeatMasker'}, |
|---|
| 135 | | '', |
|---|
| 136 | | $CTL_OPTIONS{'cpus'}, |
|---|
| 137 | | $opt_f, |
|---|
| 138 | | $self->{LOG} |
|---|
| 139 | | ); |
|---|
| 140 | | push(@{$rma_keepers}, @{$rma_keepers1}); |
|---|
| 141 | | |
|---|
| 142 | | #-mask the chunk using repeatmasker hits |
|---|
| 143 | | $chunk = repeat_mask_seq::mask_chunk($chunk, $rma_keepers1); |
|---|
| 144 | | } |
|---|
| 145 | | |
|---|
| 146 | | #-mask species specific repeats; |
|---|
| 147 | | if($CTL_OPTIONS{rmlib}){ |
|---|
| 148 | | my $rma_keepers2 = Shared_Functions::repeatmask($chunk, |
|---|
| 149 | | $the_void, |
|---|
| 150 | | $seq_out_name, |
|---|
| 151 | | $CTL_OPTIONS{'model_org'}, |
|---|
| 152 | | $CTL_OPTIONS{'RepeatMasker'}, |
|---|
| 153 | | $CTL_OPTIONS{'rmlib'}, |
|---|
| 154 | | $CTL_OPTIONS{'cpus'}, |
|---|
| 155 | | $opt_f, |
|---|
| 156 | | $self->{LOG} |
|---|
| 157 | | ); |
|---|
| 158 | | push(@{$rma_keepers}, @{$rma_keepers2}); |
|---|
| 159 | | |
|---|
| 160 | | #-mask the chunk using repeatmasker hits |
|---|
| 161 | | $chunk = repeat_mask_seq::mask_chunk($chunk, $rma_keepers2); |
|---|
| 162 | | } |
|---|
| 163 | | #-------------------------CHUNK |
|---|
| 164 | | |
|---|
| 165 | | #------------------------RESULTS |
|---|
| 166 | | @results = ($chunk, $rma_keepers); |
|---|
| 167 | | #------------------------RESULTS |
|---|
| 168 | | } |
|---|
| 169 | | elsif ($level == 1) { |
|---|
| 170 | | #------------------------ARGS_IN |
|---|
| 171 | | my $chunk = shift @{$vars}; |
|---|
| 172 | | my $repeat_protein = shift @{$vars}; |
|---|
| 173 | | my $the_void = shift @{$vars}; |
|---|
| 174 | | my $seq_out_name = shift @{$vars}; |
|---|
| 175 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 176 | | my $opt_f = shift @{$vars}; |
|---|
| 177 | | #------------------------ARGS_IN |
|---|
| 178 | | |
|---|
| 179 | | #-------------------------CHUNK |
|---|
| 180 | | #-- blastx against a repeat library (for better masking) |
|---|
| 181 | | my $rep_blastx_res_dir = ''; |
|---|
| 182 | | $rep_blastx_res_dir = Shared_Functions::blastx_as_chunks($chunk, |
|---|
| 183 | | $repeat_protein, |
|---|
| 184 | | $the_void, |
|---|
| 185 | | $seq_out_name, |
|---|
| 186 | | $CTL_OPTIONS{blastx}, |
|---|
| 187 | | $CTL_OPTIONS{eval_blastx}, |
|---|
| 188 | | $CTL_OPTIONS{split_hit}, |
|---|
| 189 | | $CTL_OPTIONS{cpus}, |
|---|
| 190 | | $CTL_OPTIONS{old_repeat_protein}, |
|---|
| 191 | | $CTL_OPTIONS{xdformat}, |
|---|
| 192 | | $CTL_OPTIONS{alt_peptide}, |
|---|
| 193 | | $self->{RANK}, |
|---|
| 194 | | $opt_f, |
|---|
| 195 | | $self->{LOG}, |
|---|
| 196 | | $self->{LOG_FLAG} |
|---|
| 197 | | ) if ($CTL_OPTIONS{old_repeat_protein}); |
|---|
| 198 | | #-------------------------CHUNK |
|---|
| 199 | | |
|---|
| 200 | | #------------------------RESULTS |
|---|
| 201 | | @results = ($rep_blastx_res_dir); |
|---|
| 202 | | #------------------------RESULTS |
|---|
| 203 | | } |
|---|
| 204 | | elsif ($level == 2) { |
|---|
| 205 | | #------------------------ARGS_IN |
|---|
| 206 | | my $chunk = shift @{$vars}; |
|---|
| 207 | | my $rep_blastx_res_dir = shift @{$vars}; |
|---|
| 208 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 209 | | my $opt_f = shift @{$vars}; |
|---|
| 210 | | #------------------------ARGS_IN |
|---|
| 211 | | |
|---|
| 212 | | #-------------------------CHUNK |
|---|
| 213 | | #-- merge and collect blastx results |
|---|
| 214 | | my $repeat_blastx_keepers = []; |
|---|
| 215 | | $repeat_blastx_keepers = Shared_Functions::collect_blastx($chunk, |
|---|
| 216 | | $rep_blastx_res_dir, |
|---|
| 217 | | $CTL_OPTIONS{eval_blastx}, |
|---|
| 218 | | $CTL_OPTIONS{bit_blastx}, |
|---|
| 219 | | $CTL_OPTIONS{percov_blastx}, |
|---|
| 220 | | $CTL_OPTIONS{percid_blastx}, |
|---|
| 221 | | $CTL_OPTIONS{split_hit}, |
|---|
| 222 | | $opt_f, |
|---|
| 223 | | $self->{LOG} |
|---|
| 224 | | ) if($CTL_OPTIONS{old_repeat_protein}); |
|---|
| 225 | | #-------------------------CHUNK |
|---|
| 226 | | |
|---|
| 227 | | #------------------------RESULTS |
|---|
| 228 | | @results = ($repeat_blastx_keepers); |
|---|
| 229 | | #------------------------RESULTS |
|---|
| 230 | | } |
|---|
| 231 | | elsif ($level == 3) { |
|---|
| 232 | | #------------------------ARGS_IN |
|---|
| 233 | | my $chunk = shift @{$vars}; |
|---|
| 234 | | my $rma_keepers = shift @{$vars}; |
|---|
| 235 | | my $repeat_blastx_keepers = shift @{$vars}; |
|---|
| 236 | | my $GFF3 = shift @{$vars}; |
|---|
| 237 | | my $query_def = shift @{$vars}; |
|---|
| 238 | | my $query_seq = shift @{$vars}; |
|---|
| 239 | | my $masked_total_seq = shift @{$vars}; |
|---|
| 240 | | my $the_void = shift @{$vars}; |
|---|
| 241 | | #------------------------ARGS_IN |
|---|
| 242 | | |
|---|
| 243 | | #-------------------------CHUNK |
|---|
| 244 | | #-mask the chunk using blastx hits |
|---|
| 245 | | $chunk = repeat_mask_seq::mask_chunk($chunk, $repeat_blastx_keepers); |
|---|
| 246 | | |
|---|
| 247 | | #-combine and cluster blastx and repeatmasker hits |
|---|
| 248 | | #-to get consensus repeat hits for gff3 and XML annotations |
|---|
| 249 | | my $rm_keepers = repeat_mask_seq::process($rma_keepers, |
|---|
| 250 | | $repeat_blastx_keepers, |
|---|
| 251 | | $query_seq |
|---|
| 252 | | ); |
|---|
| 253 | | |
|---|
| 254 | | #-add repeats to GFF3 |
|---|
| 255 | | $GFF3->repeat_hits($rm_keepers); |
|---|
| 256 | | |
|---|
| 257 | | #-build/fill big masked sequence |
|---|
| 258 | | $masked_total_seq .= $chunk->seq(); |
|---|
| 259 | | #-------------------------CHUNK |
|---|
| 260 | | |
|---|
| 261 | | #------------------------RESULTS |
|---|
| 262 | | @results = ($chunk, $masked_total_seq, $GFF3); |
|---|
| 263 | | #------------------------RESULTS |
|---|
| 264 | | } |
|---|
| 265 | | elsif ($level == 4) { |
|---|
| 266 | | #------------------------ARGS_IN |
|---|
| 267 | | my $masked_total_seq = shift @{$vars}; |
|---|
| 268 | | my $the_void = shift @{$vars}; |
|---|
| 269 | | my $seq_out_name = shift @{$vars}; |
|---|
| 270 | | my $query_def = shift @{$vars}; |
|---|
| 271 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 272 | | my $opt_f = shift @{$vars}; |
|---|
| 273 | | #------------------------ARGS_IN |
|---|
| 274 | | |
|---|
| 275 | | #-------------------------CHUNK |
|---|
| 276 | | my $masked_fasta = Fasta::toFasta($query_def.' masked', \$masked_total_seq); |
|---|
| 277 | | FastaFile::writeFile($masked_fasta ,$the_void."/query.masked.fasta"); |
|---|
| 278 | | |
|---|
| 279 | | #==SNAP ab initio here |
|---|
| 280 | | my $snaps = []; |
|---|
| 281 | | $snaps = Shared_Functions::snap($masked_fasta, |
|---|
| 282 | | $the_void, |
|---|
| 283 | | $seq_out_name, |
|---|
| 284 | | $CTL_OPTIONS{snap}, |
|---|
| 285 | | $CTL_OPTIONS{'snaphmm'}, |
|---|
| 286 | | $opt_f, |
|---|
| 287 | | $self->{LOG} |
|---|
| 288 | | ) if ($CTL_OPTIONS{'snap'}); |
|---|
| 289 | | |
|---|
| 290 | | #==AUGUSTUS ab initio here |
|---|
| 291 | | my $augus = []; |
|---|
| 292 | | $augus = Shared_Functions::augustus($masked_fasta, |
|---|
| 293 | | $the_void, |
|---|
| 294 | | $seq_out_name, |
|---|
| 295 | | $CTL_OPTIONS{'augustus'}, |
|---|
| 296 | | $CTL_OPTIONS{'augustus_species'}, |
|---|
| 297 | | $opt_f, |
|---|
| 298 | | $self->{LOG} |
|---|
| 299 | | ) if ($CTL_OPTIONS{'augustus'}); |
|---|
| 300 | | |
|---|
| 301 | | #-- build an index of the databases |
|---|
| 302 | | my $t_dir = $TMP."/rank".$self->{RANK}; |
|---|
| 303 | | File::Path::mkpath($t_dir); |
|---|
| 304 | | |
|---|
| 305 | | $CTL_OPTIONS{old_est} =~ /([^\/]+)$/; |
|---|
| 306 | | my $t_name = $1; |
|---|
| 307 | | |
|---|
| 308 | | $CTL_OPTIONS{old_protein} =~ /([^\/]+)$/; |
|---|
| 309 | | my $p_name = $1; |
|---|
| 310 | | |
|---|
| 311 | | my $a_name; |
|---|
| 312 | | if($CTL_OPTIONS{old_alt_est}){ |
|---|
| 313 | | $CTL_OPTIONS{old_alt_est} =~ /([^\/]+)$/; |
|---|
| 314 | | $a_name = $1;; |
|---|
| 315 | | } |
|---|
| 316 | | |
|---|
| 317 | | my $trans_file = $t_dir."/".$t_name; |
|---|
| 318 | | my $prot_file = $t_dir."/".$p_name; |
|---|
| 319 | | my $alt_est_file = $t_dir."/".$a_name if($CTL_OPTIONS{old_alt_est}); |
|---|
| 320 | | |
|---|
| 321 | | if (! -e $trans_file) { |
|---|
| 322 | | system("cp $CTL_OPTIONS{old_est} $trans_file"); |
|---|
| 323 | | } |
|---|
| 324 | | if (! -e $prot_file) { |
|---|
| 325 | | system("cp $CTL_OPTIONS{old_protein} $prot_file"); |
|---|
| 326 | | } |
|---|
| 327 | | if ($CTL_OPTIONS{old_alt_est} && ! -e $alt_est_file) { |
|---|
| 328 | | system("cp $CTL_OPTIONS{old_alt_est} $alt_est_file"); |
|---|
| 329 | | } |
|---|
| 330 | | |
|---|
| 331 | | my $fasta_t_index = Shared_Functions::build_fasta_index($trans_file); |
|---|
| 332 | | my $fasta_p_index = Shared_Functions::build_fasta_index($prot_file); |
|---|
| 333 | | my $fasta_a_index; |
|---|
| 334 | | $fasta_a_index = Shared_Functions::build_fasta_index($alt_est_file) if ($CTL_OPTIONS{old_alt_est}); |
|---|
| 335 | | |
|---|
| 336 | | #--set up new chunks for remaining levels |
|---|
| 337 | | my $fasta_chunker = new FastaChunker(); |
|---|
| 338 | | $fasta_chunker->parent_fasta($$masked_fasta); |
|---|
| 339 | | $fasta_chunker->chunk_size($CTL_OPTIONS{'max_dna_len'}); |
|---|
| 340 | | $fasta_chunker->min_size($CTL_OPTIONS{'split_hit'}); |
|---|
| 341 | | $fasta_chunker->load_chunks(); |
|---|
| 342 | | |
|---|
| 343 | | my $chunk_count = 0; |
|---|
| 344 | | |
|---|
| 345 | | #-------------------------CHUNK |
|---|
| 346 | | |
|---|
| 347 | | #------------------------RESULTS |
|---|
| 348 | | @results = ($masked_fasta, $snaps, $augus, $fasta_chunker, $chunk_count, $fasta_t_index, $fasta_p_index, $fasta_a_index); |
|---|
| 349 | | #------------------------RESULTS |
|---|
| 350 | | } |
|---|
| 351 | | elsif ($level == 5) { |
|---|
| 352 | | #------------------------ARGS_IN |
|---|
| 353 | | my $chunk = shift @{$vars}; |
|---|
| 354 | | my $transcripts = shift @{$vars}; |
|---|
| 355 | | my $the_void = shift @{$vars}; |
|---|
| 356 | | my $seq_out_name = shift @{$vars}; |
|---|
| 357 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 358 | | my $opt_f = shift @{$vars}; |
|---|
| 359 | | #------------------------ARGS_IN |
|---|
| 360 | | |
|---|
| 361 | | #-------------------------CHUNK |
|---|
| 362 | | #-- blastn search the file against ESTs |
|---|
| 363 | | my $blastn_res_dir = Shared_Functions::blastn_as_chunks($chunk, |
|---|
| 364 | | $transcripts, |
|---|
| 365 | | $the_void, |
|---|
| 366 | | $seq_out_name, |
|---|
| 367 | | $CTL_OPTIONS{blastn}, |
|---|
| 368 | | $CTL_OPTIONS{eval_blastn}, |
|---|
| 369 | | $CTL_OPTIONS{split_hit}, |
|---|
| 370 | | $CTL_OPTIONS{cpus}, |
|---|
| 371 | | $CTL_OPTIONS{old_est}, |
|---|
| 372 | | $CTL_OPTIONS{xdformat}, |
|---|
| 373 | | $self->{RANK}, |
|---|
| 374 | | $opt_f, |
|---|
| 375 | | $self->{LOG}, |
|---|
| 376 | | $self->{LOG_FLAG} |
|---|
| 377 | | ); |
|---|
| 378 | | #-------------------------CHUNK |
|---|
| 379 | | |
|---|
| 380 | | #------------------------RESULTS |
|---|
| 381 | | @results = ($blastn_res_dir); |
|---|
| 382 | | #------------------------RESULTS |
|---|
| 383 | | } |
|---|
| 384 | | elsif ($level == 6) { |
|---|
| 385 | | #------------------------ARGS_IN |
|---|
| 386 | | my $chunk = shift @{$vars}; |
|---|
| 387 | | my $blastn_res_dir = shift @{$vars}; |
|---|
| 388 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 389 | | my $opt_f = shift @{$vars}; |
|---|
| 390 | | #------------------------ARGS_IN |
|---|
| 391 | | |
|---|
| 392 | | #-------------------------CHUNK |
|---|
| 393 | | #-- merge and collect blastn results |
|---|
| 394 | | my $blastn_keepers = Shared_Functions::collect_blastn($chunk, |
|---|
| 395 | | $blastn_res_dir, |
|---|
| 396 | | $CTL_OPTIONS{eval_blastn}, |
|---|
| 397 | | $CTL_OPTIONS{bit_blastn}, |
|---|
| 398 | | $CTL_OPTIONS{percov_blastn}, |
|---|
| 399 | | $CTL_OPTIONS{percid_blastn}, |
|---|
| 400 | | $CTL_OPTIONS{split_hit}, |
|---|
| 401 | | $opt_f, |
|---|
| 402 | | $self->{LOG} |
|---|
| 403 | | ); |
|---|
| 404 | | #-------------------------CHUNK |
|---|
| 405 | | |
|---|
| 406 | | #------------------------RESULTS |
|---|
| 407 | | @results = ($blastn_keepers); |
|---|
| 408 | | #------------------------RESULTS |
|---|
| 409 | | } |
|---|
| 410 | | elsif ($level == 7) { |
|---|
| 411 | | #------------------------ARGS_IN |
|---|
| 412 | | my $chunk = shift @{$vars}; |
|---|
| 413 | | my $proteins = shift @{$vars}; |
|---|
| 414 | | my $the_void = shift @{$vars}; |
|---|
| 415 | | my $seq_out_name = shift @{$vars}; |
|---|
| 416 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 417 | | my $opt_f = shift @{$vars}; |
|---|
| 418 | | #------------------------ARGS_IN |
|---|
| 419 | | |
|---|
| 420 | | #-------------------------CHUNK |
|---|
| 421 | | #-- blastx search the masked input file |
|---|
| 422 | | my $blastx_res_dir = Shared_Functions::blastx_as_chunks($chunk, |
|---|
| 423 | | $proteins, |
|---|
| 424 | | $the_void, |
|---|
| 425 | | $seq_out_name, |
|---|
| 426 | | $CTL_OPTIONS{blastx}, |
|---|
| 427 | | $CTL_OPTIONS{eval_blastx}, |
|---|
| 428 | | $CTL_OPTIONS{split_hit}, |
|---|
| 429 | | $CTL_OPTIONS{cpus}, |
|---|
| 430 | | $CTL_OPTIONS{old_protein}, |
|---|
| 431 | | $CTL_OPTIONS{xdformat}, |
|---|
| 432 | | $CTL_OPTIONS{alt_peptide}, |
|---|
| 433 | | $self->{RANK}, |
|---|
| 434 | | $opt_f, |
|---|
| 435 | | $self->{LOG}, |
|---|
| 436 | | $self->{LOG_FLAG} |
|---|
| 437 | | ); |
|---|
| 438 | | #-------------------------CHUNK |
|---|
| 439 | | |
|---|
| 440 | | #------------------------RESULTS |
|---|
| 441 | | @results = ($blastx_res_dir); |
|---|
| 442 | | #------------------------RESULTS |
|---|
| 443 | | } |
|---|
| 444 | | elsif ($level == 8) { |
|---|
| 445 | | #------------------------ARGS_IN |
|---|
| 446 | | my $chunk = shift @{$vars}; |
|---|
| 447 | | my $blastx_res_dir = shift @{$vars}; |
|---|
| 448 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 449 | | my $opt_f = shift @{$vars}; |
|---|
| 450 | | #------------------------ARGS_IN |
|---|
| 451 | | |
|---|
| 452 | | #-------------------------CHUNK |
|---|
| 453 | | #-- merge and collect blastx results |
|---|
| 454 | | my $blastx_keepers = Shared_Functions::collect_blastx($chunk, |
|---|
| 455 | | $blastx_res_dir, |
|---|
| 456 | | $CTL_OPTIONS{eval_blastx}, |
|---|
| 457 | | $CTL_OPTIONS{bit_blastx}, |
|---|
| 458 | | $CTL_OPTIONS{percov_blastx}, |
|---|
| 459 | | $CTL_OPTIONS{percid_blastx}, |
|---|
| 460 | | $CTL_OPTIONS{split_hit}, |
|---|
| 461 | | $opt_f, |
|---|
| 462 | | $self->{LOG} |
|---|
| 463 | | ); |
|---|
| 464 | | #-------------------------CHUNK |
|---|
| 465 | | |
|---|
| 466 | | #------------------------RESULTS |
|---|
| 467 | | @results = ($blastx_keepers); |
|---|
| 468 | | #------------------------RESULTS |
|---|
| 469 | | } |
|---|
| 470 | | elsif ($level == 9) { |
|---|
| 471 | | #------------------------ARGS_IN |
|---|
| 472 | | my $chunk = shift @{$vars}; |
|---|
| 473 | | my $alt_ests = shift @{$vars}; |
|---|
| 474 | | my $the_void = shift @{$vars}; |
|---|
| 475 | | my $seq_out_name = shift @{$vars}; |
|---|
| 476 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 477 | | my $opt_f = shift @{$vars}; |
|---|
| 478 | | #------------------------ARGS_IN |
|---|
| 479 | | |
|---|
| 480 | | #-------------------------CHUNK |
|---|
| 481 | | #-- tblastx search the masked input file against the alternate ESTs |
|---|
| 482 | | my $tblastx_res_dir = ''; |
|---|
| 483 | | $tblastx_res_dir = Shared_Functions::tblastx_as_chunks($chunk, |
|---|
| 484 | | $alt_ests, |
|---|
| 485 | | $the_void, |
|---|
| 486 | | $seq_out_name, |
|---|
| 487 | | $CTL_OPTIONS{tblastx}, |
|---|
| 488 | | $CTL_OPTIONS{eval_tblastx}, |
|---|
| 489 | | $CTL_OPTIONS{split_hit}, |
|---|
| 490 | | $CTL_OPTIONS{cpus}, |
|---|
| 491 | | $CTL_OPTIONS{old_alt_est}, |
|---|
| 492 | | $CTL_OPTIONS{xdformat}, |
|---|
| 493 | | $self->{RANK}, |
|---|
| 494 | | $opt_f, |
|---|
| 495 | | $self->{LOG}, |
|---|
| 496 | | $self->{LOG_FLAG} |
|---|
| 497 | | ) if($alt_ests); |
|---|
| 498 | | #-------------------------CHUNK |
|---|
| 499 | | |
|---|
| 500 | | #------------------------RESULTS |
|---|
| 501 | | @results = ($tblastx_res_dir); |
|---|
| 502 | | #------------------------RESULTS |
|---|
| 503 | | } |
|---|
| 504 | | elsif ($level == 10) { |
|---|
| 505 | | #------------------------ARGS_IN |
|---|
| 506 | | my $chunk = shift @{$vars}; |
|---|
| 507 | | my $tblastx_res_dir = shift @{$vars}; |
|---|
| 508 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 509 | | my $opt_f = shift @{$vars}; |
|---|
| 510 | | #------------------------ARGS_IN |
|---|
| 511 | | |
|---|
| 512 | | #-------------------------CHUNK |
|---|
| 513 | | #-- merge and collect tblastx results |
|---|
| 514 | | my $tblastx_keepers = []; |
|---|
| 515 | | $tblastx_keepers = Shared_Functions::collect_tblastx($chunk, |
|---|
| 516 | | $tblastx_res_dir, |
|---|
| 517 | | $CTL_OPTIONS{eval_tblastx}, |
|---|
| 518 | | $CTL_OPTIONS{bit_tblastx}, |
|---|
| 519 | | $CTL_OPTIONS{percov_tblastx}, |
|---|
| 520 | | $CTL_OPTIONS{percid_tblastx}, |
|---|
| 521 | | $CTL_OPTIONS{split_hit}, |
|---|
| 522 | | $opt_f, |
|---|
| 523 | | $self->{LOG} |
|---|
| 524 | | ) if($CTL_OPTIONS{old_alt_est}); |
|---|
| 525 | | #-------------------------CHUNK |
|---|
| 526 | | |
|---|
| 527 | | #------------------------RESULTS |
|---|
| 528 | | @results = ($tblastx_keepers); |
|---|
| 529 | | #------------------------RESULTS |
|---|
| 530 | | } |
|---|
| 531 | | elsif ($level == 11) { |
|---|
| 532 | | #------------------------ARGS_IN |
|---|
| 533 | | my $chunk = shift @{$vars}; |
|---|
| 534 | | my $masked_fasta = shift @{$vars}; |
|---|
| 535 | | my $snaps = shift @{$vars}; |
|---|
| 536 | | my $augus = shift @{$vars}; |
|---|
| 537 | | my $blastn_keepers = shift @{$vars}; |
|---|
| 538 | | my $blastx_keepers = shift @{$vars}; |
|---|
| 539 | | my $tblastx_keepers = shift @{$vars}; |
|---|
| 540 | | my $fasta_t_index = shift @{$vars}; |
|---|
| 541 | | my $fasta_p_index = shift @{$vars}; |
|---|
| 542 | | my $fasta_a_index = shift @{$vars}; |
|---|
| 543 | | my $holdover_blastn = shift @{$vars}; |
|---|
| 544 | | my $holdover_blastx = shift @{$vars}; |
|---|
| 545 | | my $holdover_tblastx = shift @{$vars}; |
|---|
| 546 | | my $the_void = shift @{$vars}; |
|---|
| 547 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 548 | | my $opt_f = shift @{$vars}; |
|---|
| 549 | | #------------------------ARGS_IN |
|---|
| 550 | | |
|---|
| 551 | | #-------------------------CHUNK |
|---|
| 552 | | |
|---|
| 553 | | #-- decide which gene finder to use to build annotations |
|---|
| 554 | | my $preds_on_chunk; |
|---|
| 555 | | |
|---|
| 556 | | if ($CTL_OPTIONS{predictor} eq 'augustus') { |
|---|
| 557 | | $preds_on_chunk = Shared_Functions::get_preds_on_chunk($augus, |
|---|
| 558 | | $chunk |
|---|
| 559 | | ); |
|---|
| 560 | | } |
|---|
| 561 | | elsif ($CTL_OPTIONS{predictor} eq 'snap') { |
|---|
| 562 | | $preds_on_chunk = Shared_Functions::get_preds_on_chunk($snaps, |
|---|
| 563 | | $chunk |
|---|
| 564 | | ); |
|---|
| 565 | | } |
|---|
| 566 | | elsif ($CTL_OPTIONS{predictor} eq 'est2genome') { |
|---|
| 567 | | $preds_on_chunk = []; |
|---|
| 568 | | } |
|---|
| 569 | | else { |
|---|
| 570 | | die "ERROR: invalid predictor type: $CTL_OPTIONS{predictor}\n"; |
|---|
| 571 | | } |
|---|
| 572 | | |
|---|
| 573 | | #==merge heldover Phathits from last round |
|---|
| 574 | | if ($chunk->number != 0) { #if not first chunk |
|---|
| 575 | | ($blastn_keepers, |
|---|
| 576 | | $blastx_keepers, |
|---|
| 577 | | $tblastx_keepers) = Shared_Functions::merge_and_resolve_hits($masked_fasta, |
|---|
| 578 | | $fasta_t_index, |
|---|
| 579 | | $fasta_p_index, |
|---|
| 580 | | $fasta_a_index, |
|---|
| 581 | | $blastn_keepers, |
|---|
| 582 | | $blastx_keepers, |
|---|
| 583 | | $tblastx_keepers, |
|---|
| 584 | | $holdover_blastn, |
|---|
| 585 | | $holdover_blastx, |
|---|
| 586 | | $holdover_tblastx, |
|---|
| 587 | | $the_void, |
|---|
| 588 | | \%CTL_OPTIONS, |
|---|
| 589 | | $opt_f, |
|---|
| 590 | | $self->{LOG} |
|---|
| 591 | | ); |
|---|
| 592 | | } |
|---|
| 593 | | |
|---|
| 594 | | #==PROCESS HITS CLOSE TO CHUNK DIVISIONS |
|---|
| 595 | | my $holdover_preds = []; |
|---|
| 596 | | $holdover_blastn = []; |
|---|
| 597 | | $holdover_blastx = []; |
|---|
| 598 | | $holdover_tblastx = []; |
|---|
| 599 | | |
|---|
| 600 | | if (not $chunk->is_last) { #if not last chunk |
|---|
| 601 | | ($holdover_blastn, |
|---|
| 602 | | $holdover_blastx, |
|---|
| 603 | | $holdover_tblastx, |
|---|
| 604 | | $holdover_preds, |
|---|
| 605 | | $blastn_keepers, |
|---|
| 606 | | $blastx_keepers, |
|---|
| 607 | | $tblastx_keepers, |
|---|
| 608 | | $preds_on_chunk) = Shared_Functions::process_the_chunk_divide($chunk, |
|---|
| 609 | | $CTL_OPTIONS{'split_hit'}, |
|---|
| 610 | | $blastn_keepers, |
|---|
| 611 | | $blastx_keepers, |
|---|
| 612 | | $tblastx_keepers, |
|---|
| 613 | | $preds_on_chunk |
|---|
| 614 | | ); |
|---|
| 615 | | } |
|---|
| 616 | | #-------------------------CHUNK |
|---|
| 617 | | |
|---|
| 618 | | #------------------------RESULTS |
|---|
| 619 | | @results = ($holdover_blastn, $holdover_blastx, $holdover_tblastx, $holdover_preds, |
|---|
| 620 | | $blastn_keepers, $blastx_keepers, $tblastx_keepers, $preds_on_chunk); |
|---|
| 621 | | #------------------------RESULTS |
|---|
| 622 | | } |
|---|
| 623 | | elsif ($level == 12) { |
|---|
| 624 | | #------------------------ARGS_IN |
|---|
| 625 | | my $fasta = shift @{$vars}; |
|---|
| 626 | | my $blastx_keepers = shift @{$vars}; |
|---|
| 627 | | my $query_seq = shift @{$vars}; |
|---|
| 628 | | my $fasta_p_index = shift @{$vars}; |
|---|
| 629 | | my $the_void = shift @{$vars}; |
|---|
| 630 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 631 | | my $opt_f = shift @{$vars}; |
|---|
| 632 | | #------------------------ARGS_IN |
|---|
| 633 | | |
|---|
| 634 | | #-------------------------CHUNK |
|---|
| 635 | | #-cluster the blastx hits |
|---|
| 636 | | print STDERR "cleaning blastx...\n" unless($main::quiet); |
|---|
| 637 | | my $blastx_clusters = cluster::clean_and_cluster($blastx_keepers, |
|---|
| 638 | | $query_seq, |
|---|
| 639 | | 10); |
|---|
| 640 | | |
|---|
| 641 | | #-- make a multi-fasta of the seqs in the blastx_clusters |
|---|
| 642 | | #-- polish the blastx hits with exonerate |
|---|
| 643 | | my $exonerate_p_clusters = Shared_Functions::polish_exonerate($fasta, |
|---|
| 644 | | $blastx_clusters, |
|---|
| 645 | | $fasta_p_index, |
|---|
| 646 | | $the_void, |
|---|
| 647 | | 5, |
|---|
| 648 | | 'p', |
|---|
| 649 | | $CTL_OPTIONS{exonerate}, |
|---|
| 650 | | $CTL_OPTIONS{percov_blastx}, |
|---|
| 651 | | $CTL_OPTIONS{percid_blastx}, |
|---|
| 652 | | $CTL_OPTIONS{ep_score_limit}, |
|---|
| 653 | | $CTL_OPTIONS{ep_matrix}, |
|---|
| 654 | | $opt_f, |
|---|
| 655 | | $self->{LOG} |
|---|
| 656 | | ); |
|---|
| 657 | | |
|---|
| 658 | | my $blastx_data = Shared_Functions::flatten($blastx_clusters); |
|---|
| 659 | | my $exonerate_p_data = Shared_Functions::flatten($exonerate_p_clusters, 'exonerate:p'); |
|---|
| 660 | | #-------------------------CHUNK |
|---|
| 661 | | |
|---|
| 662 | | #------------------------RESULTS |
|---|
| 663 | | @results = ($blastx_data, $exonerate_p_data); |
|---|
| 664 | | #------------------------RESULTS |
|---|
| 665 | | } |
|---|
| 666 | | elsif ($level == 13) { |
|---|
| 667 | | #------------------------ARGS_IN |
|---|
| 668 | | my $fasta = shift @{$vars}; |
|---|
| 669 | | my $blastn_keepers = shift @{$vars}; |
|---|
| 670 | | my $tblastx_keepers = shift @{$vars}; |
|---|
| 671 | | my $query_seq = shift @{$vars}; |
|---|
| 672 | | my $fasta_t_index = shift @{$vars}; |
|---|
| 673 | | my $the_void = shift @{$vars}; |
|---|
| 674 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 675 | | my $opt_f = shift @{$vars}; |
|---|
| 676 | | #------------------------ARGS_IN |
|---|
| 677 | | |
|---|
| 678 | | #-------------------------CHUNK |
|---|
| 679 | | #-cluster the tblastx hits |
|---|
| 680 | | print STDERR "cleaning tblastx...\n" unless $main::quiet; |
|---|
| 681 | | my $tblastx_clusters = cluster::clean_and_cluster($tblastx_keepers, |
|---|
| 682 | | $query_seq, |
|---|
| 683 | | 10 |
|---|
| 684 | | ); |
|---|
| 685 | | |
|---|
| 686 | | undef $tblastx_keepers; #free up memory |
|---|
| 687 | | my $tblastx_data = Shared_Functions::flatten($tblastx_clusters); |
|---|
| 688 | | |
|---|
| 689 | | #-- Cluster the blastn hits |
|---|
| 690 | | print STDERR "cleaning blastn...\n" unless($main::quiet); |
|---|
| 691 | | my $blastn_clusters = cluster::clean_and_cluster($blastn_keepers, |
|---|
| 692 | | $query_seq, |
|---|
| 693 | | 10 |
|---|
| 694 | | ); |
|---|
| 695 | | |
|---|
| 696 | | #-- polish blastn hits with exonerate |
|---|
| 697 | | my $exonerate_e_clusters = Shared_Functions::polish_exonerate($fasta, |
|---|
| 698 | | $blastn_clusters, |
|---|
| 699 | | $fasta_t_index, |
|---|
| 700 | | $the_void, |
|---|
| 701 | | 5, |
|---|
| 702 | | 'e', |
|---|
| 703 | | $CTL_OPTIONS{exonerate}, |
|---|
| 704 | | $CTL_OPTIONS{percov_blastn}, |
|---|
| 705 | | $CTL_OPTIONS{percid_blastn}, |
|---|
| 706 | | $CTL_OPTIONS{en_score_limit}, |
|---|
| 707 | | $CTL_OPTIONS{en_matrix}, |
|---|
| 708 | | $opt_f, |
|---|
| 709 | | $self->{LOG} |
|---|
| 710 | | ); |
|---|
| 711 | | |
|---|
| 712 | | my $blastn_data = Shared_Functions::flatten($blastn_clusters); |
|---|
| 713 | | my $exonerate_e_data = Shared_Functions::flatten($exonerate_e_clusters, 'exonerate:e'); |
|---|
| 714 | | #-------------------------CHUNK |
|---|
| 715 | | |
|---|
| 716 | | #------------------------RESULTS |
|---|
| 717 | | @results = ($blastn_data, $tblastx_data, $exonerate_e_data); |
|---|
| 718 | | #------------------------RESULTS |
|---|
| 719 | | } |
|---|
| 720 | | elsif ($level == 14) { |
|---|
| 721 | | #------------------------ARGS_IN |
|---|
| 722 | | my $fasta = shift @{$vars}; |
|---|
| 723 | | my $masked_fasta = shift @{$vars}; |
|---|
| 724 | | my $c_number = shift @{$vars}; |
|---|
| 725 | | my $exonerate_p_data = shift @{$vars}; |
|---|
| 726 | | my $exonerate_e_data = shift @{$vars}; |
|---|
| 727 | | my $blastx_data = shift @{$vars}; |
|---|
| 728 | | my $preds_on_chunk = shift @{$vars}; |
|---|
| 729 | | my $the_void = shift @{$vars}; |
|---|
| 730 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 731 | | my $opt_f = shift @{$vars}; |
|---|
| 732 | | my $opt_preds = shift @{$vars}; |
|---|
| 733 | | #------------------------ARGS_IN |
|---|
| 734 | | |
|---|
| 735 | | #-------------------------CHUNK |
|---|
| 736 | | #==MAKER annotations built here |
|---|
| 737 | | |
|---|
| 738 | | #-- decide which gene finder to use to build annotations |
|---|
| 739 | | my $pred_command; |
|---|
| 740 | | |
|---|
| 741 | | if ($CTL_OPTIONS{predictor} eq 'augustus') { |
|---|
| 742 | | $pred_command = $CTL_OPTIONS{augustus} .' --species='.$CTL_OPTIONS{augustus_species}; |
|---|
| 743 | | } |
|---|
| 744 | | elsif ($CTL_OPTIONS{predictor} eq 'snap') { |
|---|
| 745 | | $pred_command = $CTL_OPTIONS{snap}.' '.$CTL_OPTIONS{snaphmm}; |
|---|
| 746 | | } |
|---|
| 747 | | elsif ($CTL_OPTIONS{predictor} eq 'est2genome') { |
|---|
| 748 | | $pred_command = ''; |
|---|
| 749 | | } |
|---|
| 750 | | else { |
|---|
| 751 | | die "ERROR: invalid predictor type: $CTL_OPTIONS{predictor}\n"; |
|---|
| 752 | | } |
|---|
| 753 | | |
|---|
| 754 | | #-auto-annotate the input file |
|---|
| 755 | | |
|---|
| 756 | | my $annotations = maker::auto_annotator::annotate($fasta, |
|---|
| 757 | | $$masked_fasta, |
|---|
| 758 | | $c_number, |
|---|
| 759 | | $exonerate_p_data, |
|---|
| 760 | | $exonerate_e_data, |
|---|
| 761 | | $blastx_data, |
|---|
| 762 | | $preds_on_chunk, |
|---|
| 763 | | $the_void, |
|---|
| 764 | | $pred_command, |
|---|
| 765 | | $CTL_OPTIONS{'snap_flank'}, |
|---|
| 766 | | $CTL_OPTIONS{'single_exon'}, |
|---|
| 767 | | $opt_f, |
|---|
| 768 | | $opt_preds, |
|---|
| 769 | | $CTL_OPTIONS{predictor}, |
|---|
| 770 | | $self->{LOG} |
|---|
| 771 | | ); |
|---|
| 772 | | #-------------------------CHUNK |
|---|
| 773 | | |
|---|
| 774 | | #------------------------RESULTS |
|---|
| 775 | | @results = ($annotations); |
|---|
| 776 | | #------------------------RESULTS |
|---|
| 777 | | } |
|---|
| 778 | | elsif ($level == 15) { |
|---|
| 779 | | #------------------------ARGS_IN |
|---|
| 780 | | my $blastx_data = shift @{$vars}; |
|---|
| 781 | | my $blastn_data = shift @{$vars}; |
|---|
| 782 | | my $tblastx_data = shift @{$vars}; |
|---|
| 783 | | my $exonerate_p_data = shift @{$vars}; |
|---|
| 784 | | my $exonerate_e_data = shift @{$vars}; |
|---|
| 785 | | my $annotations = shift @{$vars}; |
|---|
| 786 | | my $p_fastas = shift @{$vars}; |
|---|
| 787 | | my $t_fastas = shift @{$vars}; |
|---|
| 788 | | my $GFF3 = shift @{$vars}; |
|---|
| 789 | | #------------------------ARGS_IN |
|---|
| 790 | | |
|---|
| 791 | | #-------------------------CHUNK |
|---|
| 792 | | #--- GFF3 |
|---|
| 793 | | $GFF3->genes($annotations); |
|---|
| 794 | | $GFF3->phat_hits($blastx_data); |
|---|
| 795 | | $GFF3->phat_hits($blastn_data); |
|---|
| 796 | | $GFF3->phat_hits($tblastx_data); |
|---|
| 797 | | $GFF3->phat_hits($exonerate_p_data); |
|---|
| 798 | | $GFF3->phat_hits($exonerate_e_data); |
|---|
| 799 | | |
|---|
| 800 | | #--- building fastas for annotations |
|---|
| 801 | | my ($p_fasta, $t_fasta) = Shared_Functions::get_maker_p_and_t_fastas($annotations); |
|---|
| 802 | | $p_fastas .= $p_fasta; |
|---|
| 803 | | $t_fastas .= $t_fasta; |
|---|
| 804 | | #-------------------------CHUNK |
|---|
| 805 | | |
|---|
| 806 | | #------------------------RESULTS |
|---|
| 807 | | @results = ($GFF3, $p_fastas, $t_fastas); |
|---|
| 808 | | #------------------------RESULTS |
|---|
| 809 | | } |
|---|
| 810 | | elsif ($level == 16) { |
|---|
| 811 | | #------------------------ARGS_IN |
|---|
| 812 | | my $snaps = shift @{$vars}; |
|---|
| 813 | | my $augus = shift @{$vars}; |
|---|
| 814 | | my $p_fastas = shift @{$vars}; |
|---|
| 815 | | my $t_fastas = shift @{$vars}; |
|---|
| 816 | | my $GFF3 = shift @{$vars}; |
|---|
| 817 | | my $seq_out_name = shift @{$vars}; |
|---|
| 818 | | my $out_dir = shift @{$vars}; |
|---|
| 819 | | my $the_void = shift @{$vars}; |
|---|
| 820 | | my $query_seq = shift @{$vars}; |
|---|
| 821 | | my %CTL_OPTIONS = %{shift @{$vars}}; |
|---|
| 822 | | #------------------------ARGS_IN |
|---|
| 823 | | |
|---|
| 824 | | #-------------------------CHUNK |
|---|
| 825 | | #--- building fastas of predictions |
|---|
| 826 | | my ($p_snap_fastas, $t_snap_fastas) = Shared_Functions::get_snap_p_and_t_fastas($query_seq, $snaps); |
|---|
| 827 | | my ($p_augus_fastas, $t_augus_fastas) = Shared_Functions::get_snap_p_and_t_fastas($query_seq, $augus); |
|---|
| 828 | | |
|---|
| 829 | | #--Write fasta files and gff3 files now that all chunks are finished |
|---|
| 830 | | FastaFile::writeFile(\$p_fastas ,"$out_dir\/$seq_out_name\.maker.proteins.fasta"); |
|---|
| 831 | | FastaFile::writeFile(\$t_fastas ,"$out_dir\/$seq_out_name\.maker.transcripts.fasta"); |
|---|
| 832 | | if ($CTL_OPTIONS{'snap'}) { |
|---|
| 833 | | FastaFile::writeFile(\$p_snap_fastas ,"$out_dir\/$seq_out_name\.maker.snap.proteins.fasta"); |
|---|
| 834 | | FastaFile::writeFile(\$t_snap_fastas ,"$out_dir\/$seq_out_name\.maker.snap.transcript.fasta"); |
|---|
| 835 | | } |
|---|
| 836 | | if ($CTL_OPTIONS{'augustus'}) { |
|---|
| 837 | | FastaFile::writeFile(\$p_augus_fastas ,"$out_dir\/$seq_out_name\.maker.augus.proteins.fasta"); |
|---|
| 838 | | FastaFile::writeFile(\$t_augus_fastas ,"$out_dir\/$seq_out_name\.maker.augus.transcript.fasta"); |
|---|
| 839 | | } |
|---|
| 840 | | $GFF3->predictions($snaps); |
|---|
| 841 | | $GFF3->predictions($augus); |
|---|
| 842 | | $GFF3->print($out_dir."/".$seq_out_name.".gff"); |
|---|
| 843 | | |
|---|
| 844 | | #--cleanup maker files created with each fasta sequence |
|---|
| 845 | | File::Path::rmtree ($the_void) if $CTL_OPTIONS{clean_up}; #rm temp directory |
|---|
| 846 | | #-------------------------CHUNK |
|---|
| 847 | | |
|---|
| 848 | | #------------------------RESULTS |
|---|
| 849 | | @results = (); |
|---|
| 850 | | #------------------------RESULTS |
|---|
| 851 | | } |
|---|
| 852 | | else { |
|---|
| 853 | | warn "Error: Invalid argument for method run() in Process::MpiChunk\n"; |
|---|
| | 211 | my $VARS = $self->{VARS}; |
|---|
| | 212 | |
|---|
| | 213 | $self->{RANK} = shift || $self->{RANK}; |
|---|
| | 214 | |
|---|
| | 215 | if ($self->{FINISHED} || $self->{FAILED}) { |
|---|
| | 218 | |
|---|
| | 219 | my $results = $self->_go('run', $level, $VARS); |
|---|
| | 220 | |
|---|
| | 221 | $self->{VARS} = {}; |
|---|
| | 222 | $self->{RESULTS} = $results; |
|---|
| | 223 | $self->{FINISHED} = 1; |
|---|
| | 224 | |
|---|
| | 225 | if(! $self->failed){ |
|---|
| | 226 | return 1 ; |
|---|
| | 227 | } |
|---|
| | 228 | else{ |
|---|
| | 229 | return undef; |
|---|
| | 230 | } |
|---|
| | 231 | } |
|---|
| | 232 | #-------------------------------------------------------------- |
|---|
| | 233 | #this function is called by MakerTiers. It returns the flow of |
|---|
| | 234 | #levels, i.e. order control, looping, etc. |
|---|
| | 235 | |
|---|
# flow($level, $VARS)
#
# Hands ($level, $VARS) to _go() with the 'flow' flag and returns
# whatever _go() returns.
#
# May be invoked either as a method on a Process::MpiChunk object
# ($obj->flow($level, $VARS)) or as a plain function
# (flow($level, $VARS)); in the latter case a fresh object is built
# so that _go() can still be dispatched as a method.
#
# Arguments:
#   $level - the level forwarded to _go()
#   $VARS  - the variables forwarded to _go()
sub flow {
    my $self  = shift;
    my $level = shift;
    my $VARS  = shift;

    # Called as a plain function: what landed in $self is really the
    # level and what landed in $level is really VARS, so slide both over.
    if (ref($self) ne "Process::MpiChunk") {
        $VARS  = $level;
        $level = $self;

        # Explicit arrow call rather than indirect-object syntax
        # ("new Process::MpiChunk()"), which forces the parser to guess
        # between a function call and a method call.
        $self = Process::MpiChunk->new();
    }

    return $self->_go('flow', $level, $VARS);
}
|---|
| | 250 | #-------------------------------------------------------------- |
|---|
| | 251 | #initializes chunk variables, runs code, or returns flow |
|---|
| | 252 | #depending on flag. |
|---|
| | 253 | #This method is the core of the MpiChunk object |
|---|
| | 254 | |
|---|
| | 255 | sub _go { |
|---|
| | 256 | my $self = shift; |
|---|
| | 257 | my $flag = shift; |
|---|
| | 258 | my $level = shift @_; |
|---|
| | 259 | my $VARS = shift @_; |
|---|
| | 260 | |
|---|
| | 261 | my $next_level = $level + 1; |
|---|
| | 262 | my @chunks; |
|---|
| | 263 | my @args; |
|---|
| | 264 | my %results; |
|---|
| | 265 | |
|---|
| | 266 | my $level_status = ''; |
|---|
| | 267 | |
|---|
| | 268 | try{ |
|---|
| | 269 | if ($level == 0) { #set up GFF3 output and fasta chunks |
|---|
| | 270 | $level_status = 'setting up GFF3 output and fasta chunks'; |
|---|
| | 271 | if ($flag eq 'load') { |
|---|
| | 272 | #-------------------------CHUNKER |
|---|
| | 273 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 274 | push(@chunks, $chunk); |
|---|
| | 275 | #-------------------------CHUNKER |
|---|
| | 276 | } |
|---|
| | 277 | elsif ($flag eq 'init') { |
|---|
| | 278 | #------------------------ARGS_IN |
|---|
| | 279 | @args = (qw{fasta |
|---|
| | 280 | CTL_OPT |
|---|
| | 281 | out_dir |
|---|
| | 282 | build |
|---|
| | 283 | seq_id |
|---|
| | 284 | safe_seq_id |
|---|
| | 285 | the_void |
|---|
| | 286 | q_seq_ref} |
|---|
| | 287 | ); |
|---|
| | 288 | #------------------------ARGS_IN |
|---|
| | 289 | } |
|---|
| | 290 | elsif ($flag eq 'run') { |
|---|
| | 291 | #-------------------------CODE |
|---|
| | 292 | my $out_dir = $VARS->{out_dir}; |
|---|
| | 293 | my $build = $VARS->{build}; |
|---|
| | 294 | my $seq_id = $VARS->{seq_id}; |
|---|
| | 295 | my $safe_seq_id = $VARS->{safe_seq_id}; |
|---|
| | 296 | my $the_void = $VARS->{the_void}; |
|---|
| | 297 | my $q_seq_ref = $VARS->{q_seq_ref}; |
|---|
| | 298 | my $fasta = $VARS->{fasta}; |
|---|
| | 299 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 300 | |
|---|
| | 301 | |
|---|
| | 302 | my $GFF3 = Dumper::GFF::GFFV3->new("$out_dir/$safe_seq_id.gff", |
|---|
| | 303 | $build, |
|---|
| | 304 | $the_void |
|---|
| | 305 | ); |
|---|
| | 306 | $GFF3->set_current_contig($seq_id, $q_seq_ref); |
|---|
| | 307 | |
|---|
| | 308 | my $fasta_chunker = new FastaChunker(); |
|---|
| | 309 | $fasta_chunker->parent_fasta($fasta); |
|---|
| | 310 | $fasta_chunker->chunk_size($CTL_OPT{max_dna_len}); |
|---|
| | 311 | $fasta_chunker->min_size($CTL_OPT{split_hit}); |
|---|
| | 312 | $fasta_chunker->load_chunks(); |
|---|
| | 313 | |
|---|
| | 314 | #--build an index of the databases |
|---|
| | 315 | my $proteins = $VARS->{CTL_OPT}{_protein}; |
|---|
| | 316 | my $trans = $VARS->{CTL_OPT}{_est}; |
|---|
| | 317 | my $altests = $VARS->{CTL_OPT}{_altest}; |
|---|
| | 318 | my $fasta_t_index = GI::build_fasta_index($trans) if($trans); |
|---|
| | 319 | my $fasta_p_index = GI::build_fasta_index($proteins) if($proteins); |
|---|
| | 320 | my $fasta_a_index = GI::build_fasta_index($altests) if($altests); |
|---|
| | 321 | |
|---|
| | 322 | my $chunk = $fasta_chunker->next_chunk(); |
|---|
| | 323 | #-------------------------CODE |
|---|
| | 324 | |
|---|
| | 325 | #------------------------RESULTS |
|---|
| | 326 | %results = (GFF3 => $GFF3, |
|---|
| | 327 | fasta_chunker => $fasta_chunker, |
|---|
| | 328 | chunk => $chunk, |
|---|
| | 329 | fasta_t_index => $fasta_t_index, |
|---|
| | 330 | fasta_p_index => $fasta_p_index, |
|---|
| | 331 | fasta_a_index => $fasta_a_index |
|---|
| | 332 | ); |
|---|
| | 333 | #------------------------RESULTS |
|---|
| | 334 | } |
|---|
| | 335 | elsif ($flag eq 'flow') { |
|---|
| | 336 | #-------------------------NEXT_LEVEL |
|---|
| | 337 | #-------------------------NEXT_LEVEL |
|---|
| | 338 | } |
|---|
| | 339 | } |
|---|
| | 340 | elsif ($level == 1) { #do repeat masking |
|---|
| | 341 | $level_status = 'doing repeat masking'; |
|---|
| | 342 | if ($flag eq 'load') { |
|---|
| | 343 | #-------------------------CHUNKER |
|---|
| | 344 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 345 | push(@chunks, $chunk); |
|---|
| | 346 | #-------------------------CHUNKER |
|---|
| | 347 | } |
|---|
| | 348 | elsif ($flag eq 'init') { |
|---|
| | 349 | #------------------------ARGS_IN |
|---|
| | 350 | @args = (qw{chunk |
|---|
| | 351 | the_void |
|---|
| | 352 | safe_seq_id |
|---|
| | 353 | q_seq_ref |
|---|
| | 354 | GFF_DB |
|---|
| | 355 | LOG |
|---|
| | 356 | CTL_OPT} |
|---|
| | 357 | ); |
|---|
| | 358 | #------------------------ARGS_IN |
|---|
| | 359 | } |
|---|
| | 360 | elsif ($flag eq 'run') { |
|---|
| | 361 | #-------------------------CODE |
|---|
| | 362 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 363 | my $LOG = $VARS->{LOG}; |
|---|
| | 364 | my $GFF_DB = $VARS->{GFF_DB}; |
|---|
| | 365 | my $chunk = $VARS->{chunk}; |
|---|
| | 366 | my $q_seq_ref = $VARS->{q_seq_ref}; |
|---|
| | 367 | my $the_void = $VARS->{the_void}; |
|---|
| | 368 | my $safe_seq_id = $VARS->{safe_seq_id}; |
|---|
| | 369 | |
|---|
| | 370 | #-- repeatmask with gff3 input |
|---|
| | 371 | my $rm_gff_keepers = []; |
|---|
| | 372 | if ($CTL_OPT{go_gffdb}) { |
|---|
| | 373 | $rm_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, |
|---|
| | 374 | $q_seq_ref, |
|---|
| | 375 | 'repeat' |
|---|
| | 376 | ); |
|---|
| | 377 | #mask the chunk |
|---|
| | 378 | $chunk = repeat_mask_seq::mask_chunk($chunk, $rm_gff_keepers); |
|---|
| | 379 | } |
|---|
| | 380 | |
|---|
| | 381 | #-- repeatmask with RepeatMasker |
|---|
| | 382 | my $rm_rb_keepers = []; #repeat masker RepBase |
|---|
| | 383 | if ($CTL_OPT{model_org}) { #model organism repeats |
|---|
| | 384 | $rm_rb_keepers = GI::repeatmask($chunk, |
|---|
| | 385 | $the_void, |
|---|
| | 386 | $safe_seq_id, |
|---|
| | 387 | $CTL_OPT{model_org}, |
|---|
| | 388 | $CTL_OPT{RepeatMasker}, |
|---|
| | 389 | '', |
|---|
| | 390 | $CTL_OPT{cpus}, |
|---|
| | 391 | $CTL_OPT{force}, |
|---|
| | 392 | $LOG |
|---|
| | 393 | ); |
|---|
| | 394 | |
|---|
| | 395 | #mask the chunk |
|---|
| | 396 | $chunk = repeat_mask_seq::mask_chunk($chunk, $rm_rb_keepers); |
|---|
| | 397 | } |
|---|
| | 398 | my $rm_sp_keepers = []; #repeat masker species |
|---|
| | 399 | if ($CTL_OPT{rmlib}) { #species specific repeats; |
|---|
| | 400 | $rm_sp_keepers = GI::repeatmask($chunk, |
|---|
| | 401 | $the_void, |
|---|
| | 402 | $safe_seq_id, |
|---|
| | 403 | $CTL_OPT{model_org}, |
|---|
| | 404 | $CTL_OPT{RepeatMasker}, |
|---|
| | 405 | $CTL_OPT{rmlib}, |
|---|
| | 406 | $CTL_OPT{cpus}, |
|---|
| | 407 | $CTL_OPT{force}, |
|---|
| | 408 | $LOG |
|---|
| | 409 | ); |
|---|
| | 410 | |
|---|
| | 411 | #mask the chunk |
|---|
| | 412 | $chunk = repeat_mask_seq::mask_chunk($chunk, $rm_sp_keepers); |
|---|
| | 413 | } |
|---|
| | 414 | #-------------------------CODE |
|---|
| | 415 | |
|---|
| | 416 | #------------------------RESULTS |
|---|
| | 417 | %results = (rm_gff_keepers => $rm_gff_keepers, |
|---|
| | 418 | rm_rb_keepers => $rm_rb_keepers, |
|---|
| | 419 | rm_sp_keepers => $rm_sp_keepers, |
|---|
| | 420 | chunk => $chunk |
|---|
| | 421 | ); |
|---|
| | 422 | #------------------------RESULTS |
|---|
| | 423 | } |
|---|
| | 424 | elsif ($flag eq 'flow') { |
|---|
| | 425 | #-------------------------NEXT_LEVEL |
|---|
| | 426 | #-------------------------NEXT_LEVEL |
|---|
| | 427 | } |
|---|
| | 428 | } |
|---|
| | 429 | elsif ($level == 2) { #blastx repeat mask |
|---|
| | 430 | $level_status = 'doing blastx repeats'; |
|---|
| | 431 | if ($flag eq 'load') { |
|---|
| | 432 | #-------------------------CHUNKER |
|---|
| | 433 | foreach my $db (@{$VARS->{CTL_OPT}{r_db}}) { |
|---|
| | 434 | $VARS->{db} = $db; |
|---|
| | 435 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 436 | push(@chunks, $chunk); |
|---|
| | 437 | } |
|---|
| | 438 | #-------------------------CHUNKER |
|---|
| | 439 | } |
|---|
| | 440 | elsif ($flag eq 'init') { |
|---|
| | 441 | #------------------------ARGS_IN |
|---|
| | 442 | @args = (qw{db |
|---|
| | 443 | chunk |
|---|
| | 444 | the_void |
|---|
| | 445 | safe_seq_id |
|---|
| | 446 | LOG |
|---|
| | 447 | CTL_OPT} |
|---|
| | 448 | ); |
|---|
| | 449 | #------------------------ARGS_IN |
|---|
| | 450 | } |
|---|
| | 451 | elsif ($flag eq 'run') { |
|---|
| | 452 | #-------------------------CODE |
|---|
| | 453 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 454 | my $chunk = $VARS->{chunk}; |
|---|
| | 455 | my $db = $VARS->{db}; |
|---|
| | 456 | my $the_void = $VARS->{the_void}; |
|---|
| | 457 | my $safe_seq_id = $VARS->{safe_seq_id}; |
|---|
| | 458 | my $LOG = $VARS->{LOG}; |
|---|
| | 459 | my $LOG_FLAG = ($self->id =~ /^\d+\:\d+\:0$/) ? 1 : 0; |
|---|
| | 460 | |
|---|
| | 461 | my $res_dir; |
|---|
| | 462 | if ($CTL_OPT{_repeat_protein}) { |
|---|
| | 463 | $res_dir = GI::blastx_as_chunks($chunk, |
|---|
| | 464 | $db, |
|---|
| | 465 | $the_void, |
|---|
| | 466 | $safe_seq_id, |
|---|
| | 467 | $CTL_OPT{_blastx}, |
|---|
| | 468 | $CTL_OPT{eval_rm_blastx}, |
|---|
| | 469 | $CTL_OPT{split_hit}, |
|---|
| | 470 | $CTL_OPT{cpus}, |
|---|
| | 471 | $CTL_OPT{_repeat_protein}, |
|---|
| | 472 | $CTL_OPT{_formater}, |
|---|
| | 473 | $self->{RANK}, |
|---|
| | 474 | $CTL_OPT{force}, |
|---|
| | 475 | $LOG, |
|---|
| | 476 | $LOG_FLAG |
|---|
| | 477 | ); |
|---|
| | 478 | } |
|---|
| | 479 | #-------------------------CODE |
|---|
| | 480 | |
|---|
| | 481 | #------------------------RESULTS |
|---|
| | 482 | %results = (res_dir => $res_dir); |
|---|
| | 483 | #------------------------RESULTS |
|---|
| | 484 | } |
|---|
| | 485 | elsif ($flag eq 'flow') { |
|---|
| | 486 | #-------------------------NEXT_LEVEL |
|---|
| | 487 | #-------------------------NEXT_LEVEL |
|---|
| | 488 | } |
|---|
| | 489 | } |
|---|
| | 490 | elsif ($level == 3) { #collect blastx repeatmask |
|---|
| | 491 | $level_status = 'collecting blastx repeatmasking'; |
|---|
| | 492 | if ($flag eq 'load') { |
|---|
| | 493 | #-------------------------CHUNKER |
|---|
| | 494 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 495 | push(@chunks, $chunk); |
|---|
| | 496 | #-------------------------CHUNKER |
|---|
| | 497 | } |
|---|
| | 498 | elsif ($flag eq 'init') { |
|---|
| | 499 | #------------------------ARGS_IN |
|---|
| | 500 | @args = (qw{chunk |
|---|
| | 501 | res_dir |
|---|
| | 502 | LOG |
|---|
| | 503 | CTL_OPT} |
|---|
| | 504 | ); |
|---|
| | 505 | #------------------------ARGS_IN |
|---|
| | 506 | } |
|---|
| | 507 | elsif ($flag eq 'run') { |
|---|
| | 508 | #-------------------------CODE |
|---|
| | 509 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 510 | my $chunk = $VARS->{chunk}; |
|---|
| | 511 | my $res_dir = $VARS->{res_dir}; |
|---|
| | 512 | my $LOG = $VARS->{LOG}; |
|---|
| | 513 | |
|---|
| | 514 | my $rm_blastx_keepers = []; |
|---|
| | 515 | if ($res_dir) { |
|---|
| | 516 | $rm_blastx_keepers = GI::collect_blastx($chunk, |
|---|
| | 517 | $res_dir, |
|---|
| | 518 | $CTL_OPT{eval_rm_blastx}, |
|---|
| | 519 | $CTL_OPT{bit_rm_blastx}, |
|---|
| | 520 | $CTL_OPT{pcov_rm_blastx}, |
|---|
| | 521 | $CTL_OPT{pid_rm_blastx}, |
|---|
| | 522 | $CTL_OPT{split_hit}, |
|---|
| | 523 | $CTL_OPT{force}, |
|---|
| | 524 | $LOG |
|---|
| | 525 | ); |
|---|
| | 526 | |
|---|
| | 527 | #mask the chunk |
|---|
| | 528 | $chunk = repeat_mask_seq::mask_chunk($chunk, $rm_blastx_keepers); |
|---|
| | 529 | } |
|---|
| | 530 | $res_dir = undef; |
|---|
| | 531 | #-------------------------CODE |
|---|
| | 532 | |
|---|
| | 533 | #------------------------RESULTS |
|---|
| | 534 | %results = ( rm_blastx_keepers=> $rm_blastx_keepers, |
|---|
| | 535 | chunk => $chunk, |
|---|
| | 536 | res_dir => $res_dir |
|---|
| | 537 | ); |
|---|
| | 538 | #------------------------RESULTS |
|---|
| | 539 | } |
|---|
| | 540 | elsif ($flag eq 'flow') { |
|---|
| | 541 | #-------------------------NEXT_LEVEL |
|---|
| | 542 | #-------------------------NEXT_LEVEL |
|---|
| | 543 | } |
|---|
| | 544 | } |
|---|
| | 545 | elsif ($level == 4) { #process all repeats |
|---|
| | 546 | $level_status = 'processing all repeats'; |
|---|
| | 547 | if ($flag eq 'load') { |
|---|
| | 548 | #-------------------------CHUNKER |
|---|
| | 549 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 550 | push(@chunks, $chunk); |
|---|
| | 551 | #-------------------------CHUNKER |
|---|
| | 552 | } |
|---|
| | 553 | elsif ($flag eq 'init') { |
|---|
| | 554 | #------------------------ARGS_IN |
|---|
| | 555 | @args = (qw{chunk |
|---|
| | 556 | rm_gff_keepers |
|---|
| | 557 | rm_rb_keepers |
|---|
| | 558 | rm_sp_keepers |
|---|
| | 559 | rm_blastx_keepers |
|---|
| | 560 | q_seq_ref |
|---|
| | 561 | GFF3 |
|---|
| | 562 | masked_total_seq} |
|---|
| | 563 | ); |
|---|
| | 564 | #------------------------ARGS_IN |
|---|
| | 565 | } |
|---|
| | 566 | elsif ($flag eq 'run') { |
|---|
| | 567 | #-------------------------CODE |
|---|
| | 568 | my $chunk = $VARS->{chunk}; |
|---|
| | 569 | my $rm_gff_keepers = $VARS->{rm_gff_keepers}; |
|---|
| | 570 | my $rm_rb_keepers = $VARS->{rm_rb_keepers}; |
|---|
| | 571 | my $rm_sp_keepers = $VARS->{rm_sp_keepers}; |
|---|
| | 572 | my $rm_blastx_keepers = $VARS->{rm_blastx_keepers}; |
|---|
| | 573 | my $q_seq_ref = $VARS->{q_seq_ref}; |
|---|
| | 574 | my $GFF3 = $VARS->{GFF3}; |
|---|
| | 575 | my $masked_total_seq = $VARS->{masked_total_seq}; |
|---|
| | 576 | |
|---|
| | 577 | |
|---|
| | 578 | #-combine and cluster repeat hits for consensus |
|---|
| | 579 | my $rm_keepers = repeat_mask_seq::process($rm_gff_keepers, |
|---|
| | 580 | $rm_rb_keepers, |
|---|
| | 581 | $rm_sp_keepers, |
|---|
| | 582 | $rm_blastx_keepers, |
|---|
| | 583 | $q_seq_ref |
|---|
| | 584 | ); |
|---|
| | 585 | |
|---|
| | 586 | #-add repeats to GFF3 |
|---|
| | 587 | $GFF3->add_repeat_hits($rm_keepers); |
|---|
| | 588 | |
|---|
| | 589 | #-build big masked sequence |
|---|
| | 590 | $masked_total_seq .= $chunk->seq(); |
|---|
| | 591 | #-------------------------CODE |
|---|
| | 592 | |
|---|
| | 593 | #------------------------RESULTS |
|---|
| | 594 | %results = (rm_keepers => $rm_keepers, |
|---|
| | 595 | masked_total_seq => $masked_total_seq, |
|---|
| | 596 | rm_gff_keepers => [], #clear memory |
|---|
| | 597 | rm_rb_keepers => [], #clear memory |
|---|
| | 598 | rm_sp_keepers => [], #clear memory |
|---|
| | 599 | rm_blastx_keepers => [] #clear memory |
|---|
| | 600 | ); |
|---|
| | 601 | #------------------------RESULTS |
|---|
| | 602 | } |
|---|
| | 603 | elsif ($flag eq 'flow') { |
|---|
| | 604 | #-------------------------NEXT_LEVEL |
|---|
| | 605 | if ($VARS->{chunk} = $VARS->{fasta_chunker}->next_chunk) { |
|---|
| | 606 | $next_level = 1; |
|---|
| | 607 | } |
|---|
| | 608 | #-------------------------NEXT_LEVEL |
|---|
| | 609 | } |
|---|
| | 610 | } |
|---|
| | 611 | elsif ($level == 5) { #prep masked sequence and abinits |
|---|
| | 612 | $level_status = 'preparing masked sequence and ab-inits'; |
|---|
| | 613 | if ($flag eq 'load') { |
|---|
| | 614 | #-------------------------CHUNKER |
|---|
| | 615 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 616 | push(@chunks, $chunk); |
|---|
| | 617 | #-------------------------CHUNKER |
|---|
| | 618 | } |
|---|
| | 619 | elsif ($flag eq 'init') { |
|---|
| | 620 | #------------------------ARGS_IN |
|---|
| | 621 | @args = (qw{the_void |
|---|
| | 622 | safe_seq_id |
|---|
| | 623 | q_def |
|---|
| | 624 | masked_total_seq |
|---|
| | 625 | LOG |
|---|
| | 626 | CTL_OPT} |
|---|
| | 627 | ); |
|---|
| | 628 | #------------------------ARGS_IN |
|---|
| | 629 | } |
|---|
| | 630 | elsif ($flag eq 'run') { |
|---|
| | 631 | #-------------------------CODE |
|---|
| | 632 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 633 | my $q_def = $VARS->{q_def}; |
|---|
| | 634 | my $masked_total_seq = $VARS->{masked_total_seq}; |
|---|
| | 635 | my $the_void = $VARS->{the_void}; |
|---|
| | 636 | my $safe_seq_id = $VARS->{safe_seq_id}; |
|---|
| | 637 | my $LOG = $VARS->{LOG}; |
|---|
| | 638 | |
|---|
| | 639 | |
|---|
| | 640 | my $masked_fasta = Fasta::toFasta($q_def.' masked', \$masked_total_seq); |
|---|
| | 641 | my $masked_file = $the_void."/query.masked.fasta"; |
|---|
| | 642 | FastaFile::writeFile(\$masked_fasta ,$masked_file); |
|---|
| | 643 | |
|---|
| | 644 | #==ab initio predictions here |
|---|
| | 645 | my $preds = GI::abinits($masked_file, |
|---|
| | 646 | $the_void, |
|---|
| | 647 | $safe_seq_id, |
|---|
| | 648 | \%CTL_OPT, |
|---|
| | 649 | $LOG |
|---|
| | 650 | ); |
|---|
| | 651 | |
|---|
| | 652 | #==QRNA noncoding RNA prediction here |
|---|
| | 653 | my $qra_preds = []; |
|---|
| | 654 | #-------------------------CODE |
|---|
| | 655 | |
|---|
| | 656 | #------------------------RESULTS |
|---|
| | 657 | %results = (masked_fasta => $masked_fasta, |
|---|
| | 658 | preds => $preds, |
|---|
| | 659 | qra_preds => $qra_preds |
|---|
| | 660 | ); |
|---|
| | 661 | #------------------------RESULTS |
|---|
| | 662 | } |
|---|
| | 663 | elsif ($flag eq 'flow') { |
|---|
| | 664 | #-------------------------NEXT_LEVEL |
|---|
| | 665 | #-------------------------NEXT_LEVEL |
|---|
| | 666 | } |
|---|
| | 667 | } |
|---|
| | 668 | elsif ($level == 6) { #prep new fasta chunks |
|---|
| | 669 | $level_status = 'preparing new fasa chunks'; |
|---|
| | 670 | if ($flag eq 'load') { |
|---|
| | 671 | #-------------------------CHUNKER |
|---|
| | 672 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 673 | push(@chunks, $chunk); |
|---|
| | 674 | #-------------------------CHUNKER |
|---|
| | 675 | } |
|---|
| | 676 | elsif ($flag eq 'init') { |
|---|
| | 677 | #------------------------ARGS_IN |
|---|
| | 678 | @args = (qw{masked_fasta |
|---|
| | 679 | CTL_OPT} |
|---|
| | 680 | ); |
|---|
| | 681 | #------------------------ARGS_IN |
|---|
| | 682 | } |
|---|
| | 683 | elsif ($flag eq 'run') { |
|---|
| | 684 | #-------------------------CODE |
|---|
| | 685 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 686 | my $masked_fasta = $VARS->{masked_fasta}; |
|---|
| | 687 | |
|---|
| | 688 | |
|---|
| | 689 | #--reset fastachunker for masked chunks |
|---|
| | 690 | my $fasta_chunker = new FastaChunker(); |
|---|
| | 691 | $fasta_chunker = new FastaChunker(); |
|---|
| | 692 | $fasta_chunker->parent_fasta($masked_fasta); |
|---|
| | 693 | $fasta_chunker->chunk_size($CTL_OPT{max_dna_len}); |
|---|
| | 694 | $fasta_chunker->min_size($CTL_OPT{split_hit}); |
|---|
| | 695 | $fasta_chunker->load_chunks(); |
|---|
| | 696 | |
|---|
| | 697 | my $chunk = $fasta_chunker->next_chunk(); |
|---|
| | 698 | #-------------------------CODE |
|---|
| | 699 | |
|---|
| | 700 | #------------------------RESULTS |
|---|
| | 701 | %results = (fasta_chunker => $fasta_chunker, |
|---|
| | 702 | masked_fasta => $masked_fasta, |
|---|
| | 703 | chunk => $chunk |
|---|
| | 704 | ); |
|---|
| | 705 | #------------------------RESULTS |
|---|
| | 706 | } |
|---|
| | 707 | elsif ($flag eq 'flow') { |
|---|
| | 708 | #-------------------------NEXT_LEVEL |
|---|
| | 709 | #-------------------------NEXT_LEVEL |
|---|
| | 710 | } |
|---|
| | 711 | } |
|---|
| | 712 | elsif ($level == 7) { #blastn |
|---|
| | 713 | $level_status = 'doing blastn of ESTs'; |
|---|
| | 714 | if ($flag eq 'load') { |
|---|
| | 715 | #-------------------------CHUNKER |
|---|
| | 716 | foreach my $db (@{$VARS->{CTL_OPT}{e_db}}) { |
|---|
| | 717 | $VARS->{db} = $db; |
|---|
| | 718 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 719 | push(@chunks, $chunk); |
|---|
| | 720 | } |
|---|
| | 721 | #-------------------------CHUNKER |
|---|
| | 722 | } |
|---|
| | 723 | elsif ($flag eq 'init') { |
|---|
| | 724 | #------------------------ARGS_IN |
|---|
| | 725 | @args = (qw{db |
|---|
| | 726 | chunk |
|---|
| | 727 | the_void |
|---|
| | 728 | safe_seq_id |
|---|
| | 729 | LOG |
|---|
| | 730 | CTL_OPT} |
|---|
| | 731 | ); |
|---|
| | 732 | #------------------------ARGS_IN |
|---|
| | 733 | } |
|---|
| | 734 | elsif ($flag eq 'run') { |
|---|
| | 735 | #-------------------------CODE |
|---|
| | 736 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 737 | my $chunk = $VARS->{chunk}; |
|---|
| | 738 | my $db = $VARS->{db}; |
|---|
| | 739 | my $the_void = $VARS->{the_void}; |
|---|
| | 740 | my $safe_seq_id = $VARS->{safe_seq_id}; |
|---|
| | 741 | my $LOG = $VARS->{LOG}; |
|---|
| | 742 | my $LOG_FLAG = ($self->id =~ /^\d+\:\d+\:0$/) ? 1 : 0; |
|---|
| | 743 | |
|---|
| | 744 | #==BLAST ANALYSIS HERE |
|---|
| | 745 | #-blastn search the file against ESTs |
|---|
| | 746 | my $res_dir; |
|---|
| | 747 | if ($CTL_OPT{_est}) { |
|---|
| | 748 | $res_dir = GI::blastn_as_chunks($chunk, |
|---|
| | 749 | $db, |
|---|
| | 750 | $the_void, |
|---|
| | 751 | $safe_seq_id, |
|---|
| | 752 | $CTL_OPT{_blastn}, |
|---|
| | 753 | $CTL_OPT{eval_blastn}, |
|---|
| | 754 | $CTL_OPT{split_hit}, |
|---|
| | 755 | $CTL_OPT{cpus}, |
|---|
| | 756 | $CTL_OPT{_est}, |
|---|
| | 757 | $CTL_OPT{_formater}, |
|---|
| | 758 | $self->{RANK}, |
|---|
| | 759 | $CTL_OPT{force}, |
|---|
| | 760 | $LOG, |
|---|
| | 761 | $LOG_FLAG |
|---|
| | 762 | ); |
|---|
| | 763 | |
|---|
| | 764 | } |
|---|
| | 765 | #-------------------------CODE |
|---|
| | 766 | |
|---|
| | 767 | #------------------------RESULTS |
|---|
| | 768 | %results = (res_dir => $res_dir, |
|---|
| | 769 | chunk => $chunk |
|---|
| | 770 | ); |
|---|
| | 771 | #------------------------RESULTS |
|---|
| | 772 | } |
|---|
| | 773 | elsif ($flag eq 'flow') { |
|---|
| | 774 | #-------------------------NEXT_LEVEL |
|---|
| | 775 | #-------------------------NEXT_LEVEL |
|---|
| | 776 | } |
|---|
| | 777 | } |
|---|
| | 778 | elsif ($level == 8) { #collect blastn |
|---|
| | 779 | $level_status = 'collecting blastn reports'; |
|---|
| | 780 | if ($flag eq 'load') { |
|---|
| | 781 | #-------------------------CHUNKER |
|---|
| | 782 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 783 | push(@chunks, $chunk); |
|---|
| | 784 | #-------------------------CHUNKER |
|---|
| | 785 | } |
|---|
| | 786 | elsif ($flag eq 'init') { |
|---|
| | 787 | #------------------------ARGS_IN |
|---|
| | 788 | @args = (qw{chunk |
|---|
| | 789 | res_dir |
|---|
| | 790 | LOG |
|---|
| | 791 | CTL_OPT} |
|---|
| | 792 | ); |
|---|
| | 793 | #------------------------ARGS_IN |
|---|
| | 794 | } |
|---|
| | 795 | elsif ($flag eq 'run') { |
|---|
| | 796 | #-------------------------CODE |
|---|
| | 797 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 798 | my $res_dir = $VARS->{res_dir}; |
|---|
| | 799 | my $LOG = $VARS->{LOG}; |
|---|
| | 800 | my $chunk = $VARS->{chunk}; |
|---|
| | 801 | |
|---|
| | 802 | |
|---|
| | 803 | my $blastn_keepers = []; |
|---|
| | 804 | if ($res_dir) { |
|---|
| | 805 | $blastn_keepers = GI::collect_blastn($chunk, |
|---|
| | 806 | $res_dir, |
|---|
| | 807 | $CTL_OPT{eval_blastn}, |
|---|
| | 808 | $CTL_OPT{bit_blastn}, |
|---|
| | 809 | $CTL_OPT{pcov_blastn}, |
|---|
| | 810 | $CTL_OPT{pid_blastn}, |
|---|
| | 811 | $CTL_OPT{split_hit}, |
|---|
| | 812 | $CTL_OPT{force}, |
|---|
| | 813 | $LOG |
|---|
| | 814 | ); |
|---|
| | 815 | } |
|---|
| | 816 | $res_dir = undef; |
|---|
| | 817 | #-------------------------CODE |
|---|
| | 818 | |
|---|
| | 819 | #------------------------RESULTS |
|---|
| | 820 | %results = (blastn_keepers => $blastn_keepers, |
|---|
| | 821 | res_dir => $res_dir |
|---|
| | 822 | ); |
|---|
| | 823 | #------------------------RESULTS |
|---|
| | 824 | } |
|---|
| | 825 | elsif ($flag eq 'flow') { |
|---|
| | 826 | #-------------------------NEXT_LEVEL |
|---|
| | 827 | #-------------------------NEXT_LEVEL |
|---|
| | 828 | } |
|---|
| | 829 | } |
|---|
| | 830 | elsif ($level == 9) { #blastx |
|---|
| | 831 | $level_status = 'doing blastx of proteins'; |
|---|
| | 832 | if ($flag eq 'load') { |
|---|
| | 833 | #-------------------------CHUNKER |
|---|
| | 834 | foreach my $db (@{$VARS->{CTL_OPT}{p_db}}) { |
|---|
| | 835 | $VARS->{db} = $db; |
|---|
| | 836 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 837 | push(@chunks, $chunk); |
|---|
| | 838 | } |
|---|
| | 839 | #-------------------------CHUNKER |
|---|
| | 840 | } |
|---|
| | 841 | elsif ($flag eq 'init') { |
|---|
| | 842 | #------------------------ARGS_IN |
|---|
| | 843 | @args = (qw{db |
|---|
| | 844 | chunk |
|---|
| | 845 | the_void |
|---|
| | 846 | safe_seq_id |
|---|
| | 847 | LOG |
|---|
| | 848 | CTL_OPT} |
|---|
| | 849 | ); |
|---|
| | 850 | #------------------------ARGS_IN |
|---|
| | 851 | } |
|---|
| | 852 | elsif ($flag eq 'run') { |
|---|
| | 853 | #-------------------------CODE |
|---|
| | 854 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 855 | my $chunk = $VARS->{chunk}; |
|---|
| | 856 | my $db = $VARS->{db}; |
|---|
| | 857 | my $the_void = $VARS->{the_void}; |
|---|
| | 858 | my $safe_seq_id = $VARS->{safe_seq_id}; |
|---|
| | 859 | my $LOG = $VARS->{LOG}; |
|---|
| | 860 | my $LOG_FLAG = ($self->id =~ /^\d+\:\d+\:0$/) ? 1 : 0; |
|---|
| | 861 | |
|---|
| | 862 | |
|---|
| | 863 | #-blastx search the file against proteins |
|---|
| | 864 | my $res_dir; |
|---|
| | 865 | if ($CTL_OPT{_protein}) { |
|---|
| | 866 | $res_dir = GI::blastx_as_chunks($chunk, |
|---|
| | 867 | $db, |
|---|
| | 868 | $the_void, |
|---|
| | 869 | $safe_seq_id, |
|---|
| | 870 | $CTL_OPT{_blastx}, |
|---|
| | 871 | $CTL_OPT{eval_blastx}, |
|---|
| | 872 | $CTL_OPT{split_hit}, |
|---|
| | 873 | $CTL_OPT{cpus}, |
|---|
| | 874 | $CTL_OPT{_protein}, |
|---|
| | 875 | $CTL_OPT{_formater}, |
|---|
| | 876 | $self->{RANK}, |
|---|
| | 877 | $CTL_OPT{force}, |
|---|
| | 878 | $LOG, |
|---|
| | 879 | $LOG_FLAG |
|---|
| | 880 | ); |
|---|
| | 881 | |
|---|
| | 882 | } |
|---|
| | 883 | #-------------------------CODE |
|---|
| | 884 | |
|---|
| | 885 | #------------------------RESULTS |
|---|
| | 886 | %results = (res_dir => $res_dir); |
|---|
| | 887 | #------------------------RESULTS |
|---|
| | 888 | } |
|---|
| | 889 | elsif ($flag eq 'flow') { |
|---|
| | 890 | #-------------------------NEXT_LEVEL |
|---|
| | 891 | #-------------------------NEXT_LEVEL |
|---|
| | 892 | } |
|---|
| | 893 | } |
|---|
| | 894 | elsif ($level == 10) { #collect blastx |
|---|
| | 895 | $level_status = 'collecting blastx reports'; |
|---|
| | 896 | if ($flag eq 'load') { |
|---|
| | 897 | #-------------------------CHUNKER |
|---|
| | 898 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 899 | push(@chunks, $chunk); |
|---|
| | 900 | #-------------------------CHUNKER |
|---|
| | 901 | } |
|---|
| | 902 | elsif ($flag eq 'init') { |
|---|
| | 903 | #------------------------ARGS_IN |
|---|
| | 904 | @args = (qw{chunk |
|---|
| | 905 | res_dir |
|---|
| | 906 | LOG |
|---|
| | 907 | CTL_OPT} |
|---|
| | 908 | ); |
|---|
| | 909 | #------------------------ARGS_IN |
|---|
| | 910 | } |
|---|
| | 911 | elsif ($flag eq 'run') { |
|---|
| | 912 | #-------------------------CODE |
|---|
| | 913 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 914 | my $res_dir = $VARS->{res_dir}; |
|---|
| | 915 | my $LOG = $VARS->{LOG}; |
|---|
| | 916 | my $chunk = $VARS->{chunk}; |
|---|
| | 917 | |
|---|
| | 918 | |
|---|
| | 919 | my $blastx_keepers = []; |
|---|
| | 920 | if ($res_dir) { |
|---|
| | 921 | $blastx_keepers = GI::collect_blastx($chunk, |
|---|
| | 922 | $res_dir, |
|---|
| | 923 | $CTL_OPT{eval_blastx}, |
|---|
| | 924 | $CTL_OPT{bit_blastx}, |
|---|
| | 925 | $CTL_OPT{pcov_blastx}, |
|---|
| | 926 | $CTL_OPT{pid_blastx}, |
|---|
| | 927 | $CTL_OPT{split_hit}, |
|---|
| | 928 | $CTL_OPT{force}, |
|---|
| | 929 | $LOG |
|---|
| | 930 | ); |
|---|
| | 931 | } |
|---|
| | 932 | $res_dir = undef; |
|---|
| | 933 | #-------------------------CODE |
|---|
| | 934 | |
|---|
| | 935 | #------------------------RESULTS |
|---|
| | 936 | %results = (blastx_keepers => $blastx_keepers, |
|---|
| | 937 | res_dir => $res_dir |
|---|
| | 938 | ); |
|---|
| | 939 | #------------------------RESULTS |
|---|
| | 940 | } |
|---|
| | 941 | elsif ($flag eq 'flow') { |
|---|
| | 942 | #-------------------------NEXT_LEVEL |
|---|
| | 943 | #-------------------------NEXT_LEVEL |
|---|
| | 944 | } |
|---|
| | 945 | } |
|---|
| | 946 | elsif ($level == 11) { #tblastx |
|---|
| | 947 | $level_status = 'doing tblastx of altESTs'; |
|---|
| | 948 | if ($flag eq 'load') { |
|---|
| | 949 | #-------------------------CHUNKER |
|---|
| | 950 | foreach my $db (@{$VARS->{CTL_OPT}{a_db}}) { |
|---|
| | 951 | $VARS->{db} = $db; |
|---|
| | 952 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 953 | push(@chunks, $chunk); |
|---|
| | 954 | } |
|---|
| | 955 | #-------------------------CHUNKER |
|---|
| | 956 | } |
|---|
| | 957 | elsif ($flag eq 'init') { |
|---|
| | 958 | #------------------------ARGS_IN |
|---|
| | 959 | @args = (qw{db |
|---|
| | 960 | chunk |
|---|
| | 961 | the_void |
|---|
| | 962 | safe_seq_id |
|---|
| | 963 | LOG |
|---|
| | 964 | CTL_OPT} |
|---|
| | 965 | ); |
|---|
| | 966 | #------------------------ARGS_IN |
|---|
| | 967 | } |
|---|
| | 968 | elsif ($flag eq 'run') { |
|---|
| | 969 | #-------------------------CODE |
|---|
| | 970 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 971 | my $chunk = $VARS->{chunk}; |
|---|
| | 972 | my $db = $VARS->{db}; |
|---|
| | 973 | my $the_void = $VARS->{the_void}; |
|---|
| | 974 | my $safe_seq_id = $VARS->{safe_seq_id}; |
|---|
| | 975 | my $LOG = $VARS->{LOG}; |
|---|
| | 976 | my $LOG_FLAG = ($self->id =~ /^\d+\:\d+\:0$/) ? 1 : 0; |
|---|
| | 977 | |
|---|
| | 978 | |
|---|
| | 979 | my $res_dir; |
|---|
| | 980 | if ($CTL_OPT{_altest}) { |
|---|
| | 981 | $res_dir = GI::tblastx_as_chunks($chunk, |
|---|
| | 982 | $db, |
|---|
| | 983 | $the_void, |
|---|
| | 984 | $safe_seq_id, |
|---|
| | 985 | $CTL_OPT{_tblastx}, |
|---|
| | 986 | $CTL_OPT{eval_tblastx}, |
|---|
| | 987 | $CTL_OPT{split_hit}, |
|---|
| | 988 | $CTL_OPT{cpus}, |
|---|
| | 989 | $CTL_OPT{_altest}, |
|---|
| | 990 | $CTL_OPT{_formater}, |
|---|
| | 991 | $self->{RANK}, |
|---|
| | 992 | $CTL_OPT{force}, |
|---|
| | 993 | $LOG, |
|---|
| | 994 | $LOG_FLAG |
|---|
| | 995 | ); |
|---|
| | 996 | } |
|---|
| | 997 | #-------------------------CODE |
|---|
| | 998 | |
|---|
| | 999 | #------------------------RESULTS |
|---|
| | 1000 | %results = (res_dir => $res_dir); |
|---|
| | 1001 | #------------------------RESULTS |
|---|
| | 1002 | } |
|---|
| | 1003 | elsif ($flag eq 'flow') { |
|---|
| | 1004 | #-------------------------NEXT_LEVEL |
|---|
| | 1005 | #-------------------------NEXT_LEVEL |
|---|
| | 1006 | } |
|---|
| | 1007 | } |
|---|
| | 1008 | elsif ($level == 12) { #collect tblastx |
|---|
| | 1009 | $level_status = 'collecting tblastx reports'; |
|---|
| | 1010 | if ($flag eq 'load') { |
|---|
| | 1011 | #-------------------------CHUNKER |
|---|
| | 1012 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 1013 | push(@chunks, $chunk); |
|---|
| | 1014 | #-------------------------CHUNKER |
|---|
| | 1015 | } |
|---|
| | 1016 | elsif ($flag eq 'init') { |
|---|
| | 1017 | #------------------------ARGS_IN |
|---|
| | 1018 | @args = (qw{chunk |
|---|
| | 1019 | res_dir |
|---|
| | 1020 | LOG |
|---|
| | 1021 | CTL_OPT} |
|---|
| | 1022 | ); |
|---|
| | 1023 | #------------------------ARGS_IN |
|---|
| | 1024 | } |
|---|
| | 1025 | elsif ($flag eq 'run') { |
|---|
| | 1026 | #-------------------------CODE |
|---|
| | 1027 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 1028 | my $res_dir = $VARS->{res_dir}; |
|---|
| | 1029 | my $LOG = $VARS->{LOG}; |
|---|
| | 1030 | my $chunk = $VARS->{chunk}; |
|---|
| | 1031 | |
|---|
| | 1032 | |
|---|
| | 1033 | my $tblastx_keepers = []; |
|---|
| | 1034 | if ($res_dir) { |
|---|
| | 1035 | $tblastx_keepers = GI::collect_tblastx($chunk, |
|---|
| | 1036 | $res_dir, |
|---|
| | 1037 | $CTL_OPT{eval_tblastx}, |
|---|
| | 1038 | $CTL_OPT{bit_tblastx}, |
|---|
| | 1039 | $CTL_OPT{pcov_tblastx}, |
|---|
| | 1040 | $CTL_OPT{pid_tblastx}, |
|---|
| | 1041 | $CTL_OPT{split_hit}, |
|---|
| | 1042 | $CTL_OPT{force}, |
|---|
| | 1043 | $LOG |
|---|
| | 1044 | ); |
|---|
| | 1045 | } |
|---|
| | 1046 | $res_dir = undef; |
|---|
| | 1047 | #-------------------------CODE |
|---|
| | 1048 | |
|---|
| | 1049 | #------------------------RESULTS |
|---|
| | 1050 | %results = (tblastx_keepers => $tblastx_keepers, |
|---|
| | 1051 | res_dir => $res_dir |
|---|
| | 1052 | ); |
|---|
| | 1053 | #------------------------RESULTS |
|---|
| | 1054 | } |
|---|
| | 1055 | elsif ($flag eq 'flow') { |
|---|
| | 1056 | #-------------------------NEXT_LEVEL |
|---|
| | 1057 | #-------------------------NEXT_LEVEL |
|---|
| | 1058 | } |
|---|
| | 1059 | } |
|---|
| | 1060 | elsif ($level == 13) { #process chunk divide |
|---|
| | 1061 | $level_status = 'processing the chunk divide'; |
|---|
| | 1062 | if ($flag eq 'load') { |
|---|
| | 1063 | #-------------------------CHUNKER |
|---|
| | 1064 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 1065 | push(@chunks, $chunk); |
|---|
| | 1066 | #-------------------------CHUNKER |
|---|
| | 1067 | } |
|---|
| | 1068 | elsif ($flag eq 'init') { |
|---|
| | 1069 | #------------------------ARGS_IN |
|---|
| | 1070 | @args = (qw{chunk |
|---|
| | 1071 | the_void |
|---|
| | 1072 | q_seq_ref |
|---|
| | 1073 | masked_fasta |
|---|
| | 1074 | preds |
|---|
| | 1075 | blastn_keepers |
|---|
| | 1076 | blastx_keepers |
|---|
| | 1077 | tblastx_keepers |
|---|
| | 1078 | holdover_blastn |
|---|
| | 1079 | holdover_blastx |
|---|
| | 1080 | holdover_tblastx |
|---|
| | 1081 | holdover_pred |
|---|
| | 1082 | holdover_est_gff |
|---|
| | 1083 | holdover_altest_gff |
|---|
| | 1084 | holdover_prot_gff |
|---|
| | 1085 | holdover_pred_gff |
|---|
| | 1086 | holdover_model_gff |
|---|
| | 1087 | fasta_p_index |
|---|
| | 1088 | fasta_t_index |
|---|
| | 1089 | fasta_a_index |
|---|
| | 1090 | GFF_DB |
|---|
| | 1091 | LOG |
|---|
| | 1092 | CTL_OPT} |
|---|
| | 1093 | ); |
|---|
| | 1094 | #------------------------ARGS_IN |
|---|
| | 1095 | } |
|---|
| | 1096 | elsif ($flag eq 'run') { |
|---|
| | 1097 | #-------------------------CODE |
|---|
| | 1098 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 1099 | my $chunk = $VARS->{chunk}; |
|---|
| | 1100 | my $the_void = $VARS->{the_void}; |
|---|
| | 1101 | my $q_seq_ref = $VARS->{q_seq_ref}; |
|---|
| | 1102 | my $masked_fasta = $VARS->{masked_fasta}; |
|---|
| | 1103 | my $preds = $VARS->{preds}; |
|---|
| | 1104 | my $blastn_keepers = $VARS->{blastn_keepers}; |
|---|
| | 1105 | my $blastx_keepers = $VARS->{blastx_keepers}; |
|---|
| | 1106 | my $tblastx_keepers = $VARS->{tblastx_keepers}; |
|---|
| | 1107 | my $holdover_blastn = $VARS->{holdover_blastn}; |
|---|
| | 1108 | my $holdover_blastx = $VARS->{holdover_blastx}; |
|---|
| | 1109 | my $holdover_tblastx = $VARS->{holdover_tblastx}; |
|---|
| | 1110 | my $holdover_pred = $VARS->{holdover_pred}; |
|---|
| | 1111 | my $holdover_est_gff = $VARS->{holdover_est_gff}; |
|---|
| | 1112 | my $holdover_altest_gff = $VARS->{holdover_altest_gff}; |
|---|
| | 1113 | my $holdover_prot_gff = $VARS->{holdover_prot_gff}; |
|---|
| | 1114 | my $holdover_pred_gff = $VARS->{holdover_pred_gff}; |
|---|
| | 1115 | my $holdover_model_gff = $VARS->{holdover_model_gff}; |
|---|
| | 1116 | my $fasta_p_index = $VARS->{fasta_p_index}; |
|---|
| | 1117 | my $fasta_t_index = $VARS->{fasta_t_index}; |
|---|
| | 1118 | my $fasta_a_index = $VARS->{fasta_a_index}; |
|---|
| | 1119 | my $GFF_DB = $VARS->{GFF_DB}; |
|---|
| | 1120 | my $LOG = $VARS->{LOG}; |
|---|
| 857 | | $self->{RESULT} = \@results; |
|---|
| | 1122 | |
|---|
| | 1123 | #-get only those predictions on the chunk |
|---|
| | 1124 | my $preds_on_chunk = GI::get_preds_on_chunk($preds, |
|---|
| | 1125 | $chunk |
|---|
| | 1126 | ); |
|---|
| | 1127 | |
|---|
| | 1128 | #==GFF3 passthrough of evidence |
|---|
| | 1129 | my $prot_gff_keepers = []; |
|---|
| | 1130 | my $est_gff_keepers = []; |
|---|
| | 1131 | my $altest_gff_keepers = []; |
|---|
| | 1132 | my $model_gff_keepers = []; |
|---|
| | 1133 | my $pred_gff_keepers = []; |
|---|
| | 1134 | if ($CTL_OPT{go_gffdb}) { |
|---|
| | 1135 | #-protein evidence passthrough |
|---|
| | 1136 | $prot_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, |
|---|
| | 1137 | $q_seq_ref, |
|---|
| | 1138 | 'protein' |
|---|
| | 1139 | ); |
|---|
| | 1140 | #-est evidence passthrough |
|---|
| | 1141 | $est_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, |
|---|
| | 1142 | $q_seq_ref, |
|---|
| | 1143 | 'est' |
|---|
| | 1144 | ); |
|---|
| | 1145 | #-altest evidence passthrough |
|---|
| | 1146 | $altest_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, |
|---|
| | 1147 | $q_seq_ref, |
|---|
| | 1148 | 'altest' |
|---|
| | 1149 | ); |
|---|
| | 1150 | #-gff gene annotation passthrough here |
|---|
| | 1151 | $model_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, |
|---|
| | 1152 | $q_seq_ref, |
|---|
| | 1153 | 'model' |
|---|
| | 1154 | ); |
|---|
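| | 1155 | #-pred passthrough -- NOTE(review): the call below requests type 'repeat', whereas each sibling call requests the type its variable is named for; confirm whether 'pred' was intended |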
|---|
| | 1156 | $pred_gff_keepers = $GFF_DB->phathits_on_chunk($chunk, |
|---|
| | 1157 | $q_seq_ref, |
|---|
| | 1158 | 'repeat' |
|---|
| | 1159 | ); |
|---|
| | 1160 | } |
|---|
| | 1161 | |
|---|
| | 1162 | #==merge heldover Phathits from last round |
|---|
| | 1163 | if ($chunk->number != 0) { #if not first chunk |
|---|
| | 1164 | #reviews heldover blast hits, |
|---|
| | 1165 | #then merges and reblasts them if they cross the divide |
|---|
| | 1166 | ($blastn_keepers, |
|---|
| | 1167 | $blastx_keepers, |
|---|
| | 1168 | $tblastx_keepers) = GI::merge_resolve_hits(\$masked_fasta, |
|---|
| | 1169 | $fasta_t_index, |
|---|
| | 1170 | $fasta_p_index, |
|---|
| | 1171 | $fasta_a_index, |
|---|
| | 1172 | $blastn_keepers, |
|---|
| | 1173 | $blastx_keepers, |
|---|
| | 1174 | $tblastx_keepers, |
|---|
| | 1175 | $holdover_blastn, |
|---|
| | 1176 | $holdover_blastx, |
|---|
| | 1177 | $holdover_tblastx, |
|---|
| | 1178 | $the_void, |
|---|
| | 1179 | \%CTL_OPT, |
|---|
| | 1180 | $LOG |
|---|
| | 1181 | ); |
|---|
| | 1182 | #combine remaining holdover types |
|---|
| | 1183 | push(@{$preds_on_chunk}, @{$holdover_pred}); |
|---|
| | 1184 | push(@{$pred_gff_keepers}, @{$holdover_pred_gff}); |
|---|
| | 1185 | push(@{$est_gff_keepers}, @{$holdover_est_gff}); |
|---|
| | 1186 | push(@{$altest_gff_keepers}, @{$holdover_altest_gff}); |
|---|
| | 1187 | push(@{$prot_gff_keepers}, @{$holdover_prot_gff}); |
|---|
| | 1188 | push(@{$model_gff_keepers}, @{$holdover_model_gff}); |
|---|
| | 1189 | |
|---|
| | 1190 | #clear holdovers |
|---|
| | 1191 | @{$holdover_pred} = (); |
|---|
| | 1192 | @{$holdover_est_gff} = (); |
|---|
| | 1193 | @{$holdover_altest_gff} = (); |
|---|
| | 1194 | @{$holdover_prot_gff} = (); |
|---|
| | 1195 | @{$holdover_pred_gff} = (); |
|---|
| | 1196 | @{$holdover_model_gff} = (); |
|---|
| | 1197 | } |
|---|
| | 1198 | |
|---|
| | 1199 | #==PROCESS HITS CLOSE TO CODE DIVISIONS |
|---|
| | 1200 | #holdover hits that are too close to the divide for review with next chunk |
|---|
| | 1201 | if (not $chunk->is_last) { #if not last chunk |
|---|
| | 1202 | ($holdover_blastn, |
|---|
| | 1203 | $holdover_blastx, |
|---|
| | 1204 | $holdover_tblastx, |
|---|
| | 1205 | $holdover_pred, |
|---|
| | 1206 | $holdover_est_gff, |
|---|
| | 1207 | $holdover_altest_gff, |
|---|
| | 1208 | $holdover_prot_gff, |
|---|
| | 1209 | $holdover_pred_gff, |
|---|
| | 1210 | $holdover_model_gff, |
|---|
| | 1211 | $blastn_keepers, |
|---|
| | 1212 | $blastx_keepers, |
|---|
| | 1213 | $tblastx_keepers, |
|---|
| | 1214 | $preds_on_chunk, |
|---|
| | 1215 | $est_gff_keepers, |
|---|
| | 1216 | $altest_gff_keepers, |
|---|
| | 1217 | $prot_gff_keepers, |
|---|
| | 1218 | $pred_gff_keepers, |
|---|
| | 1219 | $model_gff_keepers |
|---|
| | 1220 | ) = GI::process_the_chunk_divide($chunk, |
|---|
| | 1221 | $CTL_OPT{'split_hit'}, |
|---|
| | 1222 | $blastn_keepers, |
|---|
| | 1223 | $blastx_keepers, |
|---|
| | 1224 | $tblastx_keepers, |
|---|
| | 1225 | $preds_on_chunk, |
|---|
| | 1226 | $est_gff_keepers, |
|---|
| | 1227 | $altest_gff_keepers, |
|---|
| | 1228 | $prot_gff_keepers, |
|---|
| | 1229 | $pred_gff_keepers, |
|---|
| | 1230 | $model_gff_keepers |
|---|
| | 1231 | ); |
|---|
| | 1232 | } |
|---|
| | 1233 | #-------------------------CODE |
|---|
| | 1234 | |
|---|
| | 1235 | #------------------------RESULTS |
|---|
| | 1236 | %results = (prot_gff_keepers => $prot_gff_keepers, |
|---|
| | 1237 | est_gff_keepers => $est_gff_keepers, |
|---|
| | 1238 | altest_gff_keepers => $altest_gff_keepers, |
|---|
| | 1239 | model_gff_keepers => $model_gff_keepers, |
|---|
| | 1240 | pred_gff_keepers => $pred_gff_keepers, |
|---|
| | 1241 | preds_on_chunk => $preds_on_chunk, |
|---|
| | 1242 | blastn_keepers => $blastn_keepers, |
|---|
| | 1243 | blastx_keepers => $blastx_keepers, |
|---|
| | 1244 | tblastx_keepers => $tblastx_keepers, |
|---|
| | 1245 | holdover_est_gff => $holdover_est_gff, |
|---|
| | 1246 | holdover_altest_gff => $holdover_altest_gff, |
|---|
| | 1247 | holdover_prot_gff => $holdover_prot_gff, |
|---|
| | 1248 | holdover_pred_gff => $holdover_pred_gff, |
|---|
| | 1249 | holdover_model_gff => $holdover_model_gff, |
|---|
| | 1250 | holdover_pred => $holdover_pred, |
|---|
| | 1251 | holdover_blastn => $holdover_blastn, |
|---|
| | 1252 | holdover_blastx => $holdover_blastx, |
|---|
| | 1253 | holdover_tblastx => $holdover_tblastx |
|---|
| | 1254 | ); |
|---|
| | 1255 | #------------------------RESULTS |
|---|
| | 1256 | } |
|---|
| | 1257 | elsif ($flag eq 'flow') { |
|---|
| | 1258 | #-------------------------NEXT_LEVEL |
|---|
| | 1259 | #-------------------------NEXT_LEVEL |
|---|
| | 1260 | } |
|---|
| | 1261 | } |
|---|
| | 1262 | elsif ($level == 14) { #exonerate proteins |
|---|
| | 1263 | $level_status = 'doing exonerate of proteins'; |
|---|
| | 1264 | if ($flag eq 'load') { |
|---|
| | 1265 | #-------------------------CHUNKER |
|---|
| | 1266 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 1267 | push(@chunks, $chunk); |
|---|
| | 1268 | #-------------------------CHUNKER |
|---|
| | 1269 | } |
|---|
| | 1270 | elsif ($flag eq 'init') { |
|---|
| | 1271 | #------------------------ARGS_IN |
|---|
| | 1272 | @args = (qw{blastx_keepers |
|---|
| | 1273 | the_void |
|---|
| | 1274 | q_seq_ref |
|---|
| | 1275 | fasta |
|---|
| | 1276 | fasta_p_index |
|---|
| | 1277 | LOG |
|---|
| | 1278 | CTL_OPT} |
|---|
| | 1279 | ); |
|---|
| | 1280 | #------------------------ARGS_IN |
|---|
| | 1281 | } |
|---|
| | 1282 | elsif ($flag eq 'run') { |
|---|
| | 1283 | #-------------------------CODE |
|---|
| | 1284 | my %CTL_OPT = %{$VARS->{CTL_OPT}};; |
|---|
| | 1285 | my $blastx_keepers = $VARS->{blastx_keepers}; |
|---|
| | 1286 | my $the_void = $VARS->{the_void}; |
|---|
| | 1287 | my $q_seq_ref = $VARS->{q_seq_ref}; |
|---|
| | 1288 | my $fasta = $VARS->{fasta}; |
|---|
| | 1289 | my $fasta_p_index = $VARS->{fasta_p_index}; |
|---|
| | 1290 | my $LOG = $VARS->{LOG}; |
|---|
| | 1291 | |
|---|
| | 1292 | #variables that are persistent outside of try block |
|---|
| | 1293 | my $blastx_data; |
|---|
| | 1294 | my $exonerate_p_data; |
|---|
| | 1295 | |
|---|
| | 1296 | #-cluster the blastx hits |
|---|
| | 1297 | print STDERR "cleaning blastx...\n" unless $main::quiet; |
|---|
| | 1298 | |
|---|
| | 1299 | my $blastx_clusters = cluster::clean_and_cluster($blastx_keepers, |
|---|
| | 1300 | $q_seq_ref, |
|---|
| | 1301 | 10 |
|---|
| | 1302 | ); |
|---|
| | 1303 | undef $blastx_keepers; #free up memory |
|---|
| | 1304 | |
|---|
| | 1305 | #-make a multi-fasta of the seqs in the blastx_clusters |
|---|
| | 1306 | #-polish the blastx hits with exonerate |
|---|
| | 1307 | |
|---|
| | 1308 | my $exoner_p_clust = GI::polish_exonerate($fasta, |
|---|
| | 1309 | $blastx_clusters, |
|---|
| | 1310 | $fasta_p_index, |
|---|
| | 1311 | $the_void, |
|---|
| | 1312 | 5, |
|---|
| | 1313 | 'p', |
|---|
| | 1314 | $CTL_OPT{exonerate}, |
|---|
| | 1315 | $CTL_OPT{pcov_blastx}, |
|---|
| | 1316 | $CTL_OPT{pid_blastx}, |
|---|
| | 1317 | $CTL_OPT{ep_score_limit}, |
|---|
| | 1318 | $CTL_OPT{ep_matrix}, |
|---|
| | 1319 | $CTL_OPT{force}, |
|---|
| | 1320 | $LOG |
|---|
| | 1321 | ); |
|---|
| | 1322 | |
|---|
| | 1323 | #flatten clusters |
|---|
| | 1324 | $blastx_data = GI::flatten($blastx_clusters); |
|---|
| | 1325 | $exonerate_p_data = GI::flatten($exoner_p_clust, 'exonerate:p'); |
|---|
| | 1326 | #-------------------------CODE |
|---|
| | 1327 | |
|---|
| | 1328 | #------------------------RESULTS |
|---|
| | 1329 | %results = (blastx_data => $blastx_data, |
|---|
| | 1330 | exonerate_p_data => $exonerate_p_data |
|---|
| | 1331 | ); |
|---|
| | 1332 | #------------------------RESULTS |
|---|
| | 1333 | } |
|---|
| | 1334 | elsif ($flag eq 'flow') { |
|---|
| | 1335 | #-------------------------NEXT_LEVEL |
|---|
| | 1336 | #-------------------------NEXT_LEVEL |
|---|
| | 1337 | } |
|---|
| | 1338 | } |
|---|
| | 1339 | elsif ($level == 15) { #exonerate ESTs |
|---|
| | 1340 | $level_status = 'doing exonerate of ESTs'; |
|---|
| | 1341 | if ($flag eq 'load') { |
|---|
| | 1342 | #-------------------------CHUNKER |
|---|
| | 1343 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 1344 | push(@chunks, $chunk); |
|---|
| | 1345 | #-------------------------CHUNKER |
|---|
| | 1346 | } |
|---|
| | 1347 | elsif ($flag eq 'init') { |
|---|
| | 1348 | #------------------------ARGS_IN |
|---|
| | 1349 | @args = (qw{tblastx_keepers |
|---|
| | 1350 | blastn_keepers |
|---|
| | 1351 | the_void |
|---|
| | 1352 | q_seq_ref |
|---|
| | 1353 | fasta |
|---|
| | 1354 | fasta_t_index |
|---|
| | 1355 | LOG |
|---|
| | 1356 | CTL_OPT} |
|---|
| | 1357 | ); |
|---|
| | 1358 | #------------------------ARGS_IN |
|---|
| | 1359 | } |
|---|
| | 1360 | elsif ($flag eq 'run') { |
|---|
| | 1361 | #-------------------------CODE |
|---|
| | 1362 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 1363 | my $tblastx_keepers = $VARS->{tblastx_keepers}; |
|---|
| | 1364 | my $blastn_keepers = $VARS->{blastn_keepers}; |
|---|
| | 1365 | my $the_void = $VARS->{the_void}; |
|---|
| | 1366 | my $q_seq_ref = $VARS->{q_seq_ref}; |
|---|
| | 1367 | my $fasta = $VARS->{fasta}; |
|---|
| | 1368 | my $fasta_t_index = $VARS->{fasta_t_index}; |
|---|
| | 1369 | my $LOG = $VARS->{LOG}; |
|---|
| | 1370 | |
|---|
| | 1371 | |
|---|
| | 1372 | #-cluster the tblastx hits |
|---|
| | 1373 | print STDERR "cleaning tblastx...\n" unless $main::quiet; |
|---|
| | 1374 | my $tblastx_clusters = cluster::clean_and_cluster($tblastx_keepers, |
|---|
| | 1375 | $q_seq_ref, |
|---|
| | 1376 | 10 |
|---|
| | 1377 | ); |
|---|
| | 1378 | undef $tblastx_keepers; #free up memory |
|---|
| | 1379 | |
|---|
| | 1380 | #flatten the clusters |
|---|
| | 1381 | my $tblastx_data = GI::flatten($tblastx_clusters); |
|---|
| | 1382 | |
|---|
| | 1383 | |
|---|
| | 1384 | #-cluster the blastn hits |
|---|
| | 1385 | print STDERR "cleaning blastn...\n" unless $main::quiet; |
|---|
| | 1386 | my $blastn_clusters = cluster::clean_and_cluster($blastn_keepers, |
|---|
| | 1387 | $q_seq_ref, |
|---|
| | 1388 | 10 |
|---|
| | 1389 | ); |
|---|
| | 1390 | undef $blastn_keepers; #free up memory |
|---|
| | 1391 | |
|---|
| | 1392 | #-polish blastn hits with exonerate |
|---|
| | 1393 | my $exoner_e_clust = GI::polish_exonerate($fasta, |
|---|
| | 1394 | $blastn_clusters, |
|---|
| | 1395 | $fasta_t_index, |
|---|
| | 1396 | $the_void, |
|---|
| | 1397 | 5, |
|---|
| | 1398 | 'e', |
|---|
| | 1399 | $CTL_OPT{exonerate}, |
|---|
| | 1400 | $CTL_OPT{pcov_blastn}, |
|---|
| | 1401 | $CTL_OPT{pid_blastn}, |
|---|
| | 1402 | $CTL_OPT{en_score_limit}, |
|---|
| | 1403 | $CTL_OPT{en_matrix}, |
|---|
| | 1404 | $CTL_OPT{force}, |
|---|
| | 1405 | $LOG |
|---|
| | 1406 | ); |
|---|
| | 1407 | |
|---|
| | 1408 | #flatten clusters |
|---|
| | 1409 | my $blastn_data = GI::flatten($blastn_clusters); |
|---|
| | 1410 | my $exonerate_e_data = GI::flatten($exoner_e_clust, |
|---|
| | 1411 | 'exonerate:e' |
|---|
| | 1412 | ); |
|---|
| | 1413 | #-------------------------CODE |
|---|
| | 1414 | |
|---|
| | 1415 | #------------------------RESULTS |
|---|
| | 1416 | %results = (blastn_data => $blastn_data, |
|---|
| | 1417 | exonerate_e_data => $exonerate_e_data, |
|---|
| | 1418 | tblastx_data => $tblastx_data |
|---|
| | 1419 | ); |
|---|
| | 1420 | #------------------------RESULTS |
|---|
| | 1421 | } |
|---|
| | 1422 | elsif ($flag eq 'flow') { |
|---|
| | 1423 | #-------------------------NEXT_LEVEL |
|---|
| | 1424 | #-------------------------NEXT_LEVEL |
|---|
| | 1425 | } |
|---|
| | 1426 | } |
|---|
| | 1427 | elsif ($level == 16) { #annotations |
|---|
| | 1428 | $level_status = 'calculating annotations'; |
|---|
| | 1429 | if ($flag eq 'load') { |
|---|
| | 1430 | #-------------------------CHUNKER |
|---|
| | 1431 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 1432 | push(@chunks, $chunk); |
|---|
| | 1433 | #-------------------------CHUNKER |
|---|
| | 1434 | } |
|---|
| | 1435 | elsif ($flag eq 'init') { |
|---|
| | 1436 | #------------------------ARGS_IN |
|---|
| | 1437 | @args = (qw{chunk |
|---|
| | 1438 | the_void |
|---|
| | 1439 | out_dir |
|---|
| | 1440 | build |
|---|
| | 1441 | fasta |
|---|
| | 1442 | masked_fasta |
|---|
| | 1443 | tblastx_data |
|---|
| | 1444 | blastx_data |
|---|
| | 1445 | exonerate_e_data |
|---|
| | 1446 | exonerate_p_data |
|---|
| | 1447 | preds_on_chunk |
|---|
| | 1448 | est_gff_keepers |
|---|
| | 1449 | altest_gff_keepers |
|---|
| | 1450 | prot_gff_keepers |
|---|
| | 1451 | pred_gff_keepers |
|---|
| | 1452 | model_gff_keepers |
|---|
| | 1453 | LOG |
|---|
| | 1454 | CTL_OPT} |
|---|
| | 1455 | ); |
|---|
| | 1456 | #------------------------ARGS_IN |
|---|
| | 1457 | } |
|---|
| | 1458 | elsif ($flag eq 'run') { |
|---|
| | 1459 | #-------------------------CODE |
|---|
| | 1460 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 1461 | my $chunk = $VARS->{chunk}; |
|---|
| | 1462 | my $the_void = $VARS->{the_void}; |
|---|
| | 1463 | my $out_dir = $VARS->{out_dir}; |
|---|
| | 1464 | my $build = $VARS->{build}; |
|---|
| | 1465 | my $fasta = $VARS->{fasta}; |
|---|
| | 1466 | my $masked_fasta = $VARS->{masked_fasta}; |
|---|
| | 1467 | my $tblastx_data = $VARS->{tblastx_data}; |
|---|
| | 1468 | my $blastx_data = $VARS->{blastx_data}; |
|---|
| | 1469 | my $exonerate_e_data = $VARS->{exonerate_e_data}; |
|---|
| | 1470 | my $exonerate_p_data = $VARS->{exonerate_p_data}; |
|---|
| | 1471 | my $preds_on_chunk = $VARS->{preds_on_chunk}; |
|---|
| | 1472 | my $est_gff_keepers = $VARS->{est_gff_keepers}; |
|---|
| | 1473 | my $altest_gff_keepers = $VARS->{altest_gff_keepers}; |
|---|
| | 1474 | my $prot_gff_keepers = $VARS->{prot_gff_keepers}; |
|---|
| | 1475 | my $pred_gff_keepers = $VARS->{pred_gff_keepers}; |
|---|
| | 1476 | my $model_gff_keepers = $VARS->{model_gff_keepers}; |
|---|
| | 1477 | my $LOG = $VARS->{LOG}; |
|---|
| | 1478 | |
|---|
| | 1479 | #combine final data sets |
|---|
| | 1480 | my $final_est = GI::combine($exonerate_e_data, |
|---|
| | 1481 | $est_gff_keepers |
|---|
| | 1482 | ); |
|---|
| | 1483 | my $final_altest = GI::combine($tblastx_data, |
|---|
| | 1484 | $altest_gff_keepers |
|---|
| | 1485 | ); |
|---|
| | 1486 | my $final_prot = GI::combine($blastx_data, |
|---|
| | 1487 | $exonerate_p_data, |
|---|
| | 1488 | $prot_gff_keepers |
|---|
| | 1489 | ); |
|---|
| | 1490 | my $final_pred = GI::combine($preds_on_chunk, |
|---|
| | 1491 | $pred_gff_keepers |
|---|
| | 1492 | ); |
|---|
| | 1493 | |
|---|
| | 1494 | #####working here########### |
|---|
| | 1495 | #==MAKER annotations built here |
|---|
| | 1496 | #-auto-annotate the input file |
|---|
| | 1497 | my $annotations = maker::auto_annotator::annotate($fasta, |
|---|
| | 1498 | $masked_fasta, |
|---|
| | 1499 | $chunk->number(), |
|---|
| | 1500 | $final_prot, |
|---|
| | 1501 | $final_est, |
|---|
| | 1502 | $final_altest, |
|---|
| | 1503 | $final_pred, |
|---|
| | 1504 | $model_gff_keepers, |
|---|
| | 1505 | $the_void, |
|---|
| | 1506 | $build, |
|---|
| | 1507 | \%CTL_OPT, |
|---|
| | 1508 | $LOG |
|---|
| | 1509 | ); |
|---|
| | 1510 | |
|---|
| | 1511 | my $maker_anno = maker::auto_annotator::best_annotations($annotations, |
|---|
| | 1512 | $out_dir, |
|---|
| | 1513 | \%CTL_OPT |
|---|
| | 1514 | ); |
|---|
| | 1515 | |
|---|
| | 1516 | #-------------------------CODE |
|---|
| | 1517 | |
|---|
| | 1518 | #------------------------RESULTS |
|---|
| | 1519 | %results = (maker_anno => $maker_anno); |
|---|
| | 1520 | #------------------------RESULTS |
|---|
| | 1521 | } |
|---|
| | 1522 | elsif ($flag eq 'flow') { |
|---|
| | 1523 | #-------------------------NEXT_LEVEL |
|---|
| | 1524 | #-------------------------NEXT_LEVEL |
|---|
| | 1525 | } |
|---|
| | 1526 | } |
|---|
| | 1527 | elsif ($level == 17) { #local output |
|---|
| | 1528 | $level_status = 'processing chunk output'; |
|---|
| | 1529 | if ($flag eq 'load') { |
|---|
| | 1530 | #-------------------------CHUNKER |
|---|
| | 1531 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 1532 | push(@chunks, $chunk); |
|---|
| | 1533 | #-------------------------CHUNKER |
|---|
| | 1534 | } |
|---|
| | 1535 | elsif ($flag eq 'init') { |
|---|
| | 1536 | #------------------------ARGS_IN |
|---|
| | 1537 | @args = (qw{chunk |
|---|
| | 1538 | maker_anno |
|---|
| | 1539 | blastx_data |
|---|
| | 1540 | blastn_data |
|---|
| | 1541 | tblastx_data |
|---|
| | 1542 | exonerate_p_data |
|---|
| | 1543 | exonerate_e_data |
|---|
| | 1544 | est_gff_keepers |
|---|
| | 1545 | altest_gff_keepers |
|---|
| | 1546 | prot_gff_keepers |
|---|
| | 1547 | pred_gff_keepers |
|---|
| | 1548 | preds_on_chunk |
|---|
| | 1549 | p_fastas |
|---|
| | 1550 | t_fastas |
|---|
| | 1551 | GFF3} |
|---|
| | 1552 | ); |
|---|
| | 1553 | #------------------------ARGS_IN |
|---|
| | 1554 | } |
|---|
| | 1555 | elsif ($flag eq 'run') { |
|---|
| | 1556 | #-------------------------CODE |
|---|
| | 1557 | my $chunk = $VARS->{chunk}; |
|---|
| | 1558 | my $maker_anno = $VARS->{maker_anno}; |
|---|
| | 1559 | my $blastx_data = $VARS->{blastx_data}; |
|---|
| | 1560 | my $blastn_data = $VARS->{blastn_data}; |
|---|
| | 1561 | my $tblastx_data = $VARS->{tblastx_data}; |
|---|
| | 1562 | my $exonerate_p_data = $VARS->{exonerate_p_data}; |
|---|
| | 1563 | my $exonerate_e_data = $VARS->{exonerate_e_data}; |
|---|
| | 1564 | my $est_gff_keepers = $VARS->{est_gff_keepers}; |
|---|
| | 1565 | my $altest_gff_keepers = $VARS->{altest_gff_keepers}; |
|---|
| | 1566 | my $prot_gff_keepers = $VARS->{prot_gff_keepers}; |
|---|
| | 1567 | my $pred_gff_keepers = $VARS->{pred_gff_keepers}; |
|---|
| | 1568 | my $preds_on_chunk = $VARS->{preds_on_chunk}; |
|---|
| | 1569 | my $p_fastas = $VARS->{p_fastas}; |
|---|
| | 1570 | my $t_fastas = $VARS->{t_fastas}; |
|---|
| | 1571 | my $GFF3 = $VARS->{GFF3}; |
|---|
| | 1572 | |
|---|
| | 1573 | |
|---|
| | 1574 | #==OUTPUT DATA HERE |
|---|
| | 1575 | #--- GFF3 |
|---|
| | 1576 | $GFF3->add_genes($maker_anno); |
|---|
| | 1577 | $GFF3->add_phathits($blastx_data); |
|---|
| | 1578 | $GFF3->add_phathits($blastn_data); |
|---|
| | 1579 | $GFF3->add_phathits($tblastx_data); |
|---|
| | 1580 | $GFF3->add_phathits($exonerate_p_data); |
|---|
| | 1581 | $GFF3->add_phathits($exonerate_e_data); |
|---|
| | 1582 | $GFF3->add_phathits($est_gff_keepers); |
|---|
| | 1583 | $GFF3->add_phathits($altest_gff_keepers); |
|---|
| | 1584 | $GFF3->add_phathits($prot_gff_keepers); |
|---|
| | 1585 | $GFF3->add_phathits($preds_on_chunk); |
|---|
| | 1586 | $GFF3->add_phathits($pred_gff_keepers); |
|---|
| | 1587 | $GFF3->resolved_flag if (not $chunk->is_last); #adds ### between contigs |
|---|
| | 1588 | |
|---|
| | 1589 | #--- building fastas for annotations (grows with each iteration) |
|---|
| | 1590 | my ($p_fasta, $t_fasta) = GI::maker_p_and_t_fastas($maker_anno); |
|---|
| | 1591 | $p_fastas .= $p_fasta; |
|---|
| | 1592 | $t_fastas .= $t_fasta; |
|---|
| | 1593 | #-------------------------CODE |
|---|
| | 1594 | |
|---|
| | 1595 | #------------------------RESULTS |
|---|
| | 1596 | %results = (p_fastas => $p_fastas, |
|---|
| | 1597 | t_fastas => $t_fastas |
|---|
| | 1598 | ); |
|---|
| | 1599 | #------------------------RESULTS |
|---|
| | 1600 | } |
|---|
| | 1601 | elsif ($flag eq 'flow') { |
|---|
| | 1602 | #-------------------------NEXT_LEVEL |
|---|
| | 1603 | if ($VARS->{chunk} = $VARS->{fasta_chunker}->next_chunk) { |
|---|
| | 1604 | $next_level = 7; |
|---|
| | 1605 | } |
|---|
| | 1606 | #-------------------------NEXT_LEVEL |
|---|
| | 1607 | } |
|---|
| | 1608 | } |
|---|
| | 1609 | elsif ($level == 18) { #global output |
|---|
| | 1610 | $level_status = 'processing contig output'; |
|---|
| | 1611 | if ($flag eq 'load') { |
|---|
| | 1612 | #-------------------------CHUNKER |
|---|
| | 1613 | my $chunk = new Process::MpiChunk($level, $VARS); |
|---|
| | 1614 | push(@chunks, $chunk); |
|---|
| | 1615 | #-------------------------CHUNKER |
|---|
| | 1616 | } |
|---|
| | 1617 | elsif ($flag eq 'init') { |
|---|
| | 1618 | #------------------------ARGS_IN |
|---|
| | 1619 | @args = (qw{the_void |
|---|
| | 1620 | out_dir |
|---|
| | 1621 | seq_id |
|---|
| | 1622 | safe_seq_id |
|---|
| | 1623 | q_seq_ref |
|---|
| | 1624 | preds |
|---|
| | 1625 | p_fastas |
|---|
| | 1626 | t_fastas |
|---|
| | 1627 | GFF3 |
|---|
| | 1628 | DS_CTL |
|---|
| | 1629 | CTL_OPT} |
|---|
| | 1630 | ); |
|---|
| | 1631 | #------------------------ARGS_IN |
|---|
| | 1632 | } |
|---|
| | 1633 | elsif ($flag eq 'run') { |
|---|
| | 1634 | #-------------------------CODE |
|---|
| | 1635 | my %CTL_OPT = %{$VARS->{CTL_OPT}}; |
|---|
| | 1636 | my $the_void = $VARS->{the_void}; |
|---|
| | 1637 | my $out_dir = $VARS->{out_dir}; |
|---|
| | 1638 | my $seq_id = $VARS->{seq_id}; |
|---|
| | 1639 | my $safe_seq_id = $VARS->{safe_seq_id}; |
|---|
| | 1640 | my $q_seq_ref = $VARS->{q_seq_ref}; |
|---|
| | 1641 | my $preds = $VARS->{preds}; |
|---|
| | 1642 | my $p_fastas = $VARS->{p_fastas}; |
|---|
| | 1643 | my $t_fastas = $VARS->{t_fastas}; |
|---|
| | 1644 | my $GFF3 = $VARS->{GFF3}; |
|---|
| | 1645 | my $DS_CTL = $VARS->{DS_CTL}; |
|---|
| | 1646 | |
|---|
| | 1647 | |
|---|
| | 1648 | #--- write fastas for ab-initio predictions |
|---|
| | 1649 | my ($p_snap_fastas, |
|---|
| | 1650 | $t_snap_fastas) = GI::abinit_p_and_t_fastas($preds, |
|---|
| | 1651 | $safe_seq_id, |
|---|
| | 1652 | $q_seq_ref, |
|---|
| | 1653 | $out_dir |
|---|
| | 1654 | ); |
|---|
| | 1655 | |
|---|
| | 1656 | #--Write fasta files now that all chunks are finished |
|---|
| | 1657 | FastaFile::writeFile(\$p_fastas, |
|---|
| | 1658 | "$out_dir/$safe_seq_id.maker.proteins.fasta" |
|---|
| | 1659 | ); |
|---|
| | 1660 | FastaFile::writeFile(\$t_fastas, |
|---|
| | 1661 | "$out_dir/$safe_seq_id.maker.transcripts.fasta" |
|---|
| | 1662 | ); |
|---|
| | 1663 | |
|---|
| | 1664 | #--- write GFF3 file |
|---|
| | 1665 | $GFF3->finalize(); |
|---|
| | 1666 | |
|---|
| | 1667 | #--cleanup maker files created with each fasta sequence |
|---|
| | 1668 | File::Path::rmtree ($the_void) if $CTL_OPT{clean_up}; #rm temp directory |
|---|
| | 1669 | |
|---|
| | 1670 | #-- write to DS log the finished files |
|---|
| | 1671 | $DS_CTL->add_entry($seq_id, $out_dir, 'FINISHED'); |
|---|
| | 1672 | |
|---|
| | 1673 | #--- clear the log variable |
|---|
| | 1674 | $VARS->{LOG} = undef; |
|---|
| | 1675 | #-------------------------CODE |
|---|
| | 1676 | |
|---|
| | 1677 | #------------------------RESULTS |
|---|
| | 1678 | %results = (); |
|---|
| | 1679 | #------------------------RESULTS |
|---|
| | 1680 | } |
|---|
| | 1681 | elsif ($flag eq 'flow') { |
|---|
| | 1682 | #-------------------------NEXT_LEVEL |
|---|
| | 1683 | $next_level = undef; |
|---|
| | 1684 | #-------------------------NEXT_LEVEL |
|---|
| | 1685 | } |
|---|
| | 1686 | } |
|---|
| | 1687 | else { |
|---|
| | 1688 | warn "WARNING: Invalid level for method _go() in Process::MpiChunk\n"; |
|---|
| | 1689 | return undef; |
|---|
| | 1690 | } |
|---|
| | 1691 | } |
|---|
| | 1692 | catch Error::Simple with{ |
|---|
| | 1693 | my $E = shift; |
|---|
| | 1694 | |
|---|
| | 1695 | my $tag = ($flag eq 'run') ? 'handle' : 'throw'; |
|---|
| | 1696 | |
|---|
| | 1697 | $self->_handler($E, $level_status, $tag); |
|---|
| | 1698 | }; |
|---|
| | 1699 | |
|---|
| | 1700 | #return args list for initializing |
|---|
| | 1701 | return \@args if($flag eq 'init'); |
|---|
| | 1702 | #return results after running |
|---|
| | 1703 | return \%results if($flag eq 'run'); |
|---|
| | 1704 | #return chunks for loader |
|---|
| | 1705 | return \@chunks if($flag eq 'load'); |
|---|
| | 1706 | #return next_level for flow |
|---|
| | 1707 | return $next_level if($flag eq 'flow'); |
|---|
| | 1708 | |
|---|
| | 1709 | #should never reach this line |
|---|
| | 1710 | die "FATAL: \'$flag\' is not a valid flag in MpiChunk _go!!\n"; |
|---|