| 1 |
#! /usr/bin/perl -w |
|---|
| 2 |
|
|---|
| 3 |
use strict "vars"; |
|---|
| 4 |
use strict "refs"; |
|---|
| 5 |
|
|---|
| 6 |
use FindBin; |
|---|
| 7 |
use lib "$FindBin::Bin/../lib"; |
|---|
| 8 |
|
|---|
| 9 |
# Install the Ctrl-C (SIGINT) handler inside BEGIN so it is already active
# while the rest of the file is still being compiled.
BEGIN {
    my $on_interrupt = sub {
        print STDERR "\n\naborted by user!!\n\n";
        exit(1);
    };

    $SIG{INT} = $on_interrupt;
}
|---|
| 16 |
|
|---|
| 17 |
use Dumper::GFF::GFFV3; |
|---|
| 18 |
use Iterator::Any; |
|---|
| 19 |
use Iterator::Fasta; |
|---|
| 20 |
use Iterator::GFF3; |
|---|
| 21 |
use Fasta; |
|---|
| 22 |
use GFFDB; |
|---|
| 23 |
use Cwd; |
|---|
| 24 |
use maker::auto_annotator; |
|---|
| 25 |
use GI; |
|---|
| 26 |
|
|---|
| 27 |
$| = 1; |
|---|
| 28 |
|
|---|
| 29 |
# Help text printed when the script is invoked with the wrong argument count.
my $usage = <<'END_USAGE';

Usage:

gff3_preds2models <gff3 file> <pred list>

END_USAGE

# Exactly two positional arguments are required.
unless (@ARGV == 2) {
    print $usage;
    exit(1);
}

# Take both arguments off @ARGV (leaving it empty): the GFF3 input file and
# the file listing the predictions to keep.
my ($gff3_in, $list) = splice(@ARGV, 0, 2);
|---|
| 43 |
|
|---|
| 44 |
# Load the names of the predictions to keep, one name per line of $list.
# %index maps each name to the number of times it was listed.
my %index;
{
    # Three-argument open with a lexical handle: characters in the file name
    # can no longer be interpreted as an open mode, and a list that cannot be
    # read is a fatal error instead of silently leaving %index empty.
    open(my $list_fh, '<', $list)
        or die "ERROR: could not open prediction list '$list': $!\n";

    while (defined(my $line = <$list_fh>)) {
        chomp $line;
        $line =~ s/^\s+|\s+$//g;    # trim surrounding whitespace (incl. \r)

        # A blank line would otherwise register the empty string as a name.
        next if $line eq '';

        $index{$line}++;
    }

    close($list_fh);
}
|---|
| 52 |
|
|---|
| 53 |
#--set up gff database
# All output names are derived from the input path with its .gff/.gff3
# suffix removed. If the input has neither suffix the match fails, so fall
# back to the full input path instead of building names from undef.
my ($gff3_base) = $gff3_in =~ /(.*)\.gff3?$/;
$gff3_base = $gff3_in if !defined $gff3_base;

my $db_file = "$gff3_base.db";
my $GFF_DB  = GFFDB->new($db_file);

# Load the input GFF3 into the database with the pred_pass option switched
# on, then index it.
$GFF_DB->add_maker($gff3_in, { pred_pass => 1 });
$GFF_DB->do_indexing;

# Build value reported by the database; handed to the GFF3 dumper and to the
# annotator further down.
my $build = $GFF_DB->last_build;
|---|
| 60 |
|
|---|
| 61 |
#---load genome multifasta/GFF3 file
# Explicit Class->new(...) calls replace the indirect-object form, which
# depends on how the parser guesses the meaning of the bareword "new".
my $iterator = Iterator::Any->new(-gff => $gff3_in);

#build output gff3
my $gff3_out = "$gff3_base.pred_keepers.gff";
my $GFF3     = Dumper::GFF::GFFV3->new($gff3_out, $build, Cwd::cwd());

# Accumulators filled contig by contig by GI::maker_p_and_t_fastas and
# written out once at the end (presumably protein and transcript FASTA
# entries, going by the helper's name -- confirm against GI).
my %p_fastas;
my %t_fastas;
|---|
| 74 |
|
|---|
| 75 |
#---iterate over each sequence in the fasta
while (my $fasta = $iterator->nextFasta) {
    #get fasta parts
    my $seq_id  = Fasta::getSeqID(\$fasta);
    my $seq_ref = Fasta::getSeqRef(\$fasta);

    $GFF3->set_current_contig($seq_id, $seq_ref);

    # Hits of type 'pred' stored in the database for this contig.
    my $pred_gff = $GFF_DB->phathits_on_contig($seq_id, $seq_ref, 'pred');

    #==MAKER annotations built here
    # The call is positional: the same fasta is supplied for both leading
    # arguments, and every list argument except $pred_gff is passed empty.
    my $annotations = maker::auto_annotator::annotate(
        $fasta,
        $fasta,
        '0',
        [],
        [],
        [],
        $pred_gff,
        [],
        Cwd::cwd(),
        $build,
        { _predictor  => ['gff'],
          single_exon => '1',
        },
    );

    # Keep only the annotations whose first transcript name (up to the first
    # whitespace character) appears in the user's list.
    my @keepers;

    foreach my $ann (@{ $annotations->{abinit} }) {
        my $t_name = $ann->{t_structs}->[0]->{t_name};
        next if !defined $t_name;

        # No capture means the name is empty or starts with whitespace; such
        # an annotation cannot be in the list, and looking it up would use an
        # undefined hash key.
        my ($name) = $t_name =~ /^(\S+)/;
        next if !defined $name;

        push(@keepers, $ann) if exists $index{$name};
    }

    #--- GFF3
    $GFF3->add_genes(\@keepers);
    #$GFF3->resolved_flag; #adds ### between contigs

    #--- building fastas for annotations (grows with iteration)
    GI::maker_p_and_t_fastas(\@keepers, [], [], \%p_fastas, \%t_fastas);
}
|---|
| 121 |
|
|---|
| 122 |
#--- write GFF3 file
$GFF3->finalize;

#--- write fastas for ab-initio predictions
#--Write annotation fasta files now that all chunks are finished
my $fasta_prefix = "$gff3_base.pred_keepers";
GI::write_p_and_t_fastas(\%p_fastas, \%t_fastas, $fasta_prefix, Cwd::cwd());
|---|