Changeset 235
- Timestamp:
- 07/14/09 23:23:44 (4 months ago)
- Files:
-
- bin/maker2zff.pl (modified) (4 diffs)
- lib/GI.pm (modified) (1 diff)
- lib/maker/auto_annotator.pm (modified) (11 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
bin/maker2zff.pl
r234 r235 46 46 my %exons = (); 47 47 my %hc = (); 48 my %bad = (); 48 49 my %seq = (); 49 50 … … 62 63 63 64 #### Go through the GFF file and determine which genes are HC #### 65 #### This step must finish before any output is produced since #### 66 #### featues can be out of order #### 67 68 my %index; #used to replace long maker names that mess up snap 69 my $count = 0; 64 70 65 71 foreach my $file (@files) { … … 96 102 my $ishc = is_hc($qi, $AED); 97 103 if ($ishc == 1 ) { 98 $hc{$id} = 1;104 push(@{$hc{$seqid}}, $id); 99 105 } 100 106 }elsif ($tag eq "CDS" && $source eq "maker") { 101 107 my $parent = $annotation{'Parent'}; 102 if ($strand eq "-") { 103 my $temp = $start; 104 $start = $end; 105 $end = $temp; 108 109 #set alias 110 if(! $index{$parent}){ 111 $index{$parent} = "MODEL$count"; 112 $count++; 106 113 } 107 push @{$exons{$seqid}}, [$start, $end, $parent] 114 115 if($strand eq '-'){ 116 ($start, $end) = ($end, $start); 117 } 118 119 push @{$exons{$seqid}{$parent}}, [$start, $end]; 108 120 } 109 121 } 110 122 } 111 123 } 124 125 #### Now filter out anything thats not high quality #### 112 126 113 127 #### Print out the exon locations of the HC genes #### … … 116 130 open DNA, ">$outfile\.dna" or die $!; 117 131 118 foreach my $seqid (sort {$a cmp $b} keys %exons) { 119 my @exons = @{$exons{$seqid}}; 132 foreach my $seqid (sort {$a cmp $b} keys %hc) { 120 133 print ZFF ">",$seqid,"\n"; 121 134 print DNA ">",$seqid,"\n",$seq{$seqid},"\n"; 122 foreach my $e (@exons) { 123 my ($start, $end, $pname) = @{$e}; 124 if ($hc{$pname}) { 125 print ZFF join(" ", "Exon ",$start, $end, $pname),"\n"; 135 136 foreach my $mRNA (@{$hc{$seqid}}){ 137 my @exons = @{$exons{$seqid}{$mRNA}}; 138 my $pname = $index{$mRNA}; 139 140 next if (! @exons); 141 142 my $f = shift @exons; 143 if(! @exons){ 144 print ZFF join(" ", "Esngl ", $f->[0], $f->[1], $pname),"\n"; 145 next; 146 } 147 elsif($f->[0] < $f->[1]){ 148 print ZFF join(" ", "Einit ", $f->[0], $f->[1], $pname),"\n"; 149 } 150 else{ 151 print ZFF join(" ", "Eterm ", $f->[0], $f->[1], $pname),"\n"; 152 } 153 154 my $l = pop @exons; 155 if($l->[0] < $l->[1]){ 156 print ZFF join(" ", "Eterm ", $l->[0], $l->[1], $pname),"\n"; 157 } 158 else{ 159 print ZFF join(" ", "Einit ", $l->[0], $l->[1], $pname),"\n"; 160 } 161 162 foreach my $e (@exons) { 163 print ZFF join(" ", "Exon ", $e->[0], $e->[1], $pname),"\n"; 126 164 } 127 165 } 128 166 } 167 168 close(ZFF); 169 close(DNA); 129 170 130 171 ################## Subroutines ################### lib/GI.pm
r232 r235 2316 2316 foreach my $p (@predictors) { 2317 2317 if ($p !~ /^snap$|^augustus$|^est2genome$|^fgenesh$/ && 2318 $p !~ /^genemark$|^jigsaw$|^model_gff$|^ abinit$/2318 $p !~ /^genemark$|^jigsaw$|^model_gff$|^pred_gff$/ 2319 2319 ) { 2320 2320 $error .= "ERROR: Invalid predictor defined: $p\n". 2321 "Valid entries are: est2genome, model_gff, snap,\n".2322 " genemark, augustus, fgenesh, and abinit\n\n";2321 "Valid entries are: est2genome, model_gff, pred_gff,\n". 2322 "snap, genemark, augustus, and fgenesh\n\n"; 2323 2323 } 2324 2324 else { lib/maker/auto_annotator.pm
r202 r235 637 637 } 638 638 639 #---hint based gene prediction here 639 #---hint based gene prediction here (includes est2genome) 640 640 foreach my $prdr (@{$CTL_OPTIONS->{_predictor}}){ 641 next if($prdr eq 'model_gff' || $prdr eq ' abinit' || $prdr eq 'genemark');641 next if($prdr eq 'model_gff' || $prdr eq 'pred_gff'); 642 642 print STDERR "Producing $prdr hint based annotations\n" unless($main::quiet); 643 643 … … 673 673 $v_seq_ref, 674 674 $def, 675 'abinit', 675 'abinit', #all abinits not just pref_gff 676 676 $predictions, 677 677 $CTL_OPTIONS … … 683 683 $chunk_number, 684 684 $build, 685 'abinit', 685 'abinit', #all abinits not_just pred_gff 686 686 $the_void, 687 687 $CTL_OPTIONS 688 688 ); 689 690 $annotations{'abinit'} = $all_ab; 689 690 $annotations{'abinit'} = $all_ab; #all abinit 691 692 #add abinits to their predictor type after they have been proccessed 693 #remeber they get treated differently so you want to add them as a 694 #seperate step. 695 foreach my $g (@$all_ab){ 696 if($g->{algorithm} =~ /^snap/i){ 697 push(@{$annotations{'snap'}}, $g); 698 } 699 elsif($g->{algorithm} =~ /^genemark/i){ 700 push(@{$annotations{'genemark'}}, $g); 701 } 702 elsif($g->{algorithm} =~ /^augustus/i){ 703 push(@{$annotations{'augustus'}}, $g); 704 } 705 elsif($g->{algorithm} =~ /^fgenesh/i){ 706 push(@{$annotations{'fgenesh'}}, $g); 707 } 708 elsif($g->{algorithm} =~ /^pred_gff/i){ 709 push(@{$annotations{'pred_gff'}}, $g); 710 } 711 else{ 712 die "ERROR: Not a supported algorithm: ".$g->{algorithm}."\n"; 713 } 714 } 691 715 692 716 add_abAED(\%annotations); 693 694 #fill genemark hint based with abinits since hints do not work695 #but only if not using abinits already696 if((grep {/^genemark$/} @{$CTL_OPTIONS->{_predictor}}) &&697 (! grep {/^abinit$/} @{$CTL_OPTIONS->{_predictor}})698 ){699 foreach my $g (@$all_ab){700 if($g->{algorithm} =~ /^genemark_masked$/i){701 push(@{$annotations{'genemark'}}, $g);702 }703 elsif($CTL_OPTIONS->{_no_mask} && $g->{algorithm} =~ /^genemark$/i){704 push(@{$annotations{'genemark'}}, $g);705 }706 }707 708 }709 717 710 718 return \%annotations; … … 789 797 #collect all no UTR base models to get abAED 790 798 foreach my $p (keys %$annotations){ 799 next if ($p eq 'abinit'); #these are redundant inside predictor type 791 800 foreach my $g (@{$annotations->{$p}}){ 792 801 if($g->{g_strand} == 1){ … … 851 860 my @p_keepers; 852 861 my @m_keepers; 853 if(@{$CTL_OPTIONS->{_predictor}} > 1 || grep {/^abinit$/} @{$CTL_OPTIONS->{_predictor}}){ 862 863 #keep all gff3 passthrough if there's nothing else 864 if(@{$CTL_OPTIONS->{_predictor}} == 1 && $CTL_OPTIONS->{_predictor}->[0] eq 'model_gff'){ 865 foreach my $g (@{$annotations->{'model_gff'}}){ 866 if($g->{g_strand} == 1){ 867 push(@p_keepers, $g); 868 } 869 elsif($g->{g_strand} == -1){ 870 push(@m_keepers, $g); 871 } 872 } 873 } 874 elsif(@{$CTL_OPTIONS->{_predictor}}){ 854 875 #set up lists for plus and minus strands as well as possible mergers 855 876 #predictor types are processed in the order given by control files … … 890 911 push(@m_keepers, @$m_list); 891 912 } 892 elsif(@{$CTL_OPTIONS->{_predictor}}){893 my $key = $CTL_OPTIONS->{_predictor}->[0];894 foreach my $g (@{$annotations->{$key}}){895 if($g->{g_strand} == 1){896 push(@p_keepers, $g) if($g->{AED} < 1 || $key eq 'model_gff');897 }898 elsif($g->{g_strand} == -1){899 push(@m_keepers, $g) if($g->{AED} < 1 || $key eq 'model_gff');900 }901 }902 }903 913 904 914 #remove CDS competition on opposite strand … … 1206 1216 next; 1207 1217 } 1208 1209 #------genemark does not have hints enabled1210 return [] if ($predictor eq 'genemark');1211 1218 1212 1219 #------est2genome … … 1224 1231 next; 1225 1232 } 1233 1234 #------genemark does not have hints enabled 1235 return [] if ($predictor eq 'genemark'); 1226 1236 1227 1237 #------default hint based behavior … … 1544 1554 } 1545 1555 1546 #remove redundant t anscripts in gene1556 #remove redundant transcripts in gene 1547 1557 my @keepers; 1548 1558 foreach my $c (@{$careful_clusters}) { … … 1850 1860 #that don't overlap maker's final annotation set. The non-overlapping 1851 1861 #set is filtered using abAED so that they don't overlap each other. 1862 #Only masked ab inits are considered (otherwise I get a lot of revere 1863 #transcriptase genes). Unmasked are allowed when there is no masking 1864 #performed. 1852 1865 1853 1866 sub get_non_overlaping_abinits { … … 1874 1887 } 1875 1888 1876 #separate abinits by strand 1889 #separate abinits by strand (only masked unless I'm not masking) 1877 1890 my @p_ab; 1878 1891 my @m_ab;
