Changeset 222
- Timestamp:
- 06/22/09 12:34:39 (5 months ago)
- Files:
-
- bin/gff3_2_gtf (modified) (1 diff)
- bin/maker2zff.pl (modified) (8 diffs)
- lib/GI.pm (modified) (3 diffs)
- lib/Widget/fgenesh.pm (modified) (1 diff)
- lib/Widget/snap.pm (modified) (3 diffs)
- lib/exonerate/splice_info.pm (modified) (3 diffs)
- lib/runlog.pm (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
bin/gff3_2_gtf
r196 r222 44 44 45 45 foreach my $g (@genes){ 46 my $gene_id = $g->{Name}; 46 my $gene_id = $g->{ID}; 47 47 my $g_mRNAs = $mRNAinx{$g->{ID}}; 48 48 49 49 foreach my $t (@$g_mRNAs){ 50 my $transcript_id = $t->{Name}; 50 my $transcript_id = $t->{ID}; 51 51 my $t_stuff = $stuffinx{$t->{ID}}; 52 52 bin/maker2zff.pl
r127 r222 5 5 ##### Initialize Threshhold #### 6 6 my @thresh = (); 7 use vars qw($opt_h $opt_c $opt_e $opt_o $opt_a $opt_t $opt_l); 7 my $thrAED = 0.5; 8 use vars qw($opt_h $opt_c $opt_e $opt_o $opt_a $opt_t $opt_l $opt_x); 8 9 9 10 push @thresh, 0.5; … … 14 15 push @thresh, 75; 15 16 16 getopts("hc:e:o:a:t:"); 17 18 getopts("hc:e:o:a:t:x"); 17 19 my $usage = "maker2zff.pl directory name [options] 18 20 … … 28 30 -t fraction The fraction of exons the overlap an ab-initio SNAP prediction, default 0 29 31 -l number The min length of the protein sequence produced by the mRNA 32 -x number Max AED to allow 0.5 is default 30 33 "; 31 34 … … 36 39 if ($opt_t) {$thresh[4] = $opt_t} 37 40 if ($opt_l) {$thresh[5] = $opt_l} 41 if ($opt_x) {$thrAED = $opt_x} 38 42 39 43 my %id2name = (); … … 63 67 while (my $line = <GFF>) { 64 68 chomp($line); 65 if ($line =~ m/^\s*#/) { 69 if ($line =~ m/\#\#FASTA/) { 70 my $header; 71 while (my $d = <GFF>) { 72 if($d =~ /^>(\S+)/){ 73 $header = $1; 74 $seq{$header} = ""; 75 } 76 else{ 77 $seq{$header} .= $d; 78 } 79 } 80 } 81 elsif($line =~ /^\s*\#|^\n$|^\s*$/){ 66 82 next; 67 } elsif ($line =~ m/^\s*$/) { 68 next; 69 } elsif ($line =~ m/^>(\S+)/) { 70 my $header = $1; 71 $seq{$header} = ""; 72 while (<GFF>) { 73 $seq{$header} = join("", $seq{$header}, $_) 74 } 75 } else { 83 } 84 else { 76 85 my ($seqid, $source, $tag, $start, $end, $score, $strand, $phase, $annot) = split(/\t/, $line); 77 86 my %annotation = split(/[;=]/, $annot); … … 81 90 my $parent = $annotation{'Parent'}; 82 91 my ($name, $qi) = split(/\s+/, $lname); 83 my $ishc = is_hc($qi); 92 if(! 
$qi){ 93 ($qi) = $line =~ /_QI\=([^\;\n]+)/; 94 } 95 my ($AED) = $line =~ /_AED\=([^\;\n]+)/; 96 my $ishc = is_hc($qi, $AED); 84 97 if ($ishc == 1 ) { 85 98 $hc{$id} = 1; … … 119 132 sub is_hc { 120 133 my $qi = shift @_; 134 my $AED = shift @_; 135 121 136 my @q = split(/\|/, $qi); 122 137 my @qual = (@q[1..5],$q[8]); … … 127 142 } 128 143 } 144 145 $hc = 0 if($AED > $thrAED); 146 129 147 return $hc; 130 148 } lib/GI.pm
r209 r222 1436 1436 $command .= " hspsepqmax=$split_hit"; 1437 1437 $command .= " lcmask"; 1438 $command .= " wordmask=seg"; 1439 1438 $command .= " gi"; 1440 1439 #$command .= " mformat=2"; # remove for full report … … 1698 1697 $command .= " lcmask"; 1699 1698 $command .= " kap"; 1700 $command .= " wordmask=seg"; 1701 1699 $command .= " gi"; 1702 1700 #$command .= " mformat=2"; # remove for full report … … 1936 1934 $command .= " hspsepqmax=$split_hit"; 1937 1935 $command .= " lcmask"; 1938 $command .= " wordmask=seg"; 1939 1936 $command .= " gi"; 1940 1937 #$command .= " mformat=2"; # remove for full report lib/Widget/fgenesh.pm
r207 r222 144 144 } 145 145 waitpid $pid, 0; 146 die "ERROR: FgenesH failed\n" if $? != 0; 146 #die "ERROR: FgenesH failed\n" if $? != 0; 147 147 } 148 148 else { lib/Widget/snap.pm
r207 r222 66 66 my $plus = 0; 67 67 my $minus = 0; 68 my $least; 68 my $least; 69 69 my $most; 70 70 foreach my $hit (@t_data){ … … 289 289 my $i_flank = shift; 290 290 291 my @index; 292 291 293 my $p_pieces = Shadower::getPieces($seq, $p_coors, 0); 292 294 … … 326 328 $c .= "\t$s\t-100\t\.\t\.\t\.\tADJ"; 327 329 push(@xdef, $c); 328 329 330 } 330 } 331 331 332 return \@xdef; 332 333 } lib/exonerate/splice_info.pm
r127 r222 25 25 26 26 my $str = ''; 27 if ($d eq 'gt' && $a eq 'ag') { 27 if ($d =~ /^gt$/i && $a =~ /^ag$/i) { 28 28 $str = '+'; 29 29 } 30 elsif ($d eq 'ct' && $a eq 'ac') { 30 elsif ($d =~ /^ct$/i && $a =~ /^ac$/i) { 31 31 $str = '-'; 32 32 } 33 elsif ($d eq 'at' && $a eq 'ac'){ 33 elsif ($d =~ /^at$/i && $a =~ /^ac$/i){ 34 34 $str = '+'; 35 35 } 36 elsif ($d eq 'gt' && $a eq 'at'){ 36 elsif ($d =~ /^gt$/i && $a =~ /^at$/i){ 37 37 $str = '-'; 38 38 } … … 109 109 110 110 my $splice_str = ''; 111 for (my $i = 1; $i < @{$sorted}; $i++){ 112 my $pre_hsp = $sorted->[$i -1]; 113 my $pos_hsp = $sorted->[$i ]; 111 for (my $i = 0; $i < @{$sorted} - 1; $i++){ 112 my $pre_hsp = $sorted->[$i]; 113 my $pos_hsp = $sorted->[$i+1]; 114 114 115 115 my $code = splice_code($pre_hsp->donor(), … … 149 149 my $acceptor = substr($$seq, $p, $length); 150 150 $acceptor = Fasta::revComp($acceptor) if($strand == -1); 151 $pre_hsp->{acceptor} = $acceptor; 151 $pos_hsp->{acceptor} = $acceptor; 152 152 } 153 153 } lib/runlog.pm
r209 r222 265 265 } 266 266 267 if (-e $gff_file) { 268 $rm_key{gff}++; #always rebuild gff when some option has changed 269 } 267 $rm_key{gff}++; #always rebuild gff when some option has changed 270 268 271 269 if ($key eq 'max_dna_len') { … … 536 534 537 535 if (exists $rm_key{gff}) { 538 print STDERR "MAKER WARNING: The gff file $gff_file must now be removed.\n"; 536 print STDERR "MAKER WARNING: Any preexisting GFF3 and fasta files for this contig must now be removed.\n"; 539 537 push (@files, $gff_file); 540 push (@files, <$out_base/evaluator/*.eva>); 541 push (@files, <$out_base/*.maker.*.fasta>); 538 push (@files, @{[<$out_base/evaluator/*.eva>]}); 539 push (@files, @{[<$out_base/*maker*.fasta>]}); 542 540 } 543 541
