Changeset 268
- Timestamp:
- 09/23/09 11:23:55 (2 months ago)
- Files:
-
- lib/GI.pm (modified) (3 diffs)
- lib/clean.pm (modified) (1 diff)
- lib/cluster.pm (modified) (2 diffs)
- lib/maker/auto_annotator.pm (modified) (1 diff)
- lib/runlog.pm (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
lib/GI.pm
r266 r268 2237 2237 $CTL_OPT{'single_length'} = 250; 2238 2238 $CTL_OPT{'min_protein'} = 0; 2239 $CTL_OPT{'AED_threshod'} = 1; 2239 2240 $CTL_OPT{'keep_preds'} = 0; 2240 2241 $CTL_OPT{'map_forward'} = 0; … … 2661 2662 "min_protein will be reset to 0\n\n"; 2662 2663 $CTL_OPT{min_protein} = 0; 2664 } 2665 if ($CTL_OPT{AED_threshold} < 0 || $CTL_OPT{AED_threshold} > 1) { 2666 warn "WARNING: \'AED_threshold\' must be set to a value betweeb 0 and 1.\n". 2667 "AED_threshold will be reset to 1\n\n"; 2668 $CTL_OPT{AED_threshold} = 1; 2663 2669 } 2664 2670 if ($CTL_OPT{retry} < 0) { … … 2831 2837 print OUT "min_contig:$O{min_contig} #all contigs from the input genome file below this size will be skipped\n"; 2832 2838 print OUT "min_protein:$O{min_protein} #all gene annotations must produce a protein of at least this many amino acids in length\n" if(!$ev); 2839 print OUT "AED_threshold:$O{AED_threshold} #Maximum Annotation Edit Distance allowed for annotations (bound by 0 and 1)\n" if(!$ev); 2833 2840 print OUT "softmask:$O{softmask} #use soft-masked rather than hard-masked seg filtering for wublast\n"; 2834 2841 print OUT "split_hit:$O{split_hit} #length for the splitting of hits (expected max intron size for evidence alignments)\n"; lib/clean.pm
r151 r268 168 168 } 169 169 #------------------------------------------------------------------------ 170 #this function throws out hits that result from multiple overlapping HSPs 171 #from the same region (caused by low complexity repeats) 172 sub complexity_filter { 173 my $candidates = shift; 174 my $seq = shift; 175 176 my @keepers; 177 178 foreach my $f (@$candidates){ 179 my $qOff = $f->start('query'); 180 my $hOff = $f->start('hit'); 181 182 my @q_cov; 183 my @h_cov; 184 foreach my $hsp ($f->hsps){ 185 my $qS = $hsp->start('query') - $qOff; 186 my $qE = $hsp->end('query') - $qOff; 187 188 my $hS = $hsp->start('hit') - $hOff; 189 my $hE = $hsp->end('hit') - $hOff; 190 191 @q_cov[$qS..$qE] = map {(defined $_) ? $_ + 1 : 1} (@q_cov[$qS..$qE]); 192 @h_cov[$hS..$hE] = map {(defined $_) ? $_ + 1 : 1} (@h_cov[$hS..$hE]); 193 } 194 195 #average coverage for qeury 196 my $count = 0; 197 my $total = 0; 198 foreach my $c (@q_cov){ 199 next if(! defined $c); 200 $count++; 201 $total += $c; 202 } 203 204 my $q_ave = ($count) ? $total/$count : 0; 205 206 #average coverage for hit 207 $count = 0; 208 $total = 0; 209 foreach my $c (@h_cov){ 210 next if(! defined $c); 211 $count++; 212 $total += $c; 213 } 214 215 my $h_ave = ($count) ? $total/$count : 0; 216 217 #if average basepair hits twice on both query and subject, skip 218 push(@keepers, $f) unless(($q_ave + $h_ave)/2 > 2); 219 } 220 221 return \@keepers; 222 } 223 #------------------------------------------------------------------------ 170 224 1; 171 225 lib/cluster.pm
r194 r268 46 46 print STDERR "total clusters:$num_c now processing $counter\n" 47 47 unless $main::quiet; 48 my $alts = clean::get_best_alt_splices($c, $seq, 10); 48 my $clean = clean::complexity_filter($c, $seq); 49 my $alts = clean::get_best_alt_splices($clean, $seq, 10); 49 50 my $i = 0; 50 51 my @new_cluster; … … 54 55 $i++; 55 56 } 57 next if(! @new_cluster); 56 58 push(@{$clean_clusters[$counter]}, @new_cluster); 57 59 $counter++; 58 } 59 60 } 60 61 61 62 return \@clean_clusters; 62 63 63 } 64 64 #------------------------------------------------------------------------ lib/maker/auto_annotator.pm
r243 r268 1690 1690 my $t_struct = load_transcript_struct($f, $g_name, $i, $seq, $evidence, $p_base, $the_void, $CTL_OPTIONS); 1691 1691 1692 push(@t_structs, $t_struct) unless ($t_struct->{p_length} < $CTL_OPTIONS->{min_protein}); 1692 push(@t_structs, $t_struct) unless ($t_struct->{p_length} < $CTL_OPTIONS->{min_protein} || 1693 $t_struct->{AED} > $CTL_OPTIONS->{AED_threshold} 1694 ); 1693 1695 1694 1696 $AED = $t_struct->{AED} if($t_struct->{AED} < $AED); lib/runlog.pm
r243 r268 38 38 'pred_flank', 39 39 'min_protein', 40 'AED_threshold', 40 41 'single_exon', 41 42 'single_length',
