| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
package cluster; |
|---|
| 5 |
use strict; |
|---|
| 6 |
use vars qw(@ISA @EXPORT $VERSION); |
|---|
| 7 |
use Exporter; |
|---|
| 8 |
use PostData; |
|---|
| 9 |
use FileHandle; |
|---|
| 10 |
use PostData; |
|---|
| 11 |
use Exporter; |
|---|
| 12 |
use PhatHit_utils; |
|---|
| 13 |
use clean; |
|---|
| 14 |
use compare; |
|---|
| 15 |
use SimpleCluster; |
|---|
| 16 |
use Shadower; |
|---|
| 17 |
|
|---|
| 18 |
@ISA = qw( |
|---|
| 19 |
); |
|---|
| 20 |
|
|---|
| 21 |
|
|---|
| 22 |
|
|---|
| 23 |
sub clean_and_cluster { |
|---|
| 24 |
my $keepers = shift; |
|---|
| 25 |
my $seq = shift; |
|---|
| 26 |
my $depth = shift; |
|---|
| 27 |
|
|---|
| 28 |
$depth = 0 if (! defined($depth) || $depth < 0); |
|---|
| 29 |
|
|---|
| 30 |
my ($p, $m, $x, $z) = PhatHit_utils::seperate_by_strand('query', $keepers); |
|---|
| 31 |
|
|---|
| 32 |
my $p_clusters = shadow_cluster($depth, $seq, $p); |
|---|
| 33 |
my $m_clusters = shadow_cluster($depth, $seq, $m); |
|---|
| 34 |
|
|---|
| 35 |
my @clusters = (@{$p_clusters}, @{$m_clusters}); |
|---|
| 36 |
|
|---|
| 37 |
my $num_c = @clusters; |
|---|
| 38 |
print STDERR "cleaning clusters....\n" unless $main::quiet; |
|---|
| 39 |
my $counter = 0; |
|---|
| 40 |
my @clean_clusters; |
|---|
| 41 |
|
|---|
| 42 |
|
|---|
| 43 |
|
|---|
| 44 |
|
|---|
| 45 |
foreach my $c (@clusters){ |
|---|
| 46 |
print STDERR "total clusters:$num_c now processing $counter\n" |
|---|
| 47 |
unless $main::quiet; |
|---|
| 48 |
my $clean = clean::complexity_filter($c, $seq); |
|---|
| 49 |
my $alts = clean::get_best_alt_splices($clean, $seq, 10); |
|---|
| 50 |
my $i = 0; |
|---|
| 51 |
my @new_cluster; |
|---|
| 52 |
foreach my $a (@{$alts}){ |
|---|
| 53 |
push(@new_cluster, $a); |
|---|
| 54 |
last if ($i > $depth && $depth > 0); |
|---|
| 55 |
$i++; |
|---|
| 56 |
} |
|---|
| 57 |
next if(! @new_cluster); |
|---|
| 58 |
push(@{$clean_clusters[$counter]}, @new_cluster); |
|---|
| 59 |
$counter++; |
|---|
| 60 |
} |
|---|
| 61 |
|
|---|
| 62 |
return \@clean_clusters; |
|---|
| 63 |
} |
|---|
| 64 |
|
|---|
| 65 |
sub special_cluster_phat_hits { |
|---|
| 66 |
my $phat_hits = shift; |
|---|
| 67 |
my $ests = shift; |
|---|
| 68 |
my $seq = shift; |
|---|
| 69 |
my $flank = shift || 10; |
|---|
| 70 |
|
|---|
| 71 |
my ($p, $m, $x, $z) = PhatHit_utils::seperate_by_strand('query', $phat_hits); |
|---|
| 72 |
|
|---|
| 73 |
my $p_clusters = shadow_cluster(20, $seq, $p, $flank); |
|---|
| 74 |
my $m_clusters = shadow_cluster(20, $seq, $m, $flank); |
|---|
| 75 |
|
|---|
| 76 |
my @careful_clusters; |
|---|
| 77 |
foreach my $c (@{$p_clusters}){ |
|---|
| 78 |
my $cares = careful_cluster($seq, $c, $flank); |
|---|
| 79 |
push(@careful_clusters, @{$cares}); |
|---|
| 80 |
} |
|---|
| 81 |
|
|---|
| 82 |
foreach my $c (@{$m_clusters}){ |
|---|
| 83 |
my $cares = careful_cluster($seq, $c, $flank); |
|---|
| 84 |
push(@careful_clusters, @{$cares}); |
|---|
| 85 |
} |
|---|
| 86 |
|
|---|
| 87 |
my $temp_id = 0; |
|---|
| 88 |
foreach my $est (@{$ests}){ |
|---|
| 89 |
$est->{temp_id} = $temp_id; |
|---|
| 90 |
$temp_id++; |
|---|
| 91 |
} |
|---|
| 92 |
my @special_clusters; |
|---|
| 93 |
my %done; |
|---|
| 94 |
my $i = 0; |
|---|
| 95 |
foreach my $c (@careful_clusters){ |
|---|
| 96 |
my $coors = PhatHit_utils::to_begin_and_end_coors($c, 'query'); |
|---|
| 97 |
my $pieces = Shadower::getPieces($seq, $coors, $flank); |
|---|
| 98 |
|
|---|
| 99 |
push(@{$special_clusters[$i]}, @{$c}); |
|---|
| 100 |
die "multiple pieces in cluster::special_cluster_phat_hits!\n" |
|---|
| 101 |
if defined($pieces->[1]); |
|---|
| 102 |
|
|---|
| 103 |
foreach my $est (@{$ests}){ |
|---|
| 104 |
my ($eB, $eE) = PhatHit_utils::get_span_of_hit($est, 'query'); |
|---|
| 105 |
($eB, $eE) = ($eE, $eB) if $eB > $eE; |
|---|
| 106 |
my $s_code = compare::get_shadow_code($pieces, |
|---|
| 107 |
[[$eB, $eE]], |
|---|
| 108 |
$flank, |
|---|
| 109 |
); |
|---|
| 110 |
|
|---|
| 111 |
|
|---|
| 112 |
if ($s_code ne '0'){ |
|---|
| 113 |
push(@{$special_clusters[$i]}, $est); |
|---|
| 114 |
$done{$est->{temp_id}}++; |
|---|
| 115 |
} |
|---|
| 116 |
} |
|---|
| 117 |
$i++; |
|---|
| 118 |
} |
|---|
| 119 |
|
|---|
| 120 |
my @remains; |
|---|
| 121 |
foreach my $est (@{$ests}){ |
|---|
| 122 |
push(@remains, $est) |
|---|
| 123 |
unless defined($done{$est->{temp_id}}); |
|---|
| 124 |
} |
|---|
| 125 |
my $new_e_clusters = shadow_cluster(20, $seq, \@remains, $flank); |
|---|
| 126 |
|
|---|
| 127 |
my $start = @special_clusters; |
|---|
| 128 |
|
|---|
| 129 |
foreach my $c (@{$new_e_clusters}){ |
|---|
| 130 |
push(@{$special_clusters[$start]}, @{$c}); |
|---|
| 131 |
$start++; |
|---|
| 132 |
} |
|---|
| 133 |
|
|---|
| 134 |
return \@special_clusters; |
|---|
| 135 |
|
|---|
| 136 |
} |
|---|
| 137 |
|
|---|
| 138 |
sub careful_cluster_phat_hits { |
|---|
| 139 |
my $phat_hits = shift; |
|---|
| 140 |
my $seq = shift; |
|---|
| 141 |
my $flank = shift || 10; |
|---|
| 142 |
my ($p, $m, $x, $z) = PhatHit_utils::seperate_by_strand('query', $phat_hits); |
|---|
| 143 |
|
|---|
| 144 |
my $p_clusters = shadow_cluster(20, $seq, $p, $flank); |
|---|
| 145 |
my $m_clusters = shadow_cluster(20, $seq, $m, $flank); |
|---|
| 146 |
|
|---|
| 147 |
my @careful_clusters; |
|---|
| 148 |
foreach my $c (@{$p_clusters}){ |
|---|
| 149 |
my $cares = careful_cluster($seq, $c, $flank); |
|---|
| 150 |
push(@careful_clusters, @{$cares}); |
|---|
| 151 |
} |
|---|
| 152 |
|
|---|
| 153 |
foreach my $c (@{$m_clusters}){ |
|---|
| 154 |
my $cares = careful_cluster($seq, $c, $flank); |
|---|
| 155 |
push(@careful_clusters, @{$cares}); |
|---|
| 156 |
} |
|---|
| 157 |
|
|---|
| 158 |
return \@careful_clusters; |
|---|
| 159 |
} |
|---|
| 160 |
|
|---|
| 161 |
sub mani_sort { |
|---|
| 162 |
criteria($b) <=> criteria($a) || criteria_2($b) <=> criteria_2($a) || criteria_3($b) <=> criteria_3($a); |
|---|
| 163 |
} |
|---|
| 164 |
|
|---|
| 165 |
sub criteria { |
|---|
| 166 |
my $hit = shift; |
|---|
| 167 |
|
|---|
| 168 |
my $ref = $hit->algorithm; |
|---|
| 169 |
|
|---|
| 170 |
return 0 if $ref =~ /^repeat/i; |
|---|
| 171 |
return 0 if $ref =~ /^blastx\:repeatmask$/i; |
|---|
| 172 |
return 0 if $ref =~ /^repeat_gff\:/i; |
|---|
| 173 |
return 1 if $ref =~ /^snap$/i; |
|---|
| 174 |
return 1 if $ref =~ /^augustus$/i; |
|---|
| 175 |
return 1 if $ref =~ /^fgenesh$/i; |
|---|
| 176 |
return 1 if $ref =~ /^twinscan$/i; |
|---|
| 177 |
return 1 if $ref =~ /^jigsaw$/i; |
|---|
| 178 |
return 1 if $ref =~ /^pred_gff\:/i; |
|---|
| 179 |
return 2 if $ref =~ /^blastn$/i; |
|---|
| 180 |
return 2 if $ref =~ /^tblastx$/i; |
|---|
| 181 |
return 2 if $ref =~ /^altest_gff\:/i; |
|---|
| 182 |
return 3 if $ref =~ /^blastx$/i; |
|---|
| 183 |
return 3 if $ref =~ /^protein_gff\:/i; |
|---|
| 184 |
return 4 if $ref =~ /2genome$/i; |
|---|
| 185 |
return 4 if $ref =~ /^est_gff\:/i; |
|---|
| 186 |
return 5 if $ref =~ /^maker$/i; |
|---|
| 187 |
return 5 if $ref =~ /^model_gff\:/i; |
|---|
| 188 |
die "UNKNOWN CLASS(".ref($hit)."), ALGORITHM($ref), in cluster::criteria\n"; |
|---|
| 189 |
} |
|---|
| 190 |
|
|---|
| 191 |
sub criteria_2 { |
|---|
| 192 |
my $hit = shift; |
|---|
| 193 |
|
|---|
| 194 |
my $score = $hit->score(); |
|---|
| 195 |
$score = '' if(! defined $score); |
|---|
| 196 |
|
|---|
| 197 |
if($score !~ /^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:e[-+]?\d+)?$/){ |
|---|
| 198 |
$score = $hit->bits(); |
|---|
| 199 |
$score = '' if(! defined $score); |
|---|
| 200 |
} |
|---|
| 201 |
if($score !~ /^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:e[-+]?\d+)?$/){ |
|---|
| 202 |
$score = $hit->significance; |
|---|
| 203 |
$score = '' if(! defined $score); |
|---|
| 204 |
if($score =~ /^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:e[-+]?\d+)?$/){ |
|---|
| 205 |
$score = 10000 - (10000 * $score); |
|---|
| 206 |
$score = 0 if($score < 0); |
|---|
| 207 |
} |
|---|
| 208 |
else{ |
|---|
| 209 |
$score = 0; |
|---|
| 210 |
} |
|---|
| 211 |
} |
|---|
| 212 |
|
|---|
| 213 |
return $score; |
|---|
| 214 |
} |
|---|
| 215 |
|
|---|
| 216 |
|
|---|
| 217 |
sub criteria_3 { |
|---|
| 218 |
my $hit = shift; |
|---|
| 219 |
|
|---|
| 220 |
my ($lAq) = $hit->getLengths(); |
|---|
| 221 |
return $lAq; |
|---|
| 222 |
} |
|---|
| 223 |
|
|---|
| 224 |
sub shadow_cluster { |
|---|
| 225 |
my $depth = shift; |
|---|
| 226 |
my $seq = shift; |
|---|
| 227 |
my $phat_hits = shift; |
|---|
| 228 |
my $flank = shift; |
|---|
| 229 |
|
|---|
| 230 |
$depth = 0 if (! defined($depth) || $depth < 0); |
|---|
| 231 |
|
|---|
| 232 |
my $coors = PhatHit_utils::to_begin_and_end_coors($phat_hits, 'query'); |
|---|
| 233 |
my $pieces = Shadower::getPieces($seq, $coors, $flank); |
|---|
| 234 |
|
|---|
| 235 |
my $temp_id = 0; |
|---|
| 236 |
foreach my $hit (@{$phat_hits}){ |
|---|
| 237 |
$hit->{temp_id} = $temp_id; |
|---|
| 238 |
$temp_id++; |
|---|
| 239 |
} |
|---|
| 240 |
|
|---|
| 241 |
my @clusters; |
|---|
| 242 |
my $i_size = @{$phat_hits}; |
|---|
| 243 |
my $j_size = @{$pieces}; |
|---|
| 244 |
|
|---|
| 245 |
print STDERR " in cluster:shadow cluster...\n" unless $main::quiet; |
|---|
| 246 |
print STDERR " i_size:$i_size j_size:$j_size\n" |
|---|
| 247 |
unless $main::quiet; |
|---|
| 248 |
|
|---|
| 249 |
my $i = 0; |
|---|
| 250 |
my %c_size; |
|---|
| 251 |
|
|---|
| 252 |
print STDERR " sorting hits in shadow cluster...\n" |
|---|
| 253 |
unless $main::quiet; |
|---|
| 254 |
|
|---|
| 255 |
my @sorted = ($depth == 0) ? @{$phat_hits} : sort mani_sort @{$phat_hits}; |
|---|
| 256 |
|
|---|
| 257 |
print STDERR "... finished.\n" unless $main::quiet; |
|---|
| 258 |
|
|---|
| 259 |
foreach my $hit (@sorted){ |
|---|
| 260 |
|
|---|
| 261 |
my ($nB, $nE) = PhatHit_utils::get_span_of_hit($hit, 'query'); |
|---|
| 262 |
($nB, $nE) = ($nE, $nB) if $nB > $nE; |
|---|
| 263 |
|
|---|
| 264 |
my $j = 0; |
|---|
| 265 |
foreach my $s (@{$pieces}){ |
|---|
| 266 |
|
|---|
| 267 |
if ($depth != 0 && |
|---|
| 268 |
defined($c_size{$j}) && |
|---|
| 269 |
$c_size{$j} > $depth |
|---|
| 270 |
){ |
|---|
| 271 |
$j++; |
|---|
| 272 |
next; |
|---|
| 273 |
} |
|---|
| 274 |
|
|---|
| 275 |
my $sB = $s->{b}; |
|---|
| 276 |
my $sE = $s->{e}; |
|---|
| 277 |
|
|---|
| 278 |
my $class = compare::compare($sB, $sE, $nB, $nE); |
|---|
| 279 |
|
|---|
| 280 |
if ($class ne '0'){ |
|---|
| 281 |
push(@{$clusters[$j]}, $hit); |
|---|
| 282 |
$c_size{$j}++; |
|---|
| 283 |
last; |
|---|
| 284 |
} |
|---|
| 285 |
|
|---|
| 286 |
$j++; |
|---|
| 287 |
} |
|---|
| 288 |
|
|---|
| 289 |
print STDERR " i_size:$i_size current i:$i\n" unless $main::quiet; |
|---|
| 290 |
$i++; |
|---|
| 291 |
} |
|---|
| 292 |
|
|---|
| 293 |
return \@clusters; |
|---|
| 294 |
} |
|---|
| 295 |
|
|---|
| 296 |
sub shadow_cluster_old { |
|---|
| 297 |
my $depth = shift; |
|---|
| 298 |
my $seq = shift; |
|---|
| 299 |
my $phat_hits = shift; |
|---|
| 300 |
my $flank = shift; |
|---|
| 301 |
|
|---|
| 302 |
my $coors = PhatHit_utils::to_begin_and_end_coors($phat_hits, 'query'); |
|---|
| 303 |
my $pieces = Shadower::getPieces($seq, $coors, $flank); |
|---|
| 304 |
|
|---|
| 305 |
my $temp_id = 0; |
|---|
| 306 |
foreach my $hit (@{$phat_hits}){ |
|---|
| 307 |
$hit->{temp_id} = $temp_id; |
|---|
| 308 |
$temp_id++; |
|---|
| 309 |
} |
|---|
| 310 |
my @clusters; |
|---|
| 311 |
my $i = 0; |
|---|
| 312 |
my $i_size = @{$pieces}; |
|---|
| 313 |
my $j_size = @{$phat_hits}; |
|---|
| 314 |
print STDERR " in cluster:shadow cluster...\n" unless $main::quiet; |
|---|
| 315 |
print STDERR " i_size:$i_size j_size:$j_size\n" unless $main::quiet; |
|---|
| 316 |
my %clustered; |
|---|
| 317 |
foreach my $s (@{$pieces}){ |
|---|
| 318 |
my $sB = $s->{b}; |
|---|
| 319 |
my $sE = $s->{e}; |
|---|
| 320 |
|
|---|
| 321 |
my $n = 0; |
|---|
| 322 |
foreach my $hit (@{$phat_hits}){ |
|---|
| 323 |
|
|---|
| 324 |
next if defined($clustered{$hit->{temp_id}}); |
|---|
| 325 |
|
|---|
| 326 |
print STDERR " i:$i i_size:$i_size j_size:$j_size\n" |
|---|
| 327 |
unless $main::quiet; |
|---|
| 328 |
print STDERR " n:$n\n" unless $main::quiet; |
|---|
| 329 |
my ($nB, $nE) = |
|---|
| 330 |
PhatHit_utils::get_span_of_hit($hit, 'query'); |
|---|
| 331 |
|
|---|
| 332 |
($nB, $nE) = ($nE, $nB) if $nB > $nE; |
|---|
| 333 |
|
|---|
| 334 |
my $class = compare::compare($sB, $sE, $nB, $nE); |
|---|
| 335 |
|
|---|
| 336 |
if ($class ne '0'){ |
|---|
| 337 |
|
|---|
| 338 |
push(@{$clusters[$i]}, $hit) if $n < $depth; |
|---|
| 339 |
$clustered{$hit->{temp_id}}++; |
|---|
| 340 |
$n++; |
|---|
| 341 |
} |
|---|
| 342 |
|
|---|
| 343 |
} |
|---|
| 344 |
$i++; |
|---|
| 345 |
} |
|---|
| 346 |
|
|---|
| 347 |
|
|---|
| 348 |
return \@clusters; |
|---|
| 349 |
} |
|---|
| 350 |
|
|---|
| 351 |
|
|---|
| 352 |
sub gene_cluster { |
|---|
| 353 |
my $gff_hits = shift; |
|---|
| 354 |
|
|---|
| 355 |
my %index; |
|---|
| 356 |
my @clusters; |
|---|
| 357 |
|
|---|
| 358 |
foreach my $trans (@{$gff_hits}){ |
|---|
| 359 |
my $gene_id = $trans->gene_id(); |
|---|
| 360 |
|
|---|
| 361 |
|
|---|
| 362 |
die "ERROR: No gene id in hit\n" if (! defined $gene_id); |
|---|
| 363 |
|
|---|
| 364 |
if(! exists $index{$gene_id}){ |
|---|
| 365 |
push(@clusters, [$trans]); |
|---|
| 366 |
$index{$gene_id} = @clusters - 1; |
|---|
| 367 |
} |
|---|
| 368 |
else{ |
|---|
| 369 |
my $c_id = $index{$gene_id}; |
|---|
| 370 |
|
|---|
| 371 |
push(@{$clusters[$c_id]}, $trans); |
|---|
| 372 |
} |
|---|
| 373 |
} |
|---|
| 374 |
|
|---|
| 375 |
return \@clusters; |
|---|
| 376 |
} |
|---|
| 377 |
|
|---|
| 378 |
sub get_overlap_evidence { |
|---|
| 379 |
my $p_hit = shift; |
|---|
| 380 |
my $phat_hits = shift; |
|---|
| 381 |
my $depth = shift; |
|---|
| 382 |
my $flank = shift; |
|---|
| 383 |
|
|---|
| 384 |
$flank = 0 if (!$flank || $flank < 0); |
|---|
| 385 |
$depth = 0 if (!$depth || $depth < 0); |
|---|
| 386 |
|
|---|
| 387 |
my ($sB, $sE) = PhatHit_utils::get_span_of_hit($p_hit, 'query'); |
|---|
| 388 |
($sB, $sE) = ($sE, $sB) if $sB > $sE; |
|---|
| 389 |
|
|---|
| 390 |
$sB = $sB - $flank; |
|---|
| 391 |
$sE = $sE + $flank; |
|---|
| 392 |
|
|---|
| 393 |
my @cluster; |
|---|
| 394 |
push(@cluster, $p_hit); |
|---|
| 395 |
|
|---|
| 396 |
my $i = 0; |
|---|
| 397 |
foreach my $hit (@{$phat_hits}){ |
|---|
| 398 |
if ($depth > 0 && $i >= $depth){ |
|---|
| 399 |
last; |
|---|
| 400 |
} |
|---|
| 401 |
|
|---|
| 402 |
my ($nB, $nE) = PhatHit_utils::get_span_of_hit($hit, 'query'); |
|---|
| 403 |
($nB, $nE) = ($nE, $nB) if $nB > $nE; |
|---|
| 404 |
|
|---|
| 405 |
my $class = compare::compare($sB, $sE, $nB, $nE); |
|---|
| 406 |
|
|---|
| 407 |
if ($class ne '0'){ |
|---|
| 408 |
push(@cluster, $hit); |
|---|
| 409 |
$i++; |
|---|
| 410 |
} |
|---|
| 411 |
} |
|---|
| 412 |
|
|---|
| 413 |
return \@cluster; |
|---|
| 414 |
} |
|---|
| 415 |
|
|---|
| 416 |
sub careful_cluster { |
|---|
| 417 |
my $seq = shift; |
|---|
| 418 |
my $phat_hits = shift; |
|---|
| 419 |
my $flank = shift; |
|---|
| 420 |
|
|---|
| 421 |
|
|---|
| 422 |
my @careful_clusters; |
|---|
| 423 |
if (!defined($phat_hits->[1])){ |
|---|
| 424 |
push(@careful_clusters, $phat_hits); |
|---|
| 425 |
return \@careful_clusters; |
|---|
| 426 |
} |
|---|
| 427 |
my $temp_id = 0; |
|---|
| 428 |
foreach my $hit (@{$phat_hits}){ |
|---|
| 429 |
$hit->{temp_id} = $temp_id; |
|---|
| 430 |
|
|---|
| 431 |
$temp_id++; |
|---|
| 432 |
} |
|---|
| 433 |
print STDERR "now careful_clustering....\n" |
|---|
| 434 |
unless $main::quiet; |
|---|
| 435 |
my %lookup; |
|---|
| 436 |
my %matrix; |
|---|
| 437 |
for (my $i = 0; $i < @{$phat_hits} - 1;$i++){ |
|---|
| 438 |
my $hit_i = $phat_hits->[$i]; |
|---|
| 439 |
$lookup{$hit_i->{temp_id}} = $hit_i; |
|---|
| 440 |
for (my $j = $i +1 ; $j < @{$phat_hits}; $j++){ |
|---|
| 441 |
my $hit_j = $phat_hits->[$j]; |
|---|
| 442 |
$lookup{$hit_j->{temp_id}} = $hit_j; |
|---|
| 443 |
if (compare::hsps_overlap($hit_i, $hit_j, 'query', $flank)){ |
|---|
| 444 |
$matrix{$hit_i->{temp_id}}{$hit_j->{temp_id}}++; |
|---|
| 445 |
} |
|---|
| 446 |
else { |
|---|
| 447 |
|
|---|
| 448 |
} |
|---|
| 449 |
|
|---|
| 450 |
} |
|---|
| 451 |
} |
|---|
| 452 |
|
|---|
| 453 |
my $pairs = SimpleCluster::pairs(\%matrix); |
|---|
| 454 |
my $map = SimpleCluster::singleLinkageClusters($pairs); |
|---|
| 455 |
|
|---|
| 456 |
my %clustered; |
|---|
| 457 |
my $i = 0; |
|---|
| 458 |
foreach my $c (keys %{$map}){ |
|---|
| 459 |
next if(! defined $map->{$c}); |
|---|
| 460 |
foreach my $m (@{$map->{$c}}){ |
|---|
| 461 |
my $hit = $lookup{$m}; |
|---|
| 462 |
die "name not found in careful cluster!\n" |
|---|
| 463 |
unless defined $m; |
|---|
| 464 |
push(@{$careful_clusters[$i]}, $hit); |
|---|
| 465 |
$clustered{$m}++; |
|---|
| 466 |
} |
|---|
| 467 |
$i++; |
|---|
| 468 |
} |
|---|
| 469 |
|
|---|
| 470 |
foreach my $temp_id (keys %lookup){ |
|---|
| 471 |
next if defined($clustered{$temp_id}); |
|---|
| 472 |
push(@{$careful_clusters[$i]}, $lookup{$temp_id}); |
|---|
| 473 |
$i++; |
|---|
| 474 |
} |
|---|
| 475 |
|
|---|
| 476 |
return \@careful_clusters; |
|---|
| 477 |
} |
|---|
| 478 |
|
|---|
| 479 |
sub show_clusters { |
|---|
| 480 |
my $c = shift; |
|---|
| 481 |
|
|---|
| 482 |
for (my $i = 0; $i < @{$c}; $i++){ |
|---|
| 483 |
print "cluster:$i\n"; |
|---|
| 484 |
foreach my $m (@{$c->[$i]}){ |
|---|
| 485 |
my $num_hsps = $m->hsps(); |
|---|
| 486 |
|
|---|
| 487 |
print " name:".$m->name; |
|---|
| 488 |
print " type:".ref($m); |
|---|
| 489 |
print " num hsps:$num_hsps"; |
|---|
| 490 |
print " length:".$m->length(); |
|---|
| 491 |
print " nB:".$m->nB('query')."\n"; |
|---|
| 492 |
} |
|---|
| 493 |
} |
|---|
| 494 |
} |
|---|
| 495 |
|
|---|
| 496 |
1; |
|---|
| 497 |
|
|---|
| 498 |
|
|---|