| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
package FastaChunker; |
|---|
| 5 |
use strict; |
|---|
| 6 |
use vars qw(@ISA @EXPORT $VERSION); |
|---|
| 7 |
use Exporter; |
|---|
| 8 |
use PostData; |
|---|
| 9 |
use FileHandle; |
|---|
| 10 |
use FastaChunk; |
|---|
| 11 |
use Fasta; |
|---|
| 12 |
use FastaFile; |
|---|
| 13 |
use POSIX qw(ceil); |
|---|
| 14 |
|
|---|
| 15 |
@ISA = qw( |
|---|
| 16 |
); |
|---|
| 17 |
|
|---|
| 18 |
|
|---|
| 19 |
|
|---|
| 20 |
|
|---|
| 21 |
sub new { |
|---|
| 22 |
my $class = shift; |
|---|
| 23 |
|
|---|
| 24 |
my $self = {}; |
|---|
| 25 |
bless($self, $class); |
|---|
| 26 |
|
|---|
| 27 |
$self->min_size(0); |
|---|
| 28 |
|
|---|
| 29 |
return $self; |
|---|
| 30 |
} |
|---|
| 31 |
|
|---|
| 32 |
sub load_chunks { |
|---|
| 33 |
my $self = shift; |
|---|
| 34 |
|
|---|
| 35 |
die " you must assign a parent_fasta!\n" |
|---|
| 36 |
unless defined($self->parent_fasta); |
|---|
| 37 |
|
|---|
| 38 |
die " you must assign a chunk_size!\n" |
|---|
| 39 |
unless defined($self->chunk_size); |
|---|
| 40 |
|
|---|
| 41 |
die " chunk_size must be greater than 0!\n" |
|---|
| 42 |
unless $self->chunk_size > 0; |
|---|
| 43 |
|
|---|
| 44 |
my $parent_def = Fasta::getDef($self->parent_fasta); |
|---|
| 45 |
my $parent_seq = Fasta::getSeq($self->parent_fasta); |
|---|
| 46 |
|
|---|
| 47 |
$self->parent_seq_length(length($parent_seq)); |
|---|
| 48 |
my $fasta = ''; |
|---|
| 49 |
|
|---|
| 50 |
my $t_c = ceil($self->parent_seq_length/$self->chunk_size); |
|---|
| 51 |
|
|---|
| 52 |
$self->total_chunks($t_c); |
|---|
| 53 |
my $l = $self->chunk_size(); |
|---|
| 54 |
|
|---|
| 55 |
$self->{INDEX} = 0; |
|---|
| 56 |
|
|---|
| 57 |
my $c = 0; |
|---|
| 58 |
for (my $i=0; $i< $self->parent_seq_length; $i+=$l){ |
|---|
| 59 |
my $def = $parent_def. " CHUNK number:$c size:$l offset:$i"; |
|---|
| 60 |
|
|---|
| 61 |
my $is_last = ($t_c - 1 == $c) ? 1 : 0; |
|---|
| 62 |
|
|---|
| 63 |
my $chunk = new FastaChunk(); |
|---|
| 64 |
$chunk->seq(substr($parent_seq, $i, $l)); |
|---|
| 65 |
$chunk->def($def); |
|---|
| 66 |
$chunk->parent_def($parent_def); |
|---|
| 67 |
$chunk->seqid(Fasta::def2SeqID($parent_def)); |
|---|
| 68 |
$chunk->size($l); |
|---|
| 69 |
$chunk->length(length($chunk->seq())); |
|---|
| 70 |
$chunk->offset($i); |
|---|
| 71 |
$chunk->number($c); |
|---|
| 72 |
$chunk->is_last($is_last); |
|---|
| 73 |
$chunk->parent_seq_length($self->parent_seq_length()); |
|---|
| 74 |
|
|---|
| 75 |
if($chunk->length > $self->min_size || $chunk->number == 0){ |
|---|
| 76 |
$self->add_chunk($chunk); |
|---|
| 77 |
} |
|---|
| 78 |
else{ |
|---|
| 79 |
$self->last_chunk->seq($self->last_chunk->seq . $chunk->seq); |
|---|
| 80 |
$self->last_chunk->length($self->last_chunk->length + $chunk->length); |
|---|
| 81 |
$self->last_chunk->is_last(1); |
|---|
| 82 |
$self->total_chunks($t_c - 1); |
|---|
| 83 |
} |
|---|
| 84 |
$c++; |
|---|
| 85 |
} |
|---|
| 86 |
|
|---|
| 87 |
} |
|---|
| 88 |
|
|---|
| 89 |
sub get_chunk { |
|---|
| 90 |
my $self = shift; |
|---|
| 91 |
my $i = shift; |
|---|
| 92 |
|
|---|
| 93 |
return $self->{chunks}->[$i]; |
|---|
| 94 |
} |
|---|
| 95 |
|
|---|
| 96 |
sub next_chunk { |
|---|
| 97 |
my $self = shift; |
|---|
| 98 |
my $i = $self->{INDEX}++; |
|---|
| 99 |
|
|---|
| 100 |
return $self->{chunks}->[$i] || undef; |
|---|
| 101 |
} |
|---|
| 102 |
|
|---|
| 103 |
sub last_chunk { |
|---|
| 104 |
my $self = shift; |
|---|
| 105 |
|
|---|
| 106 |
return $self->{chunks}->[-1]; |
|---|
| 107 |
} |
|---|
| 108 |
|
|---|
| 109 |
sub add_chunk { |
|---|
| 110 |
my $self = shift; |
|---|
| 111 |
my $chunk = shift; |
|---|
| 112 |
|
|---|
| 113 |
push(@{$self->{chunks}}, $chunk); |
|---|
| 114 |
} |
|---|
| 115 |
|
|---|
| 116 |
|
|---|
| 117 |
|
|---|
| 118 |
|
|---|
| 119 |
|
|---|
| 120 |
|
|---|
| 121 |
|
|---|
| 122 |
sub AUTOLOAD { |
|---|
| 123 |
my ($self, $arg) = @_; |
|---|
| 124 |
|
|---|
| 125 |
my $caller = caller(); |
|---|
| 126 |
use vars qw($AUTOLOAD); |
|---|
| 127 |
my ($call) = $AUTOLOAD =~/.*\:\:(\w+)$/; |
|---|
| 128 |
$call =~/DESTROY/ && return; |
|---|
| 129 |
|
|---|
| 130 |
|
|---|
| 131 |
|
|---|
| 132 |
|
|---|
| 133 |
|
|---|
| 134 |
if (defined($arg)){ |
|---|
| 135 |
$self->{$call} = $arg; |
|---|
| 136 |
} |
|---|
| 137 |
else { |
|---|
| 138 |
return $self->{$call}; |
|---|
| 139 |
} |
|---|
| 140 |
} |
|---|
| 141 |
|
|---|
| 142 |
1; |
|---|
| 143 |
|
|---|
| 144 |
|
|---|