root/bin/iprscan_batch

Revision 213, 3.6 kB (checked in by bmoore, 7 months ago)

minor changes to iprscan_batch and fasta_merge

  • Property svn:executable set to *
Line 
1 #! /usr/bin/perl -w
2 use strict;
3 use FindBin;
4 use lib "$FindBin::Bin/../lib";
5 use Iterator::Fasta;
6 use Fasta;
7 use Datastore::MD5;
8 use Cwd;
9 use threads;
10 use threads::shared;
11 use Thread::Semaphore;
12
my $usage = "
Usage:

     iprscan_batch <file_name> <cpus> <log_file>

     Runs iprscan on the given input file.  Output goes into a datastore.

     The cpus option is optional for multi-threading.

     The log file is optional.  The optional log file is not for creating that log
     file but rather to parse an existing log file.  iprscan_batch will then only
     re-run jobs that the log file says are not yet finished.


";

# Positional arguments: input FASTA file, number of worker threads (defaults
# to 1) and, optionally, an index file written by an earlier run.
my $file = shift;
my $cpus = shift || 1;
my $log  = shift;

if (!$file) {
    print $usage;
    exit;
}

# A non-numeric thread count (e.g. the log file given as second argument)
# would start zero workers and the script would silently do nothing.
if ($cpus !~ /\A\d+\z/ || $cpus < 1) {
    die "ERROR: <cpus> must be a positive integer, got '$cpus'\n$usage";
}

# State shared with the worker threads (see launch):
#   @files    - per-sequence FASTA files waiting for their first iprscan run
#   @failed   - files whose run did not succeed; an undef slot means "give up"
#   @finished - files whose run succeeded and still need to be logged
#   $go       - true while this thread is still adding entries to @files
my @files :shared;
my @failed :shared;
my @finished :shared;
my $go :shared;
$go = 1;

# Paths recorded as FINISHED in the log of a previous run, if one was given.
my %log_f;
if ($log) {
    open(my $old_log, '<', $log)
        or die "ERROR: could not open log file $log: $!\n";
    while (defined(my $line = <$old_log>)) {
        chomp $line;
        # Records look like "FINISHED <path>"; blank or short lines are skipped.
        my ($status, $path) = split(' ', $line, 2);
        next if !defined $status || !defined $path;
        $path =~ s/\s+\z//;
        $log_f{$path}++ if $status eq 'FINISHED';
    }
    close($old_log);
}

# $s serialises access to @failed between this thread and the workers.
my $s   = Thread::Semaphore->new;
my $cwd = Cwd::cwd();

# Start this run with an empty index file.  Any previous log has already been
# read above, so it is safe even if $log names this same file.
my $log_file = "$cwd/${file}_master_datastore.index";
open(my $truncate_fh, '>', $log_file)
    or die "ERROR: could not create $log_file: $!\n";
close($truncate_fh);

# The workers are started before the input is read; they idle until entries
# show up in @files.
my @threads;
for (my $i = 0; $i < $cpus; $i++) {
    my $thr = threads->create(\&launch);
    push(@threads, $thr);
}

my $iterator = Iterator::Fasta->new($file);
my $DS = Datastore::MD5->new('root'  => "$cwd/${file}_datastore",
                             'depth' => 2,
                             );

# Write every input entry to its own FASTA file inside the datastore and
# queue that file for the workers.
while (my $fasta = $iterator->nextEntry) {
    my $seq_id  = Fasta::getSeqID(\$fasta);
    my $safe_id = Fasta::seqID2SafeID($seq_id);
    my $seq     = Fasta::getSeq(\$fasta);

    my $dir = $DS->id_to_dir($safe_id);
    $DS->mkdir($safe_id) || die "ERROR: could not make datastore directory\n";

    my $fasta_name = "$safe_id.fasta";
    my $fasta_file = "$dir/$fasta_name";

    # A log was supplied and does not list this entry as finished: discard
    # whatever an earlier run left in its directory.
    if ($log && !$log_f{$fasta_file} && -e $fasta_file) {
        _remove_files_with_prefix($dir, '');
    }

    # Results cached for a different sequence under the same ID are stale.
    if (-e $fasta_file) {
        my $it      = Iterator::Fasta->new($fasta_file);
        my $fa      = $it->nextEntry;
        my $seq_old = defined $fa ? Fasta::getSeq(\$fa) : undef;

        if (!defined $seq_old || $seq ne $seq_old) {
            _remove_files_with_prefix($dir, $fasta_name);
        }
    }

    open(my $out, '>', $fasta_file)
        or die "ERROR: could not write $fasta_file: $!\n";
    print {$out} $fasta;
    close($out) or die "ERROR: could not close $fasta_file: $!\n";

    push(@files, $fasta_file);
}

# Nothing more will be queued; workers may exit once the queues are empty.
$go = 0;

# Keep one append handle for the whole wait loop and flush after every record
# so the index stays usable if the run is interrupted.
open(my $index_fh, '>>', $log_file)
    or die "ERROR: could not append to $log_file: $!\n";
{
    my $previous = select($index_fh);
    $| = 1;
    select($previous);
}

# Poll the workers until all have exited.  Each pass logs newly finished files
# and blanks out failures that have already been seen once.
my %seen;
while (my $thr = shift @threads) {
    if ($thr->is_running) {
        push(@threads, $thr);
        sleep 1;
    }
    else {
        $thr->join();
    }

    while (defined(my $f = shift @finished)) {
        print {$index_fh} "FINISHED $f\n";
    }

    $s->down;
    # The bound is re-evaluated on every pass: workers may append to @failed
    # while this scan is running.
    for (my $i = 0; $i < @failed; $i++) {
        next if !defined $failed[$i];

        if (exists $seen{$failed[$i]}) {
            # Seen in @failed before: drop it instead of retrying forever.
            warn "WARNING: giving up on $failed[$i] after repeated failures\n";
            $failed[$i] = undef;
        }
        else {
            $seen{$failed[$i]}++;
        }
    }
    $s->up;
}

close($index_fh) or die "ERROR: could not close $log_file: $!\n";

# Removes the plain files in $dir whose names start with $prefix, without
# going through a shell.  An empty prefix selects every non-hidden entry, as
# the glob "$dir/*" would.  Directories are left alone.
sub _remove_files_with_prefix {
    my ($dir, $prefix) = @_;

    opendir(my $dh, $dir)
        or die "ERROR: could not read directory $dir: $!\n";
    my @names = readdir($dh);
    closedir($dh);

    for my $name (@names) {
        next if $name eq '.' || $name eq '..';
        next if $prefix eq '' && $name =~ /\A\./;
        next if index($name, $prefix) != 0;

        my $path = "$dir/$name";
        next if -d $path;
        unlink($path) or warn "WARNING: could not remove $path: $!\n";
    }

    return;
}
139
140 #-------------SUBS--------
141
# Worker-thread body.  Takes files from the shared @files queue first and from
# the shared @failed queue otherwise, and runs iprscan on each one.  The loop
# ends once the main thread has cleared $go and both queues are empty.
# Takes no arguments; results are reported through @finished and @failed.
sub launch {
    while ($go || @files || @failed) {
        my $f = shift @files;

        if (defined $f) {
            # An earlier run already left a "success" stderr file: skip it.
            if (_iprscan_finished($f)) {
                push(@finished, $f);
                next;
            }

            _run_iprscan($f);
        }
        elsif (@failed) {
            # The main thread edits @failed under this semaphore as well.
            $s->down;
            $f = shift @failed;
            $s->up;

            # An undef slot is a failure the main thread has given up on.
            next if !defined $f;

            _run_iprscan($f);
        }
        else {
            sleep 1;
        }
    }

    return;
}

# Runs iprscan on FASTA file $f, sending stdout to "$f.out" and stderr to
# "$f.error", then queues $f on @finished or @failed depending on the result.
sub _run_iprscan {
    my ($f) = @_;

    # A shell is needed for the redirections, so every path is quoted.
    my $in  = _shell_quote($f);
    my $out = _shell_quote("$f.out");
    my $err = _shell_quote("$f.error");

    system("iprscan -cli -i $in -iprlookup -goterms -format raw -nocrc 1> $out 2> $err");

    if (_iprscan_finished($f)) {
        push(@finished, $f);
    }
    else {
        push(@failed, $f);
    }

    return;
}

# Returns true when "$f.error" exists and is exactly 36 bytes long, which this
# script treats as the mark of a completed run.
# NOTE(review): 36 is presumably the length of the one line iprscan writes to
# stderr on success - confirm against the installed iprscan version.
sub _iprscan_finished {
    my ($f) = @_;

    # -s yields undef for a missing file and a false value for an empty one.
    my $size = -s "$f.error";

    return ($size || 0) == 36 ? 1 : 0;
}

# Wraps $text in single quotes for /bin/sh, escaping embedded single quotes.
sub _shell_quote {
    my ($text) = @_;

    $text =~ s/'/'\\''/g;

    return "'$text'";
}
Note: See TracBrowser for help on using the browser.