root/bin/maker2chado

Revision 267, 8.8 kB (checked in by cholt, 2 months ago)

Small changes to make maker2chado fail when CHADO is not installed.

  • Property svn:executable set to *
Line 
1 #! /usr/bin/perl -w
2
3 use strict;
4 use FindBin;
5 use lib "$FindBin::Bin/../lib";
6 use lib "$FindBin::Bin/../perl/lib";
7
8 use DBI;
9 use Getopt::Long;
10 use File::Temp;
11 use vars qw($DBH $GMOD);
12
# Compile-time setup: install the ^C handler and locate the CHADO bulk
# loader (gmod_bulk_load_gff3.pl) on the PATH.  The path found is stored in
# the package global $GMOD; the script dies here if the loader is missing.
BEGIN{
    #what to do on ^C
    $SIG{'INT'} = sub {
        # the handler is installed before any connection is made, so $DBH
        # may still be undefined when the user interrupts the script
        $DBH->disconnect if($DBH);
        exit (1);
    };

    my $loc = `which gmod_bulk_load_gff3.pl 2> /dev/null`;
    chomp $loc;

    # an empty result, or output starting with "no gmod_bulk_load_gff3.pl",
    # both mean the loader was not found
    if ($loc =~ /^no gmod_bulk_load_gff3\.pl/ || ! $loc) {
        die "FATAL: Can not find gmod_bulk_load_gff3.pl\n".
            "Make sure CHADO is installed and the executables are in your PATH.\n";
    }
    else {
        # keep only the last line of the output
        $GMOD = [split("\n", $loc)]->[-1];
    }
}
31
32 use IO::Prompt;
33 use DBD::Pg;
34
# Usage statement printed for --help, for invalid options, and when the
# required positional arguments are missing.
my $usage = '
USAGE:
     maker2chado [OPTION] <database_name> <gff3file1> <gff3file2> ...
     maker2chado [OPTION] -d <datastore_index> <database_name>

     This script takes MAKER produced GFF3 files and dumps them into a
     CHADO database.  You must set the database up first according to
     CHADO installation instructions.  CHADO provides its own methods
     for loading GFF3, but this script makes it easier for MAKER
     specific data.  You can either provide the datastore index file
     produced by MAKER to the script or add the GFF3 files as command
     line arguments.


OPTIONS:
     ds_index|d  <file>  Provide MAKER produced datastore index

     host|h      <host>  Database host address

     port|p      <port>  Database port

     username|U  <user>  Username to access database

     password|W          Prompt for password to access database

     organism|o  <name>  Organism in CHADO database to add to

     jobid|j     <id>    JobID (only for MAKER web server use)

     help|?              Displays this usage statement


';

# Command line option values; the connection settings default to empty
# strings, the rest stay undefined unless supplied.
my $host = '';
my $user = '';
my $pass = '';
my $organism;
my $port;
my $dstore;
my $jid;

# Parse the options.  GetOptions returns false when it meets an unknown or
# malformed option; in that case show the usage on STDERR and stop rather
# than continuing with a half-understood command line.
my $options_ok = GetOptions("host|h=s" => \$host,
                            "port|p=i" => \$port,
                            "username|U=s" => \$user,
                            # the password is never taken from the command line, only prompted for
                            "password|W" => sub{$pass = prompt("Please enter your password: ", -echo => '*')},
                            "organism|o=s" => \$organism,
                            "jobid|j=i" => \$jid,
                            "ds_index|d=s" => \$dstore,
                            "help|?" => sub {print $usage; exit()}
                            );

if(! $options_ok){
    print STDERR $usage;
    exit(1);
}
86
# The first remaining command line argument is the database name.
my $dbname = shift;
my @files;

# Collect the GFF3 files to load, either from the datastore index given
# with -d or from the remaining command line arguments.
die "ERROR: The file ds_index\'$dstore\' does not exist\n" if ($dstore && ! -e $dstore);
if($dstore){
    open(my $index_fh, '<', $dstore)
        or die "ERROR: Could not open ds_index \'$dstore\': $!\n";

    #uniq the entries
    my %seen;
    while(my $e = <$index_fh>){
        # only entries marked FINISHED are loaded
        next unless ($e =~ /FINISHED/);
        next if $seen{$e}++;
        chomp $e;

        # tab separated line; the second column is the output directory
        my (undef, $dir) = split("\t", $e);
        next if(! defined $dir);
        $dir =~ s/\/$//;

        # skip malformed lines with no usable directory column
        next if(! length $dir);
        push(@files, $dir);
    }
    close($index_fh);

    # the GFF3 file sits inside each directory and is named after the
    # directory's last path component: <dir>/<name>.gff
    foreach my $file (@files){
        my ($name) = $file =~ /([^\/]+)$/;
        $file = "$file/$name.gff";
    }
}
else{
    # everything left on the command line is a GFF3 file
    @files = @ARGV;
    undef @ARGV;
}

# both a database name and at least one file are required
if(!$dbname || ! @files){
    print $usage;
    exit();
}
120
# Check that every GFF3 file exists before touching the database.  A file
# that is not found as given is retried relative to the directory holding
# the datastore index; all missing files are reported together.
my $error;
my $base = $dstore;
if($base){
    # strip the file name, leaving the directory part of the index path
    $base =~ s{[^/]+$}{};
}

for my $file (@files){
    next if(-f $file);

    if($base and -f "$base/$file"){
        # found next to the datastore index; use that location from now on
        $file = "$base/$file";
    }
    else{
        $error .= "ERROR: The GFF3 file \'$file\' does not exist\n";
    }
}

if($error){
    die $error;
}
136
# Connect to the database.  DBD::Pg takes the host and port as separate
# DSN attributes, so $dbname is left untouched and the DSN is assembled
# here instead of appending "@host:port" to the database name.
my $dsn = "dbi:Pg:dbname=$dbname";
$dsn .= ";host=$host" if($host);
$dsn .= ";port=$port" if($port);

$DBH = DBI->connect($dsn,$user,$pass,{AutoCommit => 0}) or die $DBI::errstr;

# abbreviations and common names of all organisms currently in the database
my $o_list = $DBH->selectcol_arrayref(qq{SELECT abbreviation FROM organism});
my $c_list = $DBH->selectcol_arrayref(qq{SELECT common_name FROM organism});

# selectcol_arrayref returns undef when the query itself fails (for example
# when there is no organism table); stop with a clear message instead of
# dereferencing undef below
if(! $o_list || ! $c_list){
    my $reason = $DBH->errstr || 'the organism table could not be read';
    $DBH->rollback;
    $DBH->disconnect;
    die "FATAL: This does not seem to be a chado database ($reason)\n";
}

warn "WARNING: This does not seem to be a chado database\n\n" if(! @$o_list || ! @$c_list);

#check user defined organism
if($organism && ! grep {defined $_ && $_ eq $organism} (@$o_list, @$c_list)){
    warn "WARNING: The organism you specified \'$organism\' does not exist in the database\n\n";
    undef $organism;
}

#add maker jobids automatically to the database
if($jid){
    # a job id overrides any organism given on the command line
    $organism = $jid;
    if(! grep {defined $_ && $_ eq $organism} (@$o_list, @$c_list)){
        # placeholders keep the values out of the SQL text
        $DBH->do(qq{ INSERT INTO organism (abbreviation, genus, species, common_name) VALUES (?, 'JOB', ?, ?) },
                 undef, $jid, $jid, $jid);
        $DBH->commit;
        $o_list = $DBH->selectcol_arrayref(qq{SELECT abbreviation FROM organism});
        $c_list = $DBH->selectcol_arrayref(qq{SELECT common_name FROM organism});
    }
}
163
#prompt user to select organism
# Runs only when no valid organism was supplied.  The common names in
# $c_list are shown ten per page, together with entries for paging, adding
# an organism, removing an organism and quitting.  The loop ends once the
# user picks an organism, which is left in $organism.
if(!$organism || ! grep {$_ eq $organism} (@$o_list, @$c_list)){
    die "FATAL: This script must be run interactively if no valid organism is supplied\n\n" if(! -t);

    for(my $i = 0; $i <= abs(@$c_list/10); $i ++){
        # $i is the page number; collect the (up to) ten names on this page
        my @menu;
        for(my $j = $i*10; $j < $i*10+10; $j++){
            push(@menu, $c_list->[$j]) if(exists $c_list->[$j]);
        }

        push(@menu, "Show more organisms -->") if($i + 1 < abs(@$c_list/10));
        push(@menu, "<-- Previous list") if($i > 0);
        push(@menu, "<Add a new organism to the database>");
        push(@menu, "<Remove an organism from the database>");
        push(@menu, "<Quit>");

        system("clear");
        $organism = prompt ("Please select an existing organism from the database or add a new one: ",
                            -menu => \@menu
                           );

        if($organism eq 'Show more organisms -->'){
            # fall through; the loop increment moves to the next page
            undef $organism;
        }
        elsif($organism eq '<-- Previous list'){
            # minus two, plus the loop increment, lands on the previous page
            $i -= 2;
            undef $organism;
        }
        elsif($organism eq '<Add a new organism to the database>'){
            my $ab = '';
            my $gn = '';
            my $sp = '';
            my $cn = '';

            # keep asking until all four fields are filled in and confirmed;
            # earlier answers are offered as defaults on the next round
            while(1){
                system("clear");
                do{
                    $ab = prompt ("Enter abbreviation, (i.e. H.sapiens) [$ab]: ", -default => $ab);
                    $ab = $ab->{value};
                }while(!$ab);
                do{
                    $gn = prompt ("Enter genus, (i.e. Homo) [$gn]: ", -default => $gn);
                    $gn = $gn->{value};
                }while(!$gn);
                do{
                    $sp = prompt ("Enter species, (i.e. sapiens) [$sp]: ", -default => $sp);
                    $sp = $sp->{value};
                }while(!$sp);
                do{
                    $cn = prompt ("Enter common name, (i.e. human) [$cn]: ", -default => $cn);
                    $cn = $cn->{value};
                }while(!$cn);

                system("clear");
                last if(prompt ("Abbreviation: $ab\n".
                                "Genus: $gn\n".
                                "Species: $sp\n".
                                "Common Name: $cn\n\n".
                                "Is this correct?",
                                -yes_no
                                )
                        );
            }

            # the values are typed by the user, so bind them as placeholders
            # instead of interpolating them into the SQL text
            $DBH->do(qq{ INSERT INTO organism (abbreviation, genus, species, common_name) VALUES (?, ?, ?, ?) },
                     undef, $ab, $gn, $sp, $cn);
            $DBH->commit;
            $o_list = $DBH->selectcol_arrayref(qq{SELECT abbreviation FROM organism});
            $c_list = $DBH->selectcol_arrayref(qq{SELECT common_name FROM organism});

            # restart from the first page (the loop increment makes it 0)
            $i = -1;
            next;
        }
        elsif($organism eq '<Remove an organism from the database>'){
            remove_db_menu();

            # restart from the first page (the loop increment makes it 0)
            $i = -1;
        }
        elsif($organism eq '<Quit>'){
            warn "\n\nWARNING: The user exited the program without doing anything\n\n";
            $DBH->disconnect;
            exit;
        }
        else{
            # an organism was chosen
            last;
        }
    }
}
249
# The loader opens its own connection, so this one is no longer needed.
$DBH->disconnect;


#build command line
# The loader is run with the list form of system(), so no shell is involved
# and values containing spaces or shell metacharacters (an organism common
# name, a password) reach it as single arguments.  "$pass" and "$organism"
# are stringified because they may hold the value returned by prompt().
# NOTE(review): the password is still handed to the loader via --dbpass on
# its command line, as in the original.
my @gmod_args;
push(@gmod_args, '--dbhost', $host) if($host);
push(@gmod_args, '--dbport', $port) if($port);
push(@gmod_args, '--dbuser', $user) if($user);
push(@gmod_args, '--dbpass', "$pass") if($pass);
push(@gmod_args, '--dbname', $dbname);
push(@gmod_args, '--organism', "$organism");
push(@gmod_args, '--skip_vacuum');
push(@gmod_args, '--recreate_cache');

# Each GFF3 file is split into annotation lines, analysis lines and the
# trailing FASTA section; each non-empty part is written to a temporary
# GFF3 file (with the FASTA appended) and loaded separately.
foreach my $file (@files){
    my $annotations = '';
    my $analysis = '';
    my $fasta = '';

    my $gff_fh;
    if(! open($gff_fh, '<', $file)){
        warn "WARNING: Could not open the GFF3 file \'$file\': $!\n";
        next;
    }

    my $ff; #fasta flag
    while(defined(my $line = <$gff_fh>)){
        if($ff){
            # everything after the ##FASTA directive belongs to the FASTA section
            $fasta .= $line;
        }
        elsif($line =~ /^\#\#FASTA/){
            $ff = 1;
            $fasta .= $line;
        }
        elsif($line =~ /^\#/){
            # other comment and directive lines are dropped
            next;
        }
        elsif($line =~ /\tmaker\t|\tcontig\t/){
            # lines with a "maker" or "contig" column are treated as annotations
            $annotations .= $line;
        }
        else{
            # everything else is treated as analysis
            $analysis .= $line;
        }
    }
    close($gff_fh);

    # annotations are loaded with --noexon, analysis with --analysis
    foreach my $load ([$annotations, '--noexon'], [$analysis, '--analysis']){
        my ($features, $mode_flag) = @$load;
        next if(! length $features);

        my ($fh, $fname) = File::Temp::tempfile();
        print $fh "\#\#gff-version 3\n";
        print $fh $features;
        print $fh $fasta;

        # buffered write errors only show up when the handle is closed
        if(! close($fh)){
            warn "WARNING: Could not write temporary GFF3 file \'$fname\': $!\n";
            unlink($fname);
            next;
        }

        my $status = system($GMOD, @gmod_args, '--gfffile', $fname, $mode_flag);
        warn "WARNING: $GMOD failed while loading \'$file\' ($mode_flag)\n" if($status != 0);
        unlink($fname);
    }
}
324
325
326 #-------subs--------
# Interactive menu for removing an organism from the database.
#
# Shows the common names held in $c_list ten per page, with entries for
# paging and for going back to the main menu.  When the user picks an
# organism and confirms, its row is deleted, the change is committed, and
# $o_list / $c_list are refreshed before returning.  If the user declines
# the confirmation, the same page is shown again.
#
# Takes no arguments and returns nothing; it works on the file-level
# $DBH, $o_list and $c_list.
sub remove_db_menu{
    for(my $i = 0; $i < abs(@$c_list/10); $i ++){
        # $i is the page number; collect the (up to) ten names on this page
        my @menu;
        for(my $j = $i*10; $j < $i*10+10; $j++){
            push(@menu, $c_list->[$j]) if(exists $c_list->[$j]);
        }

        push(@menu, "Show more organisms -->") if($i + 1 < abs(@$c_list/10));
        push(@menu, "<-- Previous list") if($i > 0);
        push(@menu, "<Go back to main menu>");

        my $selection = prompt ("Please select an organism to remove from the database: ",
                                -menu => \@menu
                                );

        if($selection eq 'Show more organisms -->'){
            # fall through; the loop increment moves to the next page
            undef $selection;
        }
        elsif($selection eq '<-- Previous list'){
            # minus two, plus the loop increment, lands on the previous page
            $i -= 2;
            undef $selection;
        }
        elsif($selection eq '<Go back to main menu>'){
            return;
        }
        else{
            if(prompt ("Are you sure you want to delete \'$selection\' from the database?", -yes_no)){
                # the menu entries are common names, so the row has to be
                # matched on common_name; the value is bound as a
                # placeholder (stringified, as it is the prompt() result)
                $DBH->do(qq{ DELETE FROM organism WHERE (common_name = ?) }, undef, "$selection");
                $DBH->commit;
                $o_list = $DBH->selectcol_arrayref(qq{SELECT abbreviation FROM organism});
                $c_list = $DBH->selectcol_arrayref(qq{SELECT common_name FROM organism});

                return;
            }
            else{
                # cancel out the loop increment so the same page is shown again
                $i--;
            }
        }
    }

    return;
}
Note: See TracBrowser for help on using the browser.