root/bin/maker2jbrowse

Revision 263, 7.1 kB (checked in by cholt, 2 months ago)

include JBrowse configuration files

  • Property svn:executable set to *
Line 
1 #! /usr/bin/perl -w
2
3 use strict;
4 use FindBin;
5 use lib "$FindBin::Bin/../lib";
6 use lib "$FindBin::Bin/../perl/lib";
7
8 use Getopt::Long;
9 use File::Temp;
10 use vars qw($JB $RS);
11
# Locate the two JBrowse helper scripts at compile time and store their
# paths in the package globals $JB (flatfile-to-json.pl) and $RS
# (prepare-refseqs.pl). Dies with a FATAL message if either is missing.
BEGIN {
    # Ask `which` for $tool and return the last line it prints.
    # Dies when nothing usable comes back.
    my $find_tool = sub {
        my ($tool) = @_;

        my $loc = `which $tool 2> /dev/null`;
        $loc = '' if !defined $loc;    # backticks yield undef if the shell could not be run
        chomp $loc;

        # An output starting with "no <tool>" is treated as a miss
        # (presumably some `which` variants report failure on stdout -- verify).
        # \Q..\E keeps the dots in the tool name literal.
        if (!$loc || $loc =~ /^no \Q$tool\E/) {
            die "FATAL: Can not find $tool\n".
                "Make sure JBrowse is installed and the executables are in your PATH.\n";
        }

        # `which` may print several lines; only the last one is used
        return (split /\n/, $loc)[-1];
    };

    $JB = $find_tool->('flatfile-to-json.pl');
    $RS = $find_tool->('prepare-refseqs.pl');
}
35
# Usage text printed for --help, and for an empty file list or bad options.
my $usage = '
USAGE:
     maker2jbrowse [OPTION] <gff3file1> <gff3file2> ...
     maker2jbrowse [OPTION] -d <datastore_index>

     This script takes MAKER produced GFF3 files and dumps them into a
     JBrowse for you using pre-configured JSON tracks.

OPTIONS:
     ds_index|d  <file>  Provide MAKER produced datastore index

     help|?              Displays this usage statement

';

# Path given with -d/--ds_index; stays undef when the option is absent.
my $dstore;

# GetOptions returns false when it meets an unknown or malformed option.
# Stop with the usage text instead of continuing with a half-parsed
# command line.
GetOptions("ds_index|d=s" => \$dstore,
           "help|?" => sub {print $usage; exit()}
           ) or die $usage;
56
# Build the list of GFF3 files to load: either derived from the entries of
# the datastore index given with -d, or taken directly from the remaining
# command-line arguments.
my @files;

die "ERROR: The file ds_index\'$dstore\' does not exist\n" if ($dstore && ! -e $dstore);
if ($dstore) {
    open(my $index_fh, '<', $dstore)
        or die "ERROR: Could not open ds_index \'$dstore\': $!\n";

    # uniq the entries
    my %seen;
    while (my $entry = <$index_fh>) {
        chomp $entry;    # chomp first so a final line without newline still dedups
        next unless ($entry =~ /FINISHED/);
        next if $seen{$entry}++;

        # index lines are tab separated; only the second column is used here
        my ($id, $dir, $status) = split("\t", $entry);
        if (!defined $dir) {
            warn "WARNING: Skipping malformed ds_index entry: $entry\n";
            next;
        }
        $dir =~ s/\/$//;

        # the GFF3 file is named after the last path component of its directory
        my ($name) = $dir =~ /([^\/]+)$/;
        if (!defined $name) {
            warn "WARNING: Skipping ds_index entry without a directory name: $entry\n";
            next;
        }
        push(@files, "$dir/$name.gff");
    }
    close($index_fh);
}
else {
    @files = @ARGV;
    @ARGV = ();
}

# nothing to do: show the usage text and leave
if (!@files) {
    print $usage;
    exit();
}
89
# Check that every entry in @files is an existing plain file. An entry that
# is missing is retried relative to the directory part of the ds_index path;
# if found there, the entry in @files is rewritten in place. All entries that
# are still missing are reported together in one fatal error.
my $error;
my $base = $dstore;
$base =~ s/[^\/]+$// if($base);    # strip the file name, keep the directory

for my $gff_path (@files) {
    next if -f $gff_path;

    if ($base && -f "$base/$gff_path") {
        # the loop variable aliases the array element, so this updates @files
        $gff_path = "$base/$gff_path";
    }
    else {
        $error .= "ERROR: The GFF3 file \'$gff_path\' does not exist\n";
    }
}

if ($error) {
    die $error;
}
105
#--build command lines
# Maps a track name (the GFF3 source column, plus the extra 'gene' entry) to
# the option string appended to the flatfile-to-json.pl invocation for it.
my %commands = (
    #MAKER annotations
    gene            => ' --tracklabel "Genes" --key "Genes" --getType --getLabel --autocomplete label --cssclass feature5 --type gene',
    maker           => ' --tracklabel "Transcripts" --key "Transcripts" --getType --getSubs --getLabel --autocomplete label --cssclass transcript --subfeatureClasses \'{"exon": "transcript-exon", "CDS": "transcript-CDS", "UTR": "transcript-UTR"}\' --arrowheadClass transcript-arrowhead --type mRNA',

    #ab initio gene predictions
    snap            => ' --tracklabel "SNAP" --key "SNAP" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses \'{"match_part": "transcript-exon2"}\' --arrowheadClass transcript-arrowhead --type match:snap',
    snap_masked     => ' --tracklabel "SNAP" --key "SNAP" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses \'{"match_part": "transcript-exon2"}\' --arrowheadClass transcript-arrowhead --type match:snap_masked',
    augustus        => ' --tracklabel "Augustus" --key "Augustus" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses \'{"match_part": "transcript-exon3"}\' --arrowheadClass transcript-arrowhead --type match:augustus',
    augustus_masked => ' --tracklabel "Augustus" --key "Augustus" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses \'{"match_part": "transcript-exon3"}\' --arrowheadClass transcript-arrowhead --type match:augustus_masked',
    genemark        => ' --tracklabel "GeneMark" --key "GeneMark" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses \'{"match_part": "transcript-exon4"}\' --arrowheadClass transcript-arrowhead --type match:genemark',
    genemark_masked => ' --tracklabel "GeneMark" --key "GeneMark" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses \'{"match_part": "transcript-exon4"}\' --arrowheadClass transcript-arrowhead --type match:genemark_masked',
    fgenesh         => ' --tracklabel "FGENESH" --key "FGENESH" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses \'{"match_part": "transcript-exon5"}\' --arrowheadClass transcript-arrowhead --type match:fgenesh',
    fgenesh_masked  => ' --tracklabel "FGENESH" --key "FGENESH" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses \'{"match_part": "transcript-exon5"}\' --arrowheadClass transcript-arrowhead --type match:fgenesh_masked',

    #evidence alignments
    blastn          => ' --tracklabel "BLASTN" --key "BLASTN" --getType --getSubs --cssclass generic_parent --subfeatureClasses \'{"match_part": "match_part4"}\' --type expressed_sequence_match:blastn',
    blastx          => ' --tracklabel "BLASTX" --key "BLASTX" --getType --getSubs --cssclass generic_parent --subfeatureClasses \'{"match_part": "match_part5"}\' --type protein_match:blastx',
    tblastx         => ' --tracklabel "TBLASTX" --key "TBLASTX" --getType --getSubs --cssclass generic_parent --subfeatureClasses \'{"match_part": "match_part6"}\' --type expressed_sequence_match:tblastx',
    est2genome      => ' --tracklabel "est2genome" --key "est2genome" --getType --getSubs --cssclass generic_parent --subfeatureClasses \'{"match_part": "match_part3"}\' --type expressed_sequence_match:est2genome',
    protein2genome  => ' --tracklabel "protein2genome" --key "protein2genome" --getType --getSubs --cssclass generic_parent --subfeatureClasses \'{"match_part": "match_part2"}\' --type protein_match:protein2genome',

    #repeats
    repeatmasker        => ' --tracklabel "RepeatMasker" --key "RepeatMasker" --getType --getSubs --cssclass generic_parent --subfeatureClasses \'{"match_part": "match_part7"}\' --type match:repeatmasker',
    'blastx:repeatmask' => ' --tracklabel "RepeatRunner" --key "RepeatRunner" --getType --getSubs --cssclass generic_parent --subfeatureClasses \'{"match_part": "match_part7"}\' --type protein_match:blastx:repeatmask',
);

133
134
# Run a shell command line and warn (without aborting) when it fails.
# Returns true when the command exited with status 0.
sub _run_command {
    my ($command) = @_;

    my $status = system($command);
    if ($status == -1) {
        warn "WARNING: Could not execute command ($!): $command\n";
    }
    elsif ($status != 0) {
        warn "WARNING: Command exited with status ".($status >> 8).": $command\n";
    }

    return $status == 0;
}

# Write $content to a new temporary file and return its name.
# UNLINK => 1 asks File::Temp to remove the file at program exit, so it is
# cleaned up even if the script dies before the explicit unlink.
sub _write_temp_file {
    my ($content) = @_;

    my ($fh, $fname) = File::Temp::tempfile(UNLINK => 1);
    print {$fh} $content
        or die "ERROR: Could not write temporary file \'$fname\': $!\n";
    close($fh)
        or die "ERROR: Could not write temporary file \'$fname\': $!\n";

    return $fname;
}

# For each GFF3 file: split it into its annotation part and the sequence
# part that follows a ##FASTA line, pass the sequence to prepare-refseqs.pl
# ($RS), and pass the annotations to flatfile-to-json.pl ($JB) once per
# track name found in the file.
foreach my $file (@files) {
    my $gff;
    my $fasta;
    my %tracks;

    open(my $in, '<', $file)
        or die "ERROR: Could not open GFF3 file \'$file\': $!\n";

    my $in_fasta;    # set once the ##FASTA line has been seen
    while (defined(my $line = <$in>)) {
        if ($in_fasta) {
            $fasta .= $line;
        }
        elsif ($line =~ /^\#\#FASTA/) {
            $in_fasta = 1;
        }
        elsif ($line =~ /[^\t]*\t[^\t]*\tcontig\t/) {
            next;    # skip contig line
        }
        else {
            # second column of a non-comment line names the track
            if ($line !~ /^\#/ && $line =~ /[^\t]*\t([^\t]*)\t/) {
                my $source = $1;
                $tracks{$source}++;
                $tracks{gene}++ if ($source eq 'maker');    # add gene locus track
            }

            $gff .= $line;
        }
    }
    close($in);

    # Reject unknown sources before any external command runs or any
    # temporary file is created, so a failure leaves nothing half-done.
    my @track_names = sort keys %tracks;    # sorted for a reproducible order
    foreach my $track (@track_names) {
        die "ERROR: No track information for source \'$track\'\n\n" if (! exists $commands{$track});
    }

    if ($fasta) {
        my $fname = _write_temp_file($fasta);

        _run_command($RS." --fasta $fname");
        unlink($fname);
    }

    if ($gff) {
        my $fname = _write_temp_file($gff);

        foreach my $track (@track_names) {
            _run_command($JB.$commands{$track}." --gff $fname");
        }

        unlink($fname);
    }
}
Note: See TracBrowser for help on using the browser.