| 1 |
#! /usr/bin/perl -w |
|---|
| 2 |
|
|---|
| 3 |
use strict; |
|---|
| 4 |
use FindBin; |
|---|
| 5 |
use lib "$FindBin::Bin/../lib"; |
|---|
| 6 |
use lib "$FindBin::Bin/../perl/lib"; |
|---|
| 7 |
|
|---|
| 8 |
use Getopt::Long; |
|---|
| 9 |
use File::Temp; |
|---|
| 10 |
use vars qw($JB $RS); |
|---|
| 11 |
|
|---|
| 12 |
# Locate the two JBrowse helper scripts on PATH at compile time and remember
# their paths in the package globals $JB (flatfile-to-json.pl) and $RS
# (prepare-refseqs.pl). Dies with a FATAL message if either cannot be found.
BEGIN{
    # Looked up in this order; each entry pairs a script name with the
    # global that receives its resolved location.
    my @wanted = (
        ['flatfile-to-json.pl', \$JB],
        ['prepare-refseqs.pl',  \$RS],
    );

    for my $entry (@wanted){
        my ($tool, $slot) = @{$entry};

        my $loc = `which $tool 2> /dev/null`;
        chomp $loc;

        # Some `which` implementations answer "no <tool> in ..." instead of
        # printing nothing, so both forms count as "not found".
        if ($loc =~ /^no $tool/ || ! $loc) {
            die "FATAL: Can not find $tool\n".
                "Make sure JBrowse is installed and the executables are in your PATH.\n";
        }

        # When `which` prints several lines, the last one is taken as the path.
        my @reported = split("\n", $loc);
        ${$slot} = $reported[-1];
    }
}
|---|
| 35 |
|
|---|
| 36 |
# Help text printed for --help and when no input files are given.
# The heredoc is non-interpolating; the text begins with an empty line and
# ends with one, matching the original quoted literal.
my $usage = <<'END_USAGE';

USAGE:
maker2jbrowse [OPTION] <gff3file1> <gff3file2> ...
maker2jbrowse [OPTION] -d <datastore_index>

This script takes MAKER produced GFF3 files and dumps them into a
JBrowse for you using pre-configured JSON tracks.

OPTIONS:
ds_index|d <file> Provide MAKER produced datastore index

help|? Displays this usage statement

END_USAGE
|---|
| 50 |
|
|---|
| 51 |
# Path to a MAKER datastore index, set by --ds_index / -d (undef when the
# GFF3 files are given directly on the command line).
my $dstore;

# Parse command-line switches. GetOptions returns false when it meets an
# unknown or malformed option (it has already warned on STDERR by then), so
# stop with the usage text and a non-zero status instead of carrying on with
# a half-parsed command line.
GetOptions("ds_index|d=s" => \$dstore,
           "help|?"       => sub {print $usage; exit()}
) or do {
    print STDERR $usage;
    exit(1);
};
|---|
| 56 |
|
|---|
| 57 |
# Build the list of GFF3 files to load, either from the datastore index
# (--ds_index) or from the remaining command-line arguments. Prints the usage
# text and exits when the list ends up empty.
my @files;

die "ERROR: The file ds_index\'$dstore\' does not exist\n" if ($dstore && ! -e $dstore);
if($dstore){
    open(my $index_fh, '<', $dstore)
        or die "ERROR: Could not open ds_index \'$dstore\': $!\n";

    # Only entries marked FINISHED are used, and each distinct line once.
    my %seen;
    while(my $entry = <$index_fh>){
        next unless ($entry =~ /FINISHED/);
        next if $seen{$entry}++;
        chomp $entry;

        # Tab-separated columns: id, output directory, status.
        my (undef, $dir) = split("\t", $entry);
        if(! defined $dir || $dir eq ''){
            warn "WARNING: Skipping malformed ds_index entry: $entry\n";
            next;
        }
        $dir =~ s/\/$//;

        # The GFF3 file is named after the last path component of the
        # directory: <dir>/<basename>.gff
        if($dir =~ /([^\/]+)$/){
            push(@files, "$dir/$1.gff");
        }
        else{
            warn "WARNING: Skipping malformed ds_index entry: $entry\n";
        }
    }
    close($index_fh);
}
else{
    @files = @ARGV;
    @ARGV = (); # the arguments have been consumed as file names
}

if(!@files){
    print $usage;
    exit();
}
|---|
| 89 |
|
|---|
| 90 |
# Check that every entry in @files is a regular file. An entry that is not
# found as given is retried relative to the directory part of the datastore
# index path; if that works the entry in @files is rewritten in place.
# All missing files are reported together before dying.
my $error;
my $base = $dstore;
$base =~ s/[^\/]+$// if($base); # strip the file name, keep the directory

for my $file (@files){
    next if (-f $file);

    # Only build the fallback path when there is a directory to fall back to.
    my $fallback = $base ? "$base/$file" : undef;
    if(defined $fallback && -f $fallback){
        $file = $fallback;
        next;
    }

    $error .= "ERROR: The GFF3 file \'$file\' does not exist\n";
}
die $error if $error;
|---|
| 105 |
|
|---|
| 106 |
#--build command lines
# Maps a GFF3 source name (column 2) to the argument string appended to the
# flatfile-to-json.pl command for that source. Each value starts with a
# space so it can be concatenated directly after the executable path.
my %commands = (
    #MAKER annotations
    gene  => q( --tracklabel "Genes" --key "Genes" --getType --getLabel --autocomplete label --cssclass feature5 --type gene),
    maker => q( --tracklabel "Transcripts" --key "Transcripts" --getType --getSubs --getLabel --autocomplete label --cssclass transcript --subfeatureClasses '{"exon": "transcript-exon", "CDS": "transcript-CDS", "UTR": "transcript-UTR"}' --arrowheadClass transcript-arrowhead --type mRNA),

    #ab initio gene predictions
    snap            => q( --tracklabel "SNAP" --key "SNAP" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses '{"match_part": "transcript-exon2"}' --arrowheadClass transcript-arrowhead --type match:snap),
    snap_masked     => q( --tracklabel "SNAP" --key "SNAP" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses '{"match_part": "transcript-exon2"}' --arrowheadClass transcript-arrowhead --type match:snap_masked),
    augustus        => q( --tracklabel "Augustus" --key "Augustus" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses '{"match_part": "transcript-exon3"}' --arrowheadClass transcript-arrowhead --type match:augustus),
    augustus_masked => q( --tracklabel "Augustus" --key "Augustus" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses '{"match_part": "transcript-exon3"}' --arrowheadClass transcript-arrowhead --type match:augustus_masked),
    genemark        => q( --tracklabel "GeneMark" --key "GeneMark" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses '{"match_part": "transcript-exon4"}' --arrowheadClass transcript-arrowhead --type match:genemark),
    genemark_masked => q( --tracklabel "GeneMark" --key "GeneMark" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses '{"match_part": "transcript-exon4"}' --arrowheadClass transcript-arrowhead --type match:genemark_masked),
    fgenesh         => q( --tracklabel "FGENESH" --key "FGENESH" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses '{"match_part": "transcript-exon5"}' --arrowheadClass transcript-arrowhead --type match:fgenesh),
    fgenesh_masked  => q( --tracklabel "FGENESH" --key "FGENESH" --getType --getSubs --getLabel --cssclass transcript --subfeatureClasses '{"match_part": "transcript-exon5"}' --arrowheadClass transcript-arrowhead --type match:fgenesh_masked),

    #evidence alignments
    blastn         => q( --tracklabel "BLASTN" --key "BLASTN" --getType --getSubs --cssclass generic_parent --subfeatureClasses '{"match_part": "match_part4"}' --type expressed_sequence_match:blastn),
    blastx         => q( --tracklabel "BLASTX" --key "BLASTX" --getType --getSubs --cssclass generic_parent --subfeatureClasses '{"match_part": "match_part5"}' --type protein_match:blastx),
    tblastx        => q( --tracklabel "TBLASTX" --key "TBLASTX" --getType --getSubs --cssclass generic_parent --subfeatureClasses '{"match_part": "match_part6"}' --type expressed_sequence_match:tblastx),
    est2genome     => q( --tracklabel "est2genome" --key "est2genome" --getType --getSubs --cssclass generic_parent --subfeatureClasses '{"match_part": "match_part3"}' --type expressed_sequence_match:est2genome),
    protein2genome => q( --tracklabel "protein2genome" --key "protein2genome" --getType --getSubs --cssclass generic_parent --subfeatureClasses '{"match_part": "match_part2"}' --type protein_match:protein2genome),

    #repeats
    repeatmasker        => q( --tracklabel "RepeatMasker" --key "RepeatMasker" --getType --getSubs --cssclass generic_parent --subfeatureClasses '{"match_part": "match_part7"}' --type match:repeatmasker),
    'blastx:repeatmask' => q( --tracklabel "RepeatRunner" --key "RepeatRunner" --getType --getSubs --cssclass generic_parent --subfeatureClasses '{"match_part": "match_part7"}' --type protein_match:blastx:repeatmask),
);
|---|
| 133 |
|
|---|
| 134 |
|
|---|
| 135 |
# Process each GFF3 file in turn:
#   1. split it into its annotation part and the sequence part that follows
#      a ##FASTA line, noting which sources (column 2) occur;
#   2. run $RS with --fasta on the sequence part, if any;
#   3. run $JB once per source found, using the arguments in %commands.
# Dies if a file cannot be opened or contains a source with no entry in
# %commands. A failing external command is reported with a warning and the
# run continues.
foreach my $file (@files){
    my $gff   = '';
    my $fasta = '';
    my %tracks;

    open(my $in, '<', $file)
        or die "ERROR: Could not open GFF3 file \'$file\': $!\n";

    my $ff; #fasta flag
    while(defined(my $line = <$in>)){
        if($ff){
            # everything after ##FASTA is sequence data
            $fasta .= $line;
        }
        elsif($line =~ /^\#\#FASTA/){
            $ff = 1;
        }
        elsif($line =~ /[^\t]*\t[^\t]*\tcontig\t/){
            next; #skip contig line
        }
        else{
            if($line !~ /^\#/ && $line =~ /[^\t]*\t([^\t]*)\t/){
                $tracks{$1}++;
                $tracks{gene}++ if($1 eq 'maker'); #add gene locus track
            }

            $gff .= $line;
        }
    }
    close($in);

    # Reject unknown sources before any external command has run, so that a
    # bad file neither leaves a partial load behind nor leaks a temp file.
    my @unknown = grep { ! exists $commands{$_} } sort keys %tracks;
    if(@unknown){
        die join('', map { "ERROR: No track information for source \'$_\'\n" } @unknown), "\n";
    }

    if($fasta){
        # UNLINK is a safety net in case the script dies before unlink()
        my ($fh, $fname) = File::Temp::tempfile(UNLINK => 1);
        print $fh $fasta;
        close($fh)
            or die "ERROR: Could not write temporary file \'$fname\': $!\n";

        my $command = $RS;
        $command .= " --fasta $fname";

        system($command) == 0
            or warn "WARNING: Command failed (status $?): $command\n";
        unlink($fname);
    }

    if($gff){
        my ($fh, $fname) = File::Temp::tempfile(UNLINK => 1);
        print $fh $gff;
        close($fh)
            or die "ERROR: Could not write temporary file \'$fname\': $!\n";

        # Sorted so that tracks are always loaded in the same order.
        foreach my $track (sort keys %tracks){
            my $command = $JB;
            $command .= $commands{$track};
            $command .= " --gff $fname";

            system($command) == 0
                or warn "WARNING: Command failed (status $?): $command\n";
        }

        unlink($fname);
    }
}
|---|