root/bin/gff3_2_gtf

Revision 222, 3.2 kB (checked in by cholt, 5 months ago)

update maker

  • Property svn:executable set to *
Line 
1 #! /usr/bin/perl -w
2 use strict;
3
4 my $file = shift;
5
6 #parse file
7 open (IN, "< $file");
8 my @genes;
9 my @mRNAs;
10 my @stuff;
11
12 while (defined(my $line = <IN>)){
13     #skip comments and fasta
14     last if($line =~ /^\#\#FASTA/);
15     next if($line =~ /^\#/);
16
17     #add contig lines to header
18     my $f = parse($line);
19     next if($f->{type} eq 'contig');
20
21     push(@genes, $f) if($f->{type} eq 'gene');
22     push(@mRNAs, $f) if($f->{type} eq 'mRNA');
23     push(@stuff, $f) if($f->{type} eq 'CDS');
24     push(@stuff, $f) if($f->{type} eq 'five_prime_UTR');
25     push(@stuff, $f) if($f->{type} eq 'three_prime_UTR');
26     push(@stuff, $f) if($f->{type} eq 'start_codon');
27     push(@stuff, $f) if($f->{type} eq 'stop_codon');
28 }
29 close(IN);
30
31 #organize transcript features
32 my %stuffinx;
33 foreach my $f (@stuff){
34     my $parent = $f->{Parent};
35     push (@{$stuffinx{$parent}}, $f);
36 }
37
38 #organize mRNAs
39 my %mRNAinx;
40 foreach my $f (@mRNAs){
41     my $parent = $f->{Parent};
42     push (@{$mRNAinx{$parent}}, $f);
43 }
44
45 foreach my $g (@genes){
46     my $gene_id = $g->{ID};
47     my $g_mRNAs = $mRNAinx{$g->{ID}};
48
49     foreach my $t (@$g_mRNAs){
50         my $transcript_id = $t->{ID};
51         my $t_stuff = $stuffinx{$t->{ID}};
52
53         my @UTRs   = grep {$_->{type} =~ /UTR$/} @$t_stuff;
54         my @CDSs   = grep {$_->{type} eq 'CDS'} @$t_stuff;
55         my @stops  = grep {$_->{type} eq 'stop_codon'} @$t_stuff;
56         my @starts = grep {$_->{type} eq 'start_codon'} @$t_stuff;
57
58         @CDSs = fix_CDS(\@CDSs,\@stops);
59        
60         print_it($gene_id, $transcript_id, @$t_stuff);
61     }
62 }
63
64 #------------------------SUBS------------------------
65 sub fix_CDS {
66     my $CDSs  = shift;
67     my $stops = shift;
68
69     my @keepers;
70     foreach my $CDS (@$CDSs){
71         my $cB = $CDS->{start};
72         my $cE = $CDS->{end};
73
74         foreach my $stop (@$stops){
75             my $sB = $stop->{start};
76             my $sE = $stop->{end};
77
78             if($cB == $sB){
79                 $CDS->{start} += 3;
80             }
81             elsif($cE == $sE){
82                 $CDS->{end} -= 3;
83             }
84         }
85        
86         push(@keepers, $CDS);
87     }
88
89     return @keepers
90 }
91
92 sub print_it {
93     my $gene_id = shift;
94     my $transcript_id = shift;
95    
96     ($transcript_id) = $transcript_id =~ /^([^\s\t\n]+)/;
97     while(my $f = shift @_){
98         $f->{type} = '5UTR' if($f->{type} eq 'five_prime_UTR');
99         $f->{type} = '3UTR' if($f->{type} eq 'three_prime_UTR');
100        
101         print $f->{seqid} . "\t";
102         print $f->{source} . "\t";
103         print $f->{type} . "\t";
104         print $f->{start} . "\t";
105         print $f->{end} . "\t";
106         print $f->{score} . "\t";
107         print $f->{strand} . "\t";
108         print $f->{phase} . "\t";
109         print "gene_id \"".$gene_id."\"\;";
110         print " transcript_id \"".$transcript_id."\"\;";
111         print "\n";
112     }
113 }
114
115 sub parse {
116     my $l = shift;
117    
118     chomp $l;
119     my @F = split("\t", $l);
120     my @att = attrib($F[8]);
121    
122     my $f = {seqid => $F[0],
123              source => $F[1],
124              type => $F[2],
125              start => $F[3],
126              end => $F[4],
127              score => $F[5],
128              strand => $F[6],
129              phase => $F[7],
130              ID => $att[0],
131              Name => $att[1],
132              Parent => $att[2]};
133    
134     return $f;
135 }
136
137 sub attrib {
138     my $att = shift;
139
140     my ($ID) = $att =~ /ID\=([^\;\n]+)/;
141     my ($name) = $att =~ /Name\=([^\;\n]+)/;
142     my ($parent) = $att =~ /Parent\=([^\;\n]+)/;   
143    
144     $ID = $name if(! defined $ID);
145     $name = $ID if(! defined $name);
146    
147     return ($ID, $name, $parent);
148 }
Note: See TracBrowser for help on using the browser.