Vulgar2GFF program


#!/usr/bin/perl -w

use strict;
use warnings;

# Vulgar2GFF: reads Exonerate output, picks out the "vulgar" lines and prints
# one GFF line per exon of every alignment.
#
# Fields of a vulgar line after splitting on whitespace:
#    0 "vulgar:"       1 query_id       2 query_start    3 query_end
#    4 query_strand    5 target_id      6 target_start   7 target_end
#    8 target_strand   9 score
#   10.. triplets of (label, query_length, target_length)
#
# Exon coordinates are computed on the target sequence by walking every
# triplet from target_start and adding up the target lengths.  An exon is a
# maximal run of triplets that are not part of an intron; introns are made of
# the labels "5" (5' splice site), "I" (intron) and "3" (3' splice site).
# Split codons ("S"), gaps ("G"), codons ("C") and frameshifts ("F") therefore
# count towards the exon they sit in.
#
# Output columns (tab separated):
#   query_id, "exonerate:protein2genome", "exon1" for the first exon of an
#   alignment and "exon" for the others, start, end, score, target_strand,
#   ".", "gene_id N" where N counts the vulgar lines read so far.
#
# NOTE(review): column 1 carries the query id (field 1) although the
# coordinates are target coordinates; GFF consumers normally expect the
# target id (field 5) there -- confirm with the users of this output before
# changing it.

# Labels that make up an intron; every other label extends the current exon.
my %INTRON_LABEL = map { $_ => 1 } qw(5 I 3);

# Turns a pair of in-between (0-based) positions into a 1-based, inclusive
# [start, end] pair with start <= end.  Returns nothing for an empty span so
# that zero-length "exons" never reach the output.
sub _span {
    my ($from, $to) = @_;
    return if $from == $to;
    my ($low, $high) = $from < $to ? ($from, $to) : ($to, $from);
    return [ $low + 1, $high ];
}

# Parses one chomped vulgar line.
# Returns a hash ref with the keys
#   seqname - field 1 (query id), printed in GFF column 1
#   score   - field 9
#   strand  - field 8 (target strand)
#   exons   - array ref of [start, end] pairs in alignment order
# or undef (empty list in list context) when the line is malformed: fewer
# than one full triplet, a dangling partial triplet, or a non-numeric target
# start, target end or target length.
sub parse_vulgar {
    my ($line) = @_;
    my @f = split ' ', $line;

    my $triplet_fields = @f - 10;
    return if $triplet_fields < 3 or $triplet_fields % 3;
    return if grep { !/\A\d+\z/ } @f[ 6, 7 ];

    # Reverse-strand alignments are reported with target_start > target_end,
    # so the walk goes downwards along the target in that case.
    my $step = $f[6] > $f[7] ? -1 : 1;

    my $pos = $f[6];    # current in-between position on the target
    my $exon_from;      # where the open exon started; undef inside an intron
    my @exons;

    for (my $i = 10; $i < @f; $i += 3) {
        my ($label, $target_len) = @f[ $i, $i + 2 ];
        return unless $target_len =~ /\A\d+\z/;

        if ($INTRON_LABEL{$label}) {
            # Entering (or continuing) an intron closes any open exon.
            push @exons, _span($exon_from, $pos) if defined $exon_from;
            undef $exon_from;
        }
        elsif (!defined $exon_from) {
            $exon_from = $pos;
        }
        $pos += $step * $target_len;
    }
    # The alignment normally ends inside an exon: close it.
    push @exons, _span($exon_from, $pos) if defined $exon_from;

    return {
        seqname => $f[1],
        score   => $f[9],
        strand  => $f[8],
        exons   => \@exons,
    };
}

# Formats the exons of one parsed alignment as GFF lines (each ending in a
# newline).  $gene is the number written into the "gene_id" attribute.
sub gff_lines {
    my ($record, $gene) = @_;
    my @lines;
    my $feature = 'exon1';    # only the first exon gets this feature name
    for my $exon (@{ $record->{exons} }) {
        push @lines, join("\t",
            $record->{seqname}, 'exonerate:protein2genome', $feature,
            $exon->[0], $exon->[1],
            $record->{score}, $record->{strand}, '.', "gene_id $gene") . "\n";
        $feature = 'exon';
    }
    return @lines;
}

my $gene = 0;

while (my $line = <>) {
    next unless $line =~ /^vulgar/;
    $gene++;    # every vulgar line gets its own id, even if it is skipped below
    chomp $line;

    my $record = parse_vulgar($line);
    unless ($record) {
        warn "line $.: malformed vulgar line skipped\n";
        next;
    }
    print gff_lines($record, $gene);
}