#!/usr/bin/perl -w use strict; use warnings; my $genome = '/mnt/NFS_UPF/soft/genomes/2019/Craseonycteris_thonglongyai'; #Here you can change your genome of interest. print "Welcome to PrediProt!\n"; print "\n"; if (-e "Query"){ system ("ls Query/ > protnames.txt"); system ("sed -i 's/.aa.fa//g' protnames.txt"); } else { print "Before start, please be sure that a folder called 'Query' exists in the current directory, within the files of the protein sequence you want to compare with your interested genome. This files should be named as (protein.name).aa.\n"; print "Press enter to continue: \n"; my $hola = ; } my $file = 'protnames.txt'; open my $info, $file or die "Could not open $file: $!"; my $c = 0; print "This is the list of proteins that you can analyse:\n"; while(my $line = <$info>) { $c = $c + 1; print "$c $line"; } close $info; print "Please type the name of the protein you want to predict:\n"; my $nom = ; #Entra nom fitxer (nom proteïna) chomp ($nom); system ("mkdir $nom"); system ("cp ./Query/$nom.aa.fa ./$nom"); chdir($nom); #TBLASTN print "TBLASTN is looking for alignments... Be patient!\n"; system ("tblastn -query $nom.aa.fa -db $genome/genome.fa -out $nom.blast -outfmt 7"); print "TBLASTN was performed.\n"; my $file_blast = "$nom.blast"; my $n = 0; my $namescaff = 'nothing'; my $numscaff = 0; my $wantedscaffolds; while ($n == 0) { open my $fh, $file_blast or die "Could not open $file: $!"; print "What is the minimum identity you are looking for in the hits?\n"; my $id = ; chomp ($id); print "What is the maxinum e-value you are looking for in the hits?\n"; my $ev = ; chomp ($ev); while (my $line2 = <$fh>) { if ($line2 !~ /#/) { my @linearray=split (/\t/, $line2); if ($linearray[2] >= $id && $linearray[10] <= $ev){ print "$line2"; if ($namescaff ne $linearray[1]) { $namescaff = $linearray[1]; $numscaff = $numscaff + 1; }; $n = $n + 1; } } } if ($n == 0) { print "We could not find any hit with identity > $id% and e-value < $ev. Please type again:\n"; } else { print "TBLASTN found $numscaff scaffold(s). How many scaffolds would you like to analyse? (Type '0' if you would like to search for hits with different criteria.)"; $wantedscaffolds = ; print "\n"; if ($wantedscaffolds == 0){ $n = 0; $numscaff = 0; } } close $fh; } my $performedscaff = 0; my $scaffold; my $sstart; while ($performedscaff < $wantedscaffolds) { $performedscaff = $performedscaff + 1; print ("Please type the name of the scaffold you are interested in: "); $scaffold = ; print "\n"; chomp ($scaffold); print ("Please type the start position of aligment in the scaffold: "); $sstart = ; print "\n"; chomp ($sstart); mkdir ($nom.$scaffold); chdir ($nom.$scaffold); #FASTAFETCH system ("fastafetch $genome/genome.fa $genome/genome.index $scaffold > $nom.$scaffold.fastafetch.fa"); print "Fastafetch was performed.\n"; #FASTASUBSEQ my $l = `cat $nom.$scaffold.fastafetch.fa | wc -l`; #calculation of the lenght of the scaffold $l = $l - 2; $l = $l * 70; my $inici = $sstart - 50000; if ($inici < 0) { $inici = 0; } my $longitud = 100000; if ($longitud + $inici > $l) {$longitud = $l - $inici}; #To avoid positions longer than the scaffold lenght system ("fastasubseq $nom.$scaffold.fastafetch.fa $inici $longitud > $nom.$scaffold.subseq.fa"); print "Fastasubseq was performed.\n"; #EXONERATE system ("exonerate -m p2g --showtargetgff --exhaustive yes -q ../$nom.aa.fa -t $nom.$scaffold.subseq.fa | egrep -w exon > $nom.$scaffold.exon.gff"); print "Exonerate was performed.\n"; #FastaseqfromGFF system ("fastaseqfromGFF.pl $nom.$scaffold.subseq.fa $nom.$scaffold.exon.gff > $nom.$scaffold.pred.nuc.fa"); print "FastaseqfromGFF was performed.\n"; #Fastatranslate system ("fastatranslate -f $nom.$scaffold.pred.nuc.fa -F 1 > $nom.$scaffold.pred.aa.fa"); system ("sed -i 's/*/X/g' $nom.$scaffold.pred.aa.fa"); print "Fastatranslate was performed.\n"; #T_COFFEE system ("t_coffee ../$nom.aa.fa $nom.$scaffold.pred.aa.fa > results.$nom.$scaffold 2> t.coffee.log"); my $error = `egrep -i ERROR t.coffee.log`; if ($error ne ''){ print "$error"; } else { print "T_COFFEE was performed.\n"; } chdir (".."); } print "All the scaffolds have been analysed! Thank you for using PrediProt!";