#!/usr/bin/perl -w use strict; open (FILEHANDLE,"<$ARGV[0]"); my %motif = ("A" => [0,0,0,0,0,0,0,0,0], "C" => [0,0,0,0,0,0,0,0,0], "G" => [0,0,0,0,0,0,0,0,0], "T" => [0,0,0,0,0,0,0,0,0]); my $nombre;#nombre del factor de transcripcion my $posicion = 0; my $count = 0;#para contar las lineas my $i;#filas del hash my $j;#columnas del hash while (){ #####1. Lectura de matrices (hash de vectores)#### if ($_ =~ m/FA/){ $nombre = $_; chomp ($nombre); print "$nombre\n"; } if ($_=~ m/(\d+)\t(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/){ $motif{"A"}[$posicion]= $2; $motif{"C"}[$posicion]= $3; $motif{"G"}[$posicion]= $4; $motif{"T"}[$posicion]= $5; $posicion = $posicion +1; $count = $count +1; } #impresión de las matrices if (($_ =~ m/(\/\/)\Z/) && ($count > 1)){ my @k= ("A","C","G","T"); my $i = 0; while ($i < scalar(@k)){ my $j = 0; print $k[$i]; while ($j < $count){ print "\t $motif{$k[$i]}[$j]"; $j = $j + 1; } print "\n"; $i = $i + 1; } $count=0; ##### 2. Matriz de pesos #### my %positionweight = ("A" => [0, 0, 0, 0, 0, 0, 0, 0, 0], "C" => [0, 0, 0, 0, 0, 0, 0, 0, 0], "G" => [0, 0, 0, 0, 0, 0, 0, 0, 0], "T" => [0, 0, 0, 0, 0, 0, 0, 0, 0]); open (FILEHANDLE2,"<$ARGV[1]"); my $a = 0; my $c = 0; my $g = 0; my $t = 0; my $propa; my $propc; my $propg; my $propt; my @genoma; my $seq; while (){ if(/>/){} else{ chomp $_; $seq = $seq . $_; } } @genoma = split(//,$seq); for (my $is=0;$is<=$#genoma;$is++){ if ($genoma[$is] eq "a") { $a = $a+1; } if ($genoma[$is] eq "c") { $c = $c+1; } if ($genoma[$is] eq "g") { $g = $g+1; } if ($genoma[$is] eq "t") { $t = $t+1; } } $propa = $a/scalar(@genoma); $propc = $c/scalar(@genoma); $propg = $g/scalar(@genoma); $propt = $t/scalar(@genoma); for(my $is=0;$is<$posicion;$is++){ my $div=$motif{"A"}[$is]+$motif{"C"}[$is]+$motif{"G"}[$is]+$motif{"T"}[$is]; my $s = $motif{"A"}[$is]; my $weight; if ($s == 0){ $weight = -999; } else{ $weight = log($s/$div) - log($propa); } $positionweight{"A"}[$is] = $weight; $s = $motif{"C"}[$is]; if ($s == 0){ $weight = -999; } else{ $weight = log($s/$div) - log($propc); } $positionweight{"C"}[$is] = $weight; $s = $motif{"G"}[$is]; if ($s == 0){ $weight = -999; } else{ $weight = log($s/$div) - log($propg); } $positionweight{"G"}[$is] = $weight; $s = $motif{"T"}[$is]; if ($s == 0){ $weight = -999; } else{ $weight = log($s/$div) - log($propt); } $positionweight{"T"}[$is] = $weight; } my @m = keys(%positionweight); my $iss = 0; while ($iss < scalar(@k)){ my $j = 0; print "$k[$iss]:" ; while ($j < $posicion){ printf("\t%.2f", $positionweight{$k[$iss]}[$j] ); $j = $j + 1; } print "\n"; $iss = $iss + 1; } #####3. Posición y máxima puntuación #### my $total = 0; my $fpos = 0; for (my $i=0;$i<=$#genoma-$posicion;$i++){ my $puntuacion = 0; for (my $j=0;$j<$posicion;$j++){ if (($genoma[$i+$j])eq "a"){$puntuacion = $puntuacion + $positionweight{"A"}[$j];} if (($genoma[$i+$j])eq "c"){$puntuacion = $puntuacion + $positionweight{"C"}[$j];} if (($genoma[$i+$j])eq "g"){$puntuacion = $puntuacion + $positionweight{"G"}[$j];} if (($genoma[$i+$j])eq "t"){$puntuacion = $puntuacion + $positionweight{"T"}[$j];} } if ($puntuacion>$total){ $total = $puntuacion; $fpos = $i; } } print "máxima puntuación: $total\n"; print "posición: $fpos\n"; my $pvalue; my $amount = 0; for (my $z=0;$z<100;$z++){ my $n = scalar(@genoma); my $i = $n - 1; while ($i >= 0) { my $j = int(rand($i+1)); if ($i != $j) { my $tmp = $genoma[$i]; $genoma[$i] = $genoma[$j]; $genoma[$j] = $tmp; } $i = $i - 1; } my $max = 0; my $puntuacion; for (my $i=0;$i<=$#genoma-$posicion;$i++){ $puntuacion = 0; for (my $j=0;$j<$posicion;$j++){ if (($genoma[$i+$j])eq "a"){$puntuacion = $puntuacion + $positionweight{"A"}[$j];} if (($genoma[$i+$j])eq "c"){$puntuacion = $puntuacion + $positionweight{"C"}[$j];} if (($genoma[$i+$j])eq "g"){$puntuacion = $puntuacion + $positionweight{"G"}[$j];} if (($genoma[$i+$j])eq "t"){$puntuacion = $puntuacion + $positionweight{"T"}[$j];} } if ($puntuacion>$max){ $max = $puntuacion; } } if ($max>=$total){ $amount = $amount +1; } } $pvalue = $amount/100; print "El valor p-value asociado al factor de transcripción es $pvalue\n"; %positionweight = ("A" => [0, 0, 0, 0, 0, 0, 0, 0, 0], "C" => [0, 0, 0, 0, 0, 0, 0, 0, 0], "G" => [0, 0, 0, 0, 0, 0, 0, 0, 0], "T" => [0, 0, 0, 0, 0, 0, 0, 0, 0]); $posicion=0; } } close (FILEHANDLE2); close(FILEHANDLE);