#!/usr/bin/perl -w package runyinoyang; use strict; use IO::File; use base 'Exporter'; our @EXPORT = qw/runyinoyang/; use formatAlleleSeq; #http://www.cbs.dtu.dk/services/YinOYang/output.php # size max protein: 4000 amino acids!!!! #For each sequence, this function will create a new file : Mitoprot work with only one sequence in one file. sub runyinoyang($$$$$$$){ my ($delta,$workpath,$outpath,$infile,$execommand,$encodingfile,$BASE_SEQ_NAME) = @_; my @args = (); my ($name,$seq,$seqname,$fh_temp,$outfile,$len); print "infile:$infile\n"; my $prog = "yinoyang"; my $input_file = "temp-".$prog.".fasta"; my $output_file = "temp-".$prog.".".$prog; # get the names of the parameter files my ($command) = $execommand =~ /\/([^\/]+)$/; # get the base name of the input file to create the ouput file name ($outfile) = $infile =~ /\/([^\/]+)$/; $outfile =~ s/\..*/_$prog\_results.res/; $outfile = $outpath."/".$outfile; print "OUTFILE: $outfile\n"; # create the symbolic links for the executable to the working directory # otherwise the resulting file cannot be obtained @args = ("ln", "-s", $execommand, "$workpath/."); unless (-e "$workpath/$command") {system(@args) == 0 or die "system @args failed: $?"}; # go to the working directory to run the command chdir "$workpath" or die "cannot change dir runyinoyang::runyinoyang chdir $workpath : $?"; # get the encoing information my ($h_encodingName,$h_encodingID,$h_name_allele,$h_name_position,$h_name_var_type) = getEncoding($encodingfile); # open the input and output files my $fh_in = new IO::File $infile || die "Can not open file $infile : $!";#Data my $fh_out = new IO::File ">>$workpath/$prog\_results.txt" || die "Can not create the result file (mitoprot) $workpath/$prog\_results.txt: $! \n"; while($name = <$fh_in>){ # check teh fasta format print "$name\n"; if($name =~ /^>.*/){ $fh_temp = new IO::File ">$workpath/temp-$prog.fasta" || die "Can not create $workpath/temp-$prog.fasta: $! \n"; print $fh_temp $name;#print the name of the SNP $seq = <$fh_in>; $len = length($seq); if($len >= 21 && $len <= 4000){ print $fh_temp $seq;#Print the sequence of the SNP $fh_temp->close(); # RUN the program in the working dir # the output file is localized where the command is launched (!?) @args = ("$workpath/$command", "$workpath/$input_file" , ">", "$workpath/$output_file"); system("$workpath/$command $workpath/$input_file > $workpath/$output_file") ;# == 0 or die "system @args failed runyinoyang: $?"; #~ @args = ("rm", "-f", "$workpath/temp-mitoprot.fasta"); #~ system(@args) == 0 or die "system @args failed: $?"; # generate an ouput file with results from mitoprot concatenated extract_yinoyang_results("$workpath/$output_file",$fh_out,$BASE_SEQ_NAME,$h_name_var_type); } else{ print "$name : protein size too long : $len => not analysed\n"; } } else{ print "Error file format not correct runyinoyang::runyinoyang: $infile should be fasta format\n"; } } $fh_in->close(); $fh_out->close(); ############################################################# # compare results for the 2 alleles and interpret the results getyinoyangVariations($delta,"$workpath/$prog\_results.txt",$h_encodingID,$h_encodingName,$h_name_position,$h_name_allele,$outfile,"$outpath/$prog\_title.txt",$h_name_var_type,$h_name_allele); } # Concatenation sub extract_yinoyang_results($$$$){ my ($resfile, $fh_out, $BASE_SEQ_NAME,$h_name_var_type) = @_; my $prog="yinoyang"; my ($name,$position,$signal,$score); my $fh_in = new IO::File $resfile || die "Can not open the output file of $prog $resfile : $! \n"; while(<$fh_in>){ if($_ =~ m/^$BASE_SEQ_NAME\d+\s*\d*\s*\S*\s*\S*\s*\S*/){ ($name,$position,$signal,$score) = $_ =~/^([^\s]*)\s*(\d*)\s*\S*\s*(\S*)\s*(\S*)/; if($h_name_var_type->{$name} eq "snp"){ ### print $fh_out "$name\t$position\t$signal\t$score\n"; } } } $fh_in->close(); } #Find the transcript who win/lost phosphorylation signal sub getyinoyangVariations($$$$$$$$){ my ($delta,$infile,$h_encodingID,$h_encodingName,$h_name_position,$h_name_allele,$outfile,$titlefile,$h_name_var_type,$h_name_allele)) = @_; my $fh_in = new IO::File $infile || die "Can not open nrunyinoyang::getyinoyangVariations : $infile : $! \n"; my $fh_out = new IO::File ">$outfile" || die "Can not open $outfile : $! \n"; my ($line,$allele,$pos,$enzyme,$score,$name,$seq_name,$signal,$basis_name,$key_name,$diff_score); my ($nameA1,$nameA2,$score_diff); my ($start,$end,$ref,$alt,$info,$end_alt,$i); my %score_allele1 = (); # key: seq_name my %score_allele2 = (); my %signal_allele1 = (); my %signal_allele2 = (); my %seq_list = (); my %netcglyc_res = (); my %change_position = (); my @tab1 = ""; my @tab2 = ""; my $c = 2; my %ref_pos = (); my %alt_pos = (); while($line=<$fh_in>){ # get the information ($seq_name,$pos,$signal,$score) = $line =~ /([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\n]*)/; # check that the id is existing if(defined $h_encodingID->{$seq_name}){ ($basis_name,$allele) = $h_encodingID->{$seq_name} =~ /(.*)_(allele\d)$/; if($h_name_var_type->{$seq_name} eq "snp"){ ### if($pos eq $h_name_position->{$seq_name}){ if($h_name_allele->{$seq_name} eq "allele1"){ $signal_allele1{$basis_name} = $signal; $score_allele1{$basis_name} = $score; } elsif($h_name_allele->{$seq_name} eq "allele2"){ $signal_allele2{$basis_name} = $signal; $score_allele2{$basis_name} = $score; print "$basis_name : $signal_allele2{$basis_name} - $score_allele2{$basis_name}\n\n"; } else{ print "PB: $seq_name : $basis_name:$h_name_allele->{$seq_name} function nrunyinoyang::getyinoyangVariations\n"; exit(12); } } } # INDELS else{ # get the [REF/ALT-POS] information #~ print "$h_encodingID->{$name}\n"; ($info) = $h_encodingID->{$seq_name} =~ /\[([^\/]+\/.+\-\d*\-*\d+)\]_allele\d*/; # retrives the amino acids to check on ($start,$end,$end_alt) = analyseIndelChange($info); #~ print "coordinates: $start,$end,$end_alt\nPOSITION: $pos\n"; # get the signals from the reference %ref_pos = (); for($i=$start;$i<=$end;$i++){ $ref_pos{$i} = ""; #~ print "ref:$i\n"; } # get the signals from the alternative %alt_pos = (); for($i=$start;$i<=$end_alt;$i++){ $alt_pos{$i} = ""; #~ print "alt:$i\n"; } print "--> POS FROM RES: $pos\n"; if((defined $ref_pos{$pos}) and ($h_name_allele->{$seq_name} eq "allele1")){ $score_allele1{$basis_name} = "indel"; if(not defined $signal_allele1{$basis_name}){ $signal_allele1{$basis_name} = 1; } else{ $signal_allele1{$basis_name} = $signal_allele1{$basis_name} + 1; # number of signals on the reference for the indel } print "REF:$basis_name:$signal_allele1{$basis_name}\n"; } if((defined $alt_pos{$pos}) and ($h_name_allele->{$seq_name} eq "allele2")){ $score_allele2{$basis_name} = "indel"; if(not defined $signal_allele2{$basis_name}){ $signal_allele2{$basis_name} = 1; } else{ $signal_allele2{$basis_name} = $signal_allele2{$basis_name} + 1; # number of signals on the alternative for the indel } print "ALT:$basis_name:$signal_allele2{$basis_name}\n"; } } } else{ print "PB: No encoding name for *$seq_name* in runyinoyang::getyinoyangVariations\n"; exit(11); } } $fh_in->close(); # analyse the information # to analyse the difference between the 2 alleles my $fh_title = new IO::File ">$titlefile" || die "Can not open $titlefile : $! \n"; print $fh_title ("Sequence Name\tCase\tscore allele1\tscore allele 2\tsignal allele 1\tsignal allele 2\n"); $fh_title->close(); foreach $basis_name (keys %signal_allele1){ #~ ($basis_name) = $h_encodingID->{$seq_name} =~ /(.*)_allele\d$/; #~ $nameA1 = $seq_name; #~ $name = $h_encodingID->{$seq_name}; #~ $name =~ s/allele1/allele2/; #~ $nameA2 = $h_encodingName->{$name}; if($score_allele1{$basis_name} eq "indel" ){ print "$basis_name : $signal_allele1{$basis_name} != $signal_allele2{$basis_name}\n"; # the number of signals is different between reference and alternative at the indel part if ($signal_allele1{$basis_name} != $signal_allele2{$basis_name}){ print $fh_out ("$basis_name\tdiff\t$signal_allele1{$basis_name}\t$signal_allele2{$basis_name}\n"); } delete $signal_allele2{$basis_name}; } # snp else{ if(not defined $signal_allele2{$basis_name}){ print $fh_out ("$basis_name\tloss\t$score_allele1{$basis_name}\t\t$signal_allele1{$basis_name}\t\n"); } else{ $score_diff = $score_allele1{$basis_name} - $score_allele2{$basis_name}; $score_diff =~ s/-//; if($score_diff >= $delta && $score_allele1{$basis_name} > $score_allele2{$basis_name}){ print $fh_out ("$basis_name\tloss?\t$score_allele1{$basis_name}\t$score_allele2{$basis_name}\t$signal_allele1{$basis_name}\t$signal_allele2{$basis_name}\n"); } elsif($score_diff >= $delta && $score_allele1{$basis_name} < $score_allele2{$basis_name}){ print $fh_out ("$basis_name\tgain?\t$score_allele1{$basis_name}\t$score_allele2{$basis_name}\t$signal_allele1{$basis_name}\t$signal_allele2{$basis_name}\n"); } } delete $signal_allele2{$basis_name}; } } foreach $basis_name (keys %signal_allele2){ #~ ($basis_name) = $h_encodingID->{$seq_name} =~ /(.*)_allele\d$/; #~ #~ $nameA2 = $seq_name; #~ $name = $h_encodingID->{$seq_name}; #~ $name =~ s/allele2/allele1/; #~ $nameA1 = $h_encodingName->{$name}; if(not defined $signal_allele1{$basis_name}){ print $fh_out ("$basis_name\tgain\t\t$score_allele2{$basis_name}\t\t$signal_allele2{$basis_name}\n"); } else{ $score_allele1{$basis_name} =~ s /\,/\./; $score_allele2{$basis_name} =~ s /\,/\./; $score_diff = $score_allele1{$basis_name} - $score_allele2{$basis_name}; $score_diff =~ s/-//; if($score_diff >= $delta && $score_allele1{$basis_name} > $score_allele2{$basis_name}){ print $fh_out ("$basis_name\tloss?\t$score_allele1{$basis_name}\t$score_allele2{$basis_name}\t$signal_allele1{$basis_name}\t$signal_allele2{$basis_name}\n"); } elsif($score_diff >= $delta && $score_allele1{$basis_name} < $score_allele2{$basis_name}){ print $fh_out ("$basis_name\tgain?\t$score_allele1{$basis_name}\t$score_allele2{$basis_name}\t$signal_allele1{$basis_name}\t$signal_allele2{$basis_name}\n"); } } } $fh_out->close(); } #~ #For each sequence, this function will create a new file : YinOYang work with only one sequence in one file. #~ sub init($$$){ #~ my ($delta, $path, $date) = @_; #~ print "Running YinOYang.\n"; #~ unless (-e "$path/results/YinOYang"){ #~ system("mkdir results/YinOYang"); #~ } #~ my $sub = new IO::File "fic/sub_$date" || die "Can not open file sub : $!\n"; #~ my $i = 1; #~ while(<$sub>){ #~ if($_ =~ /^>(.*)/){ #~ my $temp = new IO::File ">fic/temp-yinoyang_$date" || die "Can not create fic/temp-yinoyang_$date : $! \n"; #~ my $name = $1; #~ print $temp ">@"."$name\n"; #~ $_ =<$sub>; #~ print $temp $_; #~ $temp->close(); #~ system("mv fic/temp-yinoyang_$date $path/Programmes/YinOYang/yinOyang-1.2");#Move file for run YinOYang #~ chdir "$path/Programmes/YinOYang/yinOyang-1.2"; #~ `./yinOyang temp-yinoyang_$date >results-yinoyang_$date`; #~ system("rm -f tempfile");#Clean #~ my $temp2 = new IO::File "results-yinoyang_$date" || die "Can not open the result file of YinOYang : $! \n"; #~ my $res = new IO::File ">>$path/results/YinOYang/yinoyang_$date" || die "Can not create the result file (yinoyang) : $! \n"; #~ while(<$temp2>){ #~ extract_results($res); #~ } #~ $temp2->close(); #~ $res->close(); #~ #system("mv tempfile.tmp $path/results/YinOYang/seq$i");#Rename and replace file (optional) #~ $i++; #~ chdir "$path"; #~ } #~ else{ #~ print "Error\n"; #~ } #~ } #~ $sub->close(); #~ search_var($delta,$path,$date); #~ } #~ #~ #Concatenation in results/YinOYang/yinoyang #~ sub extract_results($){ #~ my $res = shift; #~ if($_ =~ m/^__(\d*)\s*(\d*)\s*\S*\s*(\S*)\s*(\S*)/){ #~ my $name = $1; #~ my $position = $2; #~ my $agree = $3; #~ my $score = $4; #~ print $res ">$name $position $score $agree\n"; #~ } #~ } #~ #~ sub search_var($$$){ #~ my ($delta, $path, $date) = @_; #~ #~ my $sav = new IO::File "fic/sav_$date" || die "Can not open sav_$date : $! \n"; #~ my %id;#Hash with all name / id #~ while(<$sav>){ #~ if($_ =~ m/^>(\S*)\s*(.*)/){ #~ $id{$2} .= $1; #~ } #~ } #~ $sav->close(); #~ #~ my $tmp = new IO::File "$path/results/YinOYang/yinoyang_$date" || die "$! \n"; #~ my $yinoyang = new IO::File ">$path/results/programs/yinoyang_$date" || die "$! \n"; #~ my @tab1 = ""; #~ my @tab2 = ""; #~ my $allele = ""; #~ my $c = 2; #~ while(<$tmp>){ #~ my $line = $_; #~ if($line =~ s/^>(\d*)/$id{$1}/){#Substitute the ID by his coresponding name #~ if($line =~ m/(\S*)_allele(\d)\s(\d*)\s(\S*)\s(\S*)/){ #~ if($2 eq $allele){ #~ if($c == 0){#Same allele1 #~ $allele = $2; #~ my $position = $3; #~ my $score = $4; #~ if($5 =~ m/\++/){ #~ push(@tab1,$position,$score); #~ } #~ } #~ elsif($c == 1){#Same allele2 #~ $allele = $2; #~ my $position = $3; #~ my $score = $4; #~ if($5 =~ m/\++/){ #~ push(@tab2,$position,$score); #~ } #~ } #~ } #~ else{ #~ if($c == 0){#Other allele #~ $c++; #~ my $name = $1; #~ $allele = $2; #~ my $position = $3; #~ my $score = $4; #~ push(@tab2,$name); #~ if($5 =~ m/\++/){ #~ push(@tab2,$position,$score); #~ } #~ } #~ elsif($c == 1){#Other transcript #~ compare($yinoyang,$delta,\@tab1,\@tab2); #~ @tab1 = "";#Clean tab1 #~ @tab2 = "";#Clean tab2 #~ my $name = $1; #~ $allele = $2; #~ my $position = $3; #~ my $score = $4; #~ push(@tab1,$name); #~ if($5 =~ m/\++/){ #~ push(@tab1,$position,$score); #~ } #~ $c = 0; #~ } #~ elsif($c == 2){#Initialisation #~ my $name = $1; #~ $allele = $2; #~ my $position = $3; #~ my $score = $4; #~ push(@tab1,$name); #~ if($5 =~ m/\++/){ #~ push(@tab1,$position,$score); #~ } #~ $c = 0; #~ } #~ } #~ } #~ } #~ } #~ compare($yinoyang,$delta,\@tab1,\@tab2);#For the last transcript #~ $tmp->close(); #~ $yinoyang->close(); #~ } #~ #~ sub compare{ #~ my ($yinoyang,$delta,$ref1,$ref2) = @_; #~ my @tab1 = @$ref1; #~ my @tab2 = @$ref2; #~ my $j = 2; #~ for(my $i = 2; $i<$#tab1+1; $i=$i+2){ #~ if(defined($tab1[$i]) && defined($tab2[$j])){ #~ if($tab1[$i] == $tab2[$j]){#Same position #~ my $diffscore = $tab1[$i+1] - $tab2[$j+1]; #~ if($diffscore >= $delta){ #~ print $yinoyang "$tab1[1] : Signal lost at position $tab1[$i]. Score allele 1 = $tab1[$i+1] - Score allele 2 = $tab2[$j+1]\n"; #~ } #~ elsif($diffscore <= -$delta){ #~ print $yinoyang "$tab1[1] : Signal gain at position $tab1[$i]. Score allele 1 = $tab1[$i+1] - Score allele 2 = $tab2[$j+1]\n"; #~ } #~ } #~ elsif($tab1[$i] > $tab2[$j]){ #~ print $yinoyang "$tab1[1] : Signal gain at position $tab2[$j]. Score : $tab2[$j+1]\n"; #~ $i = $i-2;#Stay in same column tab1 #~ } #~ elsif($tab1[$i] < $tab2[$j]){ #~ print $yinoyang "$tab1[1] : Signal lost at position $tab1[$i]. Score : $tab1[$i+1]\n"; #~ $j = $j-2;#Stay in same column tab2 #~ } #~ $j=$j+2; #~ } #~ } #~ $j=0; #~ } 1;