#!/usr/bin/perl -w package runnetphos; use strict; use IO::File; use base 'Exporter'; our @EXPORT = qw/runnetphos/; use formatAlleleSeq; #For each sequence, this function will create a new file : netphos works with only one sequence in one file. sub runnetphos($$$$$$$){ my ($delta,$outpath,$infile,$execommand,$outfile,$encodingfile,$BASE_SEQ_NAME) = @_; my @args = (); my ($name,$seq,$seqname,$fh_tempmitoprot,$name2); my $prog = "netphos"; my $result_file = "$outpath/$prog\_output.txt"; # get the names of the parameter files my ($command) = $execommand =~ /\/([^\/]+)$/; $command = "netphos-3.1"; # create the symbolic links for the executable to the working directory # otherwise the resulting file cannot be obtained @args = ("ln", "-s", $execommand, "$outpath/$command"); unless (-e "$outpath/$command") {system(@args) == 0 or die "system @args failed: $?"}; # go to the working directory to run the command chdir "$outpath" or die "cannot change dir rumiptoprot::runmitoprot chdir $outpath : $?"; # get the encoing information my ($h_encodingName,$h_encodingID,$h_name_allele,$h_name_position,$h_name_var_type) = getEncoding($encodingfile); # open the input and output files my $fh_in = new IO::File $infile || die "Can not open file $infile : $!";#Data my $fh_out = new IO::File ">$result_file" || die "Can not create the result file ($prog) $result_file: $! \n"; my $fh_stderr = new IO::File ">$outpath/errors.txt" || die "Can not open file $outpath/errors.txt : $!";#Errors file print $fh_stderr "\nErrors for $prog:\n"; my $i = 1; my $k = 0; while($name = <$fh_in>){ # check the fasta format if($name =~ /^>(.*)/){ $seqname = $1; # short version of teh name ex: _seq $fh_tempmitoprot = new IO::File ">$outpath/temp.fasta" || die "Can not create $outpath/temp.fasta: $! \n"; print $fh_tempmitoprot $name;#print the name of the SNP $seq = <$fh_in>; # check on the protein sequence length, sequences longer than 6000 aa cannot be used if(length($seq) > 4000){#Sequences too longs. ($name2) = $name =~ />(.*)/; print $fh_stderr $h_encodingID->{$name2}." - sequence too long\n"; $k++; next; } # check that the sequence contains only standard AA if($seq =~ m/[^>ACDEFGHIKLMNPQRSTVWY\n]/i){#Sequence contain non-standard protein symbol. ($name2) = $name =~ />(.*)/; print $fh_stderr $h_encodingID->{$name2}." - sequence contain non-standard protein symbol\n"; $k++; next; } print $fh_tempmitoprot $seq;#Print the sequence of the SNP $fh_tempmitoprot->close(); # RUN the program in the working dir # th output file is localized where the command is launched (!?) @args = ("$outpath/$command", "$outpath/temp.fasta", ">", "$outpath/temp.netphos"); system(@args) == 0 or die "system @args failed runnetphos: $?"; # generate an ouput file with results from mitoprot concatenated extract_netphos_results($fh_out, "$outpath/temp.netphos", $seqname,$h_encodingID,$BASE_SEQ_NAME,$h_name_var_type); $i++; } else{ print "Error file format not correct runmitoprot::runmitoprot: $infile\n"; } } $fh_in->close(); $fh_out->close(); $fh_stderr->close(); ############################################################# # compare results for the 2 alleles and interpret the results getNetphosVariations($delta,$result_file,$h_encodingID,$h_name_position,$h_name_allele,$outfile,"$outpath/$prog\_title.txt",$h_name_var_type,$h_name_allele); print "$k errors found. Consult $outpath/$prog\_errors.txt\n"; } # Concatenation sub extract_netphos_results($$$$$$){ my ($fh_out, $resfile,$seqname,$h_encodingID,$BASE_SEQ_NAME,$h_name_var_type) = @_; my ($id,$position,$score,$enzyme,$signal); my $prog = "netphos"; my $fh_in = new IO::File $resfile || die "Can not open the output file of $prog $resfile : $! \n"; while(<$fh_in>){ if($_ =~ m/($BASE_SEQ_NAME\d*)\s*(\d*)\s*\S*\s*\S*\s*(\d\.\d*)\s*(\S*)\s*(\S*)/){ #Output of netphos ($id,$position,$score,$enzyme,$signal) = $_ =~ m/($BASE_SEQ_NAME\d*)\s*(\d*)\s*\S*\s*\S*\s*(\d\.\d*)\s*(\S*)\s*(\S*)/; print $fh_out "$id\t$position\t$enzyme\t$score\t$signal\n"; } } $fh_in->close(); } #Find the transcript who win/lost phosphorylation signal sub getNetphosVariations($$$$$$$$$){ my ($delta,$infile,$h_encodingID,$name_position,$name_allele,$outfile,$titlefile,$h_name_var_type,$h_name_allele) = @_; my $fh_in = new IO::File $infile || die "Can not open runnetphos::getNetphosVariations : $infile : $! \n"; my $fh_out = new IO::File ">$outfile" || die "Can not open $outfile : $! \n"; my ($line,$allele,$pos,$enzyme,$score,$name,$seq_name,$signal,$basis_name,$key_name,$diff_score); my ($start,$end,$ref,$alt,$info,$end_alt,$i,$info); my %enzyme_list = (); $enzyme_list{"CKII"} = ""; $enzyme_list{"unsp"} = ""; $enzyme_list{"GSK3"} = ""; $enzyme_list{"cdc2"} = ""; $enzyme_list{"CaM-II"} = ""; $enzyme_list{"CKI"} = ""; $enzyme_list{"p38MAPK"} = ""; $enzyme_list{"DNAPK"} = ""; $enzyme_list{"ATM"} = ""; $enzyme_list{"RSK"} = ""; $enzyme_list{"cdk5"} = ""; $enzyme_list{"PKG"} = ""; $enzyme_list{"PKA"} = ""; $enzyme_list{"PKC"} = ""; $enzyme_list{"PKB"} = ""; my %score_allele1 = (); # key: seq_name my %score_allele2 = (); my %signal_allele1 = (); my %signal_allele2 = (); my %seq_list = (); my %netcglyc_res = (); my %change_position = (); my @tab1 = ""; my @tab2 = ""; my $c = 2; my %ref_pos = (); my %alt_pos = (); while($line=<$fh_in>){ # get the information ($seq_name,$pos,$enzyme,$score,$signal) = $line =~ /([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\n]*)/; if(defined $h_encodingID->{$seq_name}){ ($basis_name,$allele) = $h_encodingID->{$seq_name} =~ /(.*)_(allele\d)$/; } else{ print "PB: No encoding name for $seq_name in runnetphos::getNetphosVariations\n"; exit(11); } # key name for the current information analyzed $key_name = $basis_name.",".$enzyme; # for the display, enzymes in order # list of sequences $seq_list{$basis_name} = ""; # list of basis sequence names #~ my $a = $name_position->{$seq_name}; #~ my $b = $name_allele->{$seq_name} ; #~ print "hash_pos:$a\nhash_allele:$b\n"; print ">>seq_name:*$seq_name*, pred_pos:$pos, pred_score:$score, $signal : $key_name\n\n"; # check that the id is existing if(defined $h_encodingID->{$seq_name}){ # snp if($h_name_var_type->{$seq_name} eq "snp"){ ### if($pos eq $name_position->{$seq_name}){ if($name_allele->{$seq_name} eq "allele1"){ $signal_allele1{$key_name} = $signal; $score_allele1{$key_name} = $score; } elsif($name_allele->{$seq_name} eq "allele2"){ $signal_allele2{$key_name} = $signal; $score_allele2{$key_name} = $score; print "$key_name : $signal_allele2{$key_name} - $score_allele2{$key_name}\n\n"; } else{ print "PB: $seq_name :$name_allele->{$seq_name} function runnetphos::getNetphosVariations\n"; exit(12); } } } # indel else{ # get the [REF/ALT-POS] information #~ print "$h_encodingID->{$name}\n"; ($info) = $h_encodingID->{$seq_name} =~ /\[([^\/]+\/.+\-\d*\-*\d+)\]_allele\d*/; # retrives the amino acids to check on ($start,$end,$end_alt) = analyseIndelChange($info); #~ print "coordinates: $start,$end,$end_alt\nPOSITION: $pos\n"; # get the signals from the reference %ref_pos = (); for($i=$start;$i<=$end;$i++){ $ref_pos{$i} = ""; #~ print "ref:$i\n"; } # get the signals from the alternative %alt_pos = (); for($i=$start;$i<=$end_alt;$i++){ $alt_pos{$i} = ""; #~ print "alt:$i\n"; } print "--> POS FROM RES: $pos\n"; if((defined $ref_pos{$pos}) and ($h_name_allele->{$seq_name} eq "allele1") and ($signal eq "YES")){ $score_allele1{$key_name} = "indel"; if(not defined $signal_allele1{$key_name}){ $signal_allele1{$key_name} = 1; } else{ $signal_allele1{$key_name} = $signal_allele1{$key_name} + 1; # number of signals on the reference for the indel } print "REF:$key_name:$signal_allele1{$key_name}\n"; } if((defined $alt_pos{$pos}) and ($h_name_allele->{$seq_name} eq "allele2") and ($signal eq "YES")){ $score_allele2{$key_name} = "indel"; if(not defined $signal_allele2{$key_name}){ $signal_allele2{$key_name} = 1; } else{ $signal_allele2{$key_name} = $signal_allele2{$key_name} + 1; # number of signals on the alternative for the indel } print "ALT:$key_name:$signal_allele2{$key_name}\n"; } } } else{ print "PB: $seq_name is not a short name found to be encoded: function runnetphos::getNetphosVariations\n"; exit(12); } } # analyse the information # to analyse the difference between the 2 alleles my $fh_title = new IO::File ">$titlefile" || die "Can not open $titlefile : $! \n"; print $fh_title ("Name"); foreach $enzyme (sort keys %enzyme_list){ print $fh_title ("\t$enzyme result\t$enzyme allele1 score\t$enzyme allele2 score"); } print $fh_title ("\n"); $fh_title->close(); foreach $basis_name (sort keys %seq_list){ print $fh_out ("$basis_name"); foreach $enzyme (sort keys %enzyme_list){ $key_name = $basis_name.",".$enzyme; # SNP if($h_name_var_type->{$seq_name} eq "snp"){ #~ if((defined $score_allele1{$key_name}) && (not defined $score_allele2{$key_name}) && ($signal_allele1{$key_name} eq "YES")){ if((defined $score_allele1{$key_name}) && ($signal_allele2{$key_name} ne "YES") && ($signal_allele1{$key_name} eq "YES")){ print $fh_out ("\tloss\t$score_allele1{$key_name}\t$score_allele2{$key_name}"); } elsif(($signal_allele1{$key_name} ne "YES") && (defined $score_allele2{$key_name}) && ($signal_allele2{$key_name} eq "YES")){ #~ elsif((not defined $score_allele1{$key_name}) && (defined $score_allele2{$key_name}) && ($signal_allele2{$key_name} eq "YES")){ print $fh_out ("\tgain\t$score_allele1{$key_name}\t$score_allele2{$key_name}"); } elsif((defined $score_allele1{$key_name}) && (defined $score_allele2{$key_name})){ #~ elsif((defined $score_allele1{$key_name}) && (defined $score_allele2{$key_name})){ $diff_score = $score_allele1{$key_name} - $score_allele2{$key_name}; $diff_score =~ s/-//; if($diff_score >= $delta && $score_allele1{$key_name} > $score_allele2{$key_name}){ print $fh_out ("\tloss?\t$score_allele1{$key_name}\t$score_allele2{$key_name}"); } elsif($diff_score >= $delta && $score_allele1{$key_name} < $score_allele2{$key_name}){ print $fh_out ("\tgain?\t$score_allele1{$key_name}\t$score_allele2{$key_name}"); } else{ print $fh_out ("\t\t\t"); #~ print $fh_out ("\t$key_name\t\t"); } } elsif((not defined $score_allele1{$key_name}) && (not defined $score_allele2{$key_name})){ print $fh_out ("\t\t\t"); #~ print $fh_out ("\t$key_name\t\t"); } else{ print $fh_out ("\t\t\t"); #~ print $fh_out ("\t$key_name\t\t"); } #~ else{ #~ #~ print "PB: unknown case: $key_name runnetphos::getNetphosVariations\n"; #~ } } # INDEL else{ # the number of signals is different between reference and alternative at the indel part if ($signal_allele1{$key_name} != $signal_allele2{$key_name}){ print $fh_out ("\tdiff\t$signal_allele1{$key_name}\t$signal_allele2{$key_name}"); } else{ print $fh_out ("\t\t\t"); } } } print $fh_out ("\n"); } $fh_in->close(); $fh_out->close(); } #~ sub compare($$$$){ #~ #~ my ($fh_result,$delta,$ref1,$ref2) = @_; #~ #~ my @tab1 = @$ref1; #~ my @tab2 = @$ref2; #~ my $j = 2; #~ my ($i,$diffscore); #~ #~ for($i = 2; $i<$#tab1+1; $i=$i+3){ #~ #~ if(defined($tab1[$i]) && defined($tab2[$j])){ #~ #~ if($tab1[$i] == $tab2[$j]){#Same position #~ #~ if($tab1[$i+1] eq $tab2[$j+1]){#Same enzym #~ #~ $diffscore = $tab1[$i+2] - $tab2[$j+2]; #~ #~ if($diffscore >= $delta){ #~ #~ print $fh_result "$tab1[1] : Signal $tab1[$i+1] lost at position $tab1[$i]. Score allele 1 = $tab1[$i+2] - Score allele 2 = $tab2[$j+2]\n"; #~ } #~ elsif($diffscore <= -$delta){ #~ #~ print $fh_result "$tab1[1] : Signal $tab1[$i+1] gain at position $tab1[$i]. Score allele 1 = $tab1[$i+2] - Score allele 2 = $tab2[$j+2]\n"; #~ } #~ } #~ } #~ elsif($tab1[$i] > $tab2[$j]){ #~ #~ print $fh_result "$tab1[1] : Signal $tab2[$j+1] gain at position $tab2[$j]. Score : $tab2[$j+2]\n"; #~ $i = $i-3;#Stay in same column tab1 #~ } #~ elsif($tab1[$i] < $tab2[$j]){ #~ #~ print $fh_result "$tab1[1] : Signal $tab1[$i+1] lost at position $tab1[$i]. Score : $tab1[$i+2]\n"; #~ $j = $j-3;#Stay in same column tab2 #~ } #~ $j=$j+3; #~ } #~ } #~ $j=0; #~ } 1;