#!/usr/bin/perl -w =pod =head1 NAME runnetoglyc Module =head1 SYNOPSIS use runnetoglyc; =head1 OPTIONS =head1 DESCRIPTION =head1 VERSION Version 1 =head1 DATE 20 janvier 2012 =head1 AUTHORS Sabrina Rodriguez (sabrina.rodriguez@jouy.inra.fr) Johann Beghain =cut package runnetoglyc; use strict; use IO::File; use base 'Exporter'; our @EXPORT = qw/netoglyc_extract_results/; use formatAlleleSeq; sub netoglyc_extract_results($$$$$){ my ($delta, $netoglyc_result_file, $name_encoding_file, $output_file, $BASE_SEQ_NAME) = @_; my ($fh_in,$fh_out,$fh_tmp,$allele,$name,$line,$seq_name,$pos,$Gscore,$Iscore,$signal); my ($nameA1,$nameA2,$score_diff,$score,$basis_name); my ($start,$end,$ref,$alt,$info,$end_alt,$i); my %score_allele1 = (); # key : initial complete name => score value (for the alléle 1 position) my %score_allele2 = (); # key : initial complete name => signal value (for the alléle 1 position : ++) my %signal_allele1 = (); # key : initial complete name => score value (for the alléle 2 position) my %signal_allele2 = (); # key : initial complete name => signal value (for the alléle 2 position : ++) my %ref_pos = (); my %alt_pos = (); my ($h_encodingName,$h_encodingID,$h_name_allele,$h_name_position,$h_name_var_type) = getEncoding($name_encoding_file); my $prog = "netoglyc"; my $outpath = $output_file; $outpath =~ s/\/([^\/]+)$//; my $fh_title = new IO::File ">$outpath/$prog\_title.txt" || die "Can not open $outpath/$prog\_title.txt : $! \n"; print $fh_title ("Sequence Name\tCase\tscore allele1\tscore allele2\tsignal allele1\tsignal allele2\n"); $fh_title->close(); #Find variations $fh_in = new IO::File "$netoglyc_result_file" || die "Can not opennetoglyc_result_file $netoglyc_result_file : $! \n"; $fh_out = new IO::File ">$output_file" || die "Can not create final results of netoglyc $output_file: $! \n"; print "$netoglyc_result_file\n"; while($line = <$fh_in>){ #~ print "$line\n"; if($line =~ m/^$BASE_SEQ_NAME\d+\s*\S*\s*(\d*)\s*(\S*)\s*(\S*)\s*(\S*)/){ ($seq_name,$pos,$Gscore,$Iscore,$signal) = $line =~ /^($BASE_SEQ_NAME\d+)\s*\S*\s*(\d*)\s*(\S*)\s*(\S*)\s*(\S*)/; #~ print "$seq_name,$pos,$Gscore,$Iscore,$signal\n"; ($basis_name) = $h_encodingID->{$seq_name} =~ /(.*)_allele\d$/; if($line =~ s/^($BASE_SEQ_NAME\d+)/$h_encodingID->{$seq_name}/){#Substitute the ID by his coresponding name #~ my $a = $h_name_position->{$seq_name}; #~ my $b = $h_name_allele->{$seq_name} ; #~ print "hash_pos:$a\nhash_allele:$b\n"; if(($signal eq "S") or ($signal eq "T")){ #~ print ">>seq_name:*$seq_name*, pred_pos : $pos, pred_score : $Gscore , $signal\n\n"; if($h_name_var_type->{$seq_name} eq "snp"){ ### if($pos eq $h_name_position->{$seq_name}){ if($h_name_allele->{$seq_name} eq "allele1"){ $signal_allele1{$basis_name} = $signal; $score_allele1{$basis_name} = $Gscore; #~ print "===> $seq_name : $signal || $Gscore \n"; } elsif($h_name_allele->{$seq_name} eq "allele2"){ $signal_allele2{$basis_name} = $signal; $score_allele2{$basis_name} = $Gscore; #~ print "=======> $seq_name : $signal || $Gscore \n"; } else{ print "PB: $seq_name :$h_name_allele->{$seq_name}: $basis_name\n"; exit(12); } } } else{ #~ print "HERE\n"; # get the [REF/ALT-POS] information #~ print "$h_encodingID->{$name}\n"; ($info) = $h_encodingID->{$seq_name} =~ /\[([^\/]+\/.+\-\d*\-*\d+)\]_allele\d*/; # retrives the amino acids to check on ($start,$end,$end_alt) = analyseIndelChange($info); #~ print "coordinates: $start,$end,$end_alt\nPOSITION: $pos\n"; # get the signals from the reference %ref_pos = (); for($i=$start;$i<=$end;$i++){ $ref_pos{$i} = ""; #~ print "ref:$i\n"; } # get the signals from the alternative %alt_pos = (); for($i=$start;$i<=$end_alt;$i++){ $alt_pos{$i} = ""; #~ print "alt:$i\n"; } #~ print "--> POS FROM RES: $pos\n"; if((defined $ref_pos{$pos}) and ($h_name_allele->{$seq_name} eq "allele1")){ $score_allele1{$basis_name} = "indel"; if(not defined $signal_allele1{$basis_name}){ $signal_allele1{$basis_name} = 1; } else{ $signal_allele1{$basis_name} = $signal_allele1{$basis_name} + 1; # number of signals on the reference for the indel } #~ print "REF:$basis_name:$signal_allele1{$basis_name}\n"; } if((defined $alt_pos{$pos}) and ($h_name_allele->{$seq_name} eq "allele2")){ $score_allele2{$basis_name} = "indel"; if(not defined $signal_allele2{$basis_name}){ $signal_allele2{$basis_name} = 1; } else{ $signal_allele2{$basis_name} = $signal_allele2{$basis_name} + 1; # number of signals on the alternative for the indel } #~ print "ALT:$basis_name:$signal_allele2{$basis_name}\n"; } } } } else{ print "PB: NON existing sequence name for id:$1\n"; } } elsif($line =~ /netOglyc: surface prediction failed in entry/){ print "Error: NETOGLYC FAILED:\n$line\n"; exit(11); } } foreach $basis_name (keys %signal_allele1){ #~ ($basis_name) = $h_encodingID->{$seq_name} =~ /(.*)_allele\d$/; print "=====>>> $seq_name:$basis_name\n"; #~ #~ $nameA1 = $seq_name; #~ $name = $h_encodingID->{$seq_name}; #~ $name =~ s/allele1/allele2/; #~ $nameA2 = $h_encodingName->{$name}; # indel if($score_allele1{$basis_name} eq "indel" ){ print "$basis_name : $signal_allele1{$basis_name} != $signal_allele2{$basis_name}\n"; # the number of signals is different between reference and alternative at the indel part if ($signal_allele1{$basis_name} != $signal_allele2{$basis_name}){ print $fh_out ("$basis_name\tdiff\t\t\t$signal_allele1{$basis_name}\t$signal_allele2{$basis_name}\n"); ##### } delete $signal_allele2{$basis_name}; } # snp else{ if((not defined $signal_allele2{$basis_name}) && (($signal_allele1{$basis_name} eq 'S') || ($signal_allele1{$basis_name} eq 'T'))){ print $fh_out ("$basis_name\tloss\t$score_allele1{$basis_name}\t\t$signal_allele1{$basis_name}\t\n"); print ("$name\tloss\t$score_allele1{$basis_name}\t$signal_allele1{$basis_name}\n"); } elsif(defined $score_allele2{$basis_name}){ $score_allele1{$basis_name} =~ s /\,/\./; $score_allele2{$basis_name} =~ s /\,/\./; $score_diff = $score_allele1{$basis_name} - $score_allele2{$basis_name}; $score_diff =~ s/-//; if(($score_diff >= $delta) && ($score_allele1{$basis_name} > $score_allele2{$basis_name})){ print $fh_out ("$basis_name\tloss?\t$score_allele1{$basis_name}\t$score_allele2{$basis_name}\t$signal_allele1{$basis_name}\t$signal_allele2{$basis_name}\n"); } elsif(($score_diff >= $delta) && ($score_allele1{$basis_name} < $score_allele2{$basis_name})){ print $fh_out ("$basis_name\tgain?\t$score_allele1{$basis_name}\t$score_allele2{$basis_name}\t$signal_allele1{$basis_name}\t$signal_allele2{$basis_name}\n"); } } delete $signal_allele2{$basis_name}; } } foreach $basis_name (keys %signal_allele2){ #~ ($basis_name) = $h_encodingID->{$seq_name} =~ /(.*)_allele\d$/; #~ print "=====>>> $seq_name:$basis_name\n"; #~ $nameA2 = $seq_name; #~ $name = $h_encodingID->{$seq_name}; #~ $name =~ s/allele2/allele1/; #~ $nameA1 = $h_encodingName->{$name}; #~ # indel if($score_allele2{$basis_name} eq "indel" ){ #~ print ">>>$basis_name : $signal_allele1{$basis_name} != $signal_allele2{$basis_name}\n"; # the number of signals is different between reference and alternative at the indel part if ($signal_allele1{$basis_name} != $signal_allele2{$basis_name}){ print $fh_out ("$basis_name\tdiff\t\t\t$signal_allele1{$basis_name}\t$signal_allele2{$basis_name}\n"); ##### } } # snp else{ if((not defined $signal_allele1{$basis_name}) && (($signal_allele2{$basis_name} eq 'S') || ($signal_allele2{$basis_name} eq 'T'))){ print $fh_out ("$basis_name\tgain\t\t$score_allele2{$basis_name}\t\t$signal_allele2{$basis_name}\n"); } elsif(defined $score_allele1{$basis_name}){ $score_allele1{$basis_name} =~ s /\,/\./; $score_allele2{$basis_name} =~ s /\,/\./; $score_diff = $score_allele1{$basis_name} - $score_allele2{$basis_name}; $score_diff =~ s/-//; if(($score_diff >= $delta) && ($score_allele1{$basis_name} > $score_allele2{$basis_name})){ print $fh_out ("$basis_name\tloss?\t$score_allele1{$basis_name}\t$score_allele2{$basis_name}\t$signal_allele1{$basis_name}\t$signal_allele2{$basis_name}\n"); } elsif(($score_diff >= $delta) && ($score_allele1{$basis_name} < $score_allele2{$basis_name})){ print $fh_out ("$basis_name\tgain?\t$score_allele1{$basis_name}\t$score_allele2{$basis_name}\t$signal_allele1{$basis_name}\t$signal_allele2{$basis_name}\n"); } } } } $fh_out->close; $fh_in->close; } 1;