#!/usr/bin/perl -w package runnetoglyc; use strict; use IO::File; use base 'Exporter'; our @EXPORT = qw/netoglyc_extract_results/; use formatAlleleSeq; sub netoglyc_extract_results($$$$$){ my ($delta, $netoglyc_result_file, $name_encoding_file, $output_file, $BASE_SEQ_NAME) = @_; my ($fh_in,$fh_out,$fh_tmp,$allele,$name,$line,$seq_name,$pos,$Gscore,$Iscore,$signal); my ($nameA1,$nameA2,$score_diff,$score,$basis_name); my %score_allele1 = (); # key : initial complete name => score value (for the alléle 1 position) my %score_allele2 = (); # key : initial complete name => signal value (for the alléle 1 position : ++) my %signal_allele1 = (); # key : initial complete name => score value (for the alléle 2 position) my %signal_allele2 = (); # key : initial complete name => signal value (for the alléle 2 position : ++) my ($h_encodingName,$h_encodingID,$h_name_allele,$h_name_position) = getEncoding($name_encoding_file); my $prog = "netoglyc"; my $outpath = $output_file; $outpath =~ s/\/([^\/]+)$//; my $fh_title = new IO::File ">$outpath/$prog\_title.txt" || die "Can not open $outpath/$prog\_title.txt : $! \n"; print $fh_title ("Sequence Name\tCase\tscore allele1\tscore allele2\tsignal allele1\tsignal allele2\n"); $fh_title->close(); #Find variations $fh_in = new IO::File "$netoglyc_result_file" || die "Can not opennetoglyc_result_file $netoglyc_result_file : $! \n"; $fh_out = new IO::File ">$output_file" || die "Can not create final results of netoglyc $output_file: $! \n"; while($line = <$fh_in>){ #~ print "$line\n"; if($line =~ m/^$BASE_SEQ_NAME\d+\s*\S*\s*(\d*)\s*(\S*)\s*(\S*)\s*(\S*)/){ ($seq_name,$pos,$Gscore,$Iscore,$signal) = $line =~ /^($BASE_SEQ_NAME\d+)\s*\S*\s*(\d*)\s*(\S*)\s*(\S*)\s*(\S*)/; #~ print "$seq_name,$pos,$Gscore,$Iscore,$signal\n"; if($line =~ s/^($BASE_SEQ_NAME\d+)/$h_encodingID->{$seq_name}/){#Substitute the ID by his coresponding name my $a = $h_name_position->{$seq_name}; my $b = $h_name_allele->{$seq_name} ; print "hash_pos:$a\nhash_allele:$b\n"; print ">>seq_name:*$seq_name*, pred_pos:$pos, pred_score:$Gscore,$Iscore, $signal\n\n"; if($pos eq $h_name_position->{$seq_name}){ if($h_name_allele->{$seq_name} eq "allele1"){ $signal_allele1{$seq_name} = $signal; $score_allele1{$seq_name} = $Gscore; } elsif($h_name_allele->{$seq_name} eq "allele2"){ $signal_allele2{$seq_name} = $signal; $score_allele2{$seq_name} = $Gscore; } else{ print "PB: $seq_name :$h_name_allele->{$seq_name}\n"; exit(12); } } } else{ print "PB: NON existing sequence name for id:$1\n"; } } elsif($line =~ /netOglyc: surface prediction failed in entry/){ print "Error: NETOGLYC FAILED:\n$line\n"; exit(11); } } foreach $seq_name (keys %signal_allele1){ ($basis_name) = $h_encodingID->{$seq_name} =~ /(.*)_allele\d$/; $nameA1 = $seq_name; $name = $h_encodingID->{$seq_name}; $name =~ s/allele1/allele2/; $nameA2 = $h_encodingName->{$name}; if(not defined $signal_allele2{$nameA2}){ print $fh_out ("$basis_name\tloss\t$score_allele1{$nameA1}\t\t$signal_allele1{$nameA1}\t\n"); #~ print ("$name\tloss\t$score_allele1{$nameA1}\t$signal_allele1{$nameA1}\n"); } else{ $score_allele1{$nameA1} =~ s /\,/\./; $score_allele2{$nameA2} =~ s /\,/\./; $score_diff = $score_allele1{$nameA1} - $score_allele2{$nameA2}; $score_diff =~ s/-//; if($score_diff >= $delta && $score_allele1{$nameA1} > $score_allele2{$nameA2}){ print $fh_out ("$basis_name\tloss?\t$score_allele1{$nameA1}\t$score_allele2{$nameA2}\t$signal_allele1{$nameA1}\t$signal_allele2{$nameA2}\n"); } elsif($score_diff >= $delta && $score_allele1{$nameA1} < $score_allele2{$nameA2}){ print $fh_out ("$basis_name\tgain?\t$score_allele1{$nameA1}\t$score_allele2{$nameA2}\t$signal_allele1{$nameA1}\t$signal_allele2{$nameA2}\n"); } } delete $signal_allele2{$nameA2}; } foreach $seq_name (keys %signal_allele2){ ($basis_name) = $h_encodingID->{$seq_name} =~ /(.*)_allele\d$/; $nameA2 = $seq_name; $name = $h_encodingID->{$seq_name}; $name =~ s/allele2/allele1/; $nameA1 = $h_encodingName->{$name}; if(not defined $signal_allele1{$nameA1}){ print $fh_out ("$basis_name\tgain\t\t$score_allele2{$nameA2}\t\t$signal_allele2{$nameA2}\n"); } else{ $score_allele1{$nameA1} =~ s /\,/\./; $score_allele2{$nameA2} =~ s /\,/\./; $score_diff = $score_allele1{$nameA1} - $score_allele2{$nameA2}; $score_diff =~ s/-//; if($score_diff >= $delta && $score_allele1{$nameA1} > $score_allele2{$nameA2}){ print $fh_out ("$basis_name\tloss?\t$score_allele1{$nameA1}\t$score_allele2{$nameA2}\t$signal_allele1{$nameA1}\t$signal_allele2{$nameA2}\n"); } elsif($score_diff >= $delta && $score_allele1{$nameA1} < $score_allele2{$nameA2}){ print $fh_out ("$basis_name\tgain?\t$score_allele1{$nameA1}\t$score_allele2{$nameA2}\t$signal_allele1{$nameA1}\t$signal_allele2{$nameA2}\n"); } } } $fh_out->close; $fh_in->close; } 1;