#!/usr/bin/perl -w

package runnetcglyc;

use base 'Exporter';
our @EXPORT = qw/netcglyc_extract_results/;

use strict;
use warnings;
use IO::File;
use formatAlleleSeq;

# Extract results after running netcglyc, applied to one result file.
#
# Reads the netcglyc predictions, keeps for every encoded sequence the
# prediction located at the position recorded for that sequence, and writes
# to $output_file the cases where the two alleles of a sequence differ:
#
#   name <TAB> loss  <TAB> score1 <TAB>          site tagged 'W' on allele1 only
#   name <TAB> gain  <TAB>        <TAB> score2   site tagged 'W' on allele2 only
#   name <TAB> loss? <TAB> score1 <TAB> score2   both scored, allele1 higher by >= delta
#   name <TAB> gain? <TAB> score1 <TAB> score2   both scored, allele2 higher by >= delta
#
# A one-line column header is also written to "netcglyc_title.txt" in the
# directory of $output_file (the current directory when $output_file has no
# directory part).
#
# Parameters:
#   $delta                - minimum absolute score difference between the two
#                           alleles for a "loss?" / "gain?" line to be written
#   $netcglyc_result_file - netcglyc output file; lines starting with '#' are
#                           ignored
#   $name_encoding_file   - file handed to getEncoding(); the 2nd and 4th
#                           values it returns are used as hash references
#                           keyed by the sequence identifier found in the
#                           result file (encoded "<name>_alleleN" label and
#                           expected position respectively)
#   $output_file          - file that receives the difference table
#
# Returns 1 on success.  Dies when a file cannot be opened or the output file
# cannot be closed; prints a message and exits with status 11 when a result
# line cannot be parsed or names an identifier absent from the encoding table.
# Progress and debugging traces are printed on STDOUT.
#
# The prototype is kept so that existing call sites parse as before.
sub netcglyc_extract_results($$$$) {
    my ($delta, $netcglyc_result_file, $name_encoding_file, $output_file) = @_;

    print "runnetcglyc::netcglyc_extract_results: Searching C-mannosilation loss or gain between 2 alleles:\n";
    print "Name\tgain/loss/diff\tscore_allele1\tscore_allele2...\n";

    my $prog = "netcglyc";

    # Get the encoding names; only the ID and position tables are needed here.
    my (undef, $h_encodingID, undef, $h_name_position) = getEncoding($name_encoding_file);

    # 'or' (not '||') so that the die applies to the constructor call and not
    # to the always-true file name string.
    my $fh_out = IO::File->new($output_file, 'w')
        or die "Can not open $output_file : $! \n";

    # Directory part of the output file; fall back to the current directory
    # when the output file name contains no '/'.
    my $outpath = $output_file =~ m{\A(.*)/[^/]+\z}s ? $1 : '.';
    my $title_file = "$outpath/${prog}_title.txt";
    my $fh_title = IO::File->new($title_file, 'w')
        or die "Can not open $title_file : $! \n";
    print {$fh_title} "Sequence Name\tCase\tscore allele1\tscore allele2\n";
    $fh_title->close();

    # Get the C-mannosylation predictions: name, position, score and tag.
    print "FILE::*$netcglyc_result_file*\n";
    my $fh_in = IO::File->new($netcglyc_result_file, 'r')
        or die "Can not open file $netcglyc_result_file : $! \n";
    my ($allele1_sites, $allele2_sites)
        = _collect_allele_predictions($fh_in, $h_encodingID, $h_name_position);
    $fh_in->close();

    # Analyse the difference between the 2 alleles.
    _report_allele_differences($fh_out, $delta, $allele1_sites, $allele2_sites);

    # Buffered write errors only surface when the handle is closed.
    $fh_out->close() or die "Can not close $output_file : $! \n";

    return 1;
}

# Parse the netcglyc result stream and return two hash references (allele1,
# allele2), each mapping a sequence base name to { score => ..., tag => ... }
# for the prediction found at the position expected for that sequence.
sub _collect_allele_predictions {
    my ($fh_in, $h_encodingID, $h_name_position) = @_;

    my %sites_of = (allele1 => {}, allele2 => {});

    # Read line by line whatever the caller did to the record separator.
    local $/ = "\n";

    LINE:
    while (my $line = <$fh_in>) {
        next LINE if $line =~ /^\#/;

        my ($name, $pos, $score, $tag) = $line
            =~ /([^\s]+)\s*[^\s]+\s*[^\s]+\s*([^\s]+)\s*[^\s]+\s*([^\s]+)\s*[^\s]+\s*([^\n]+)\n/;
        if (!defined $name) {
            # Blank lines carry no prediction; anything else is a format error.
            next LINE if $line !~ /\S/;
            chomp $line;
            print "runnetcglyc::netcglyc_extract_results unparsable line: $line\n";
            exit(11);
        }
        print "BASE_NAME: $name, POSITION: $pos, SCORE: $score, TAG_W: $tag\n";

        # The identifier must be known before its encoding is inspected.
        my $encoded = $h_encodingID->{$name};
        if (!defined $encoded) {
            print "runnetcglyc::netcglyc_extract_results unkown name: $name\n";
            exit(11);
        }

        my ($basis_name, $allele) = ('', '');
        if ($encoded =~ /(.*)_(allele\d)$/) {
            ($basis_name, $allele) = ($1, $2);
        }
        my $expected_pos = $h_name_position->{$name};
        $expected_pos = '' if !defined $expected_pos;
        print "NAME_POS: $expected_pos, BASIS_NAME: *$basis_name*, ALLELE_NUM: $allele, ENCODING: $encoded\n";

        # Keep only the prediction at the recorded position, for allele 1 or 2.
        next LINE if $expected_pos ne $pos;
        next LINE if !exists $sites_of{$allele};
        $sites_of{$allele}{$basis_name} = { score => $score, tag => $tag };
    }

    return ($sites_of{allele1}, $sites_of{allele2});
}

# Compare the per-allele predictions and write one line per difference to
# $fh_out (see netcglyc_extract_results for the line format).  Names are
# processed in sorted order so that the output is reproducible.
sub _report_allele_differences {
    my ($fh_out, $delta, $allele1_sites, $allele2_sites) = @_;

    # Working copy: entries also present on allele1 are removed while allele1
    # is scanned, leaving only the sites specific to allele2.
    my %allele2_only = %{$allele2_sites};

    foreach my $name (sort keys %{$allele1_sites}) {
        my $site1 = $allele1_sites->{$name};
        my $site2 = delete $allele2_only{$name};

        if (!defined $site2) {
            if ($site1->{tag} eq 'W') {
                print {$fh_out} "$name\tloss\t$site1->{score}\t\n";
                print ">>HEHO: $name\tloss\t$site1->{score}\t\n";
            }
            next;
        }

        # Both alleles are scored: look the scores up by the current name and
        # take a real absolute value (removing a '-' character would corrupt
        # values printed in exponent notation such as 1e-05).
        my ($score1, $score2) = ($site1->{score}, $site2->{score});
        print "SCORES: $score1 - $score2\n";
        my $diff_score = abs($score1 - $score2);
        print "DIIF_SCORE: $diff_score\n";
        next if $diff_score < $delta;

        if ($score1 > $score2) {
            print {$fh_out} "$name\tloss?\t$score1\t$score2\n";
        }
        elsif ($score1 < $score2) {
            print {$fh_out} "$name\tgain?\t$score1\t$score2\n";
        }
    }

    foreach my $name (sort keys %allele2_only) {
        my $site2 = $allele2_only{$name};
        next if $site2->{tag} ne 'W';

        # The allele1 score and tag are by construction absent here, hence the
        # two empty fields in the trace.
        print ">>HEHA:  | $site2->{tag} | $site2->{score} | \n";
        print {$fh_out} "$name\tgain\t\t$site2->{score}\n";
    }

    return;
}

# ---------------------------------------------------------------------------
# Legacy implementation, kept commented out for reference only.
# ---------------------------------------------------------------------------
#~ #Find variations
#~ $tmp = new IO::File "$path/fic/temp-netcglyc_$date" || die "Can not open netcglyc temp file : $! \n";
#~ my $netcglyc = new IO::File ">output_file" || die "Can not create file output_file: $! \n";
#~ my %a1;
#~ my %a2;
#~ my %check;
#~ my $allele = "";
#~ my $name = "";
#~ my $c = 2;
#~ for($i=0;$i<=$#$netcglyc_res;$i++);{
#~ while(<$tmp>){
#~     my $line = $netcglyc_res[$i];
#~     if($line =~ s/^(\d*)/$id{$1}/){#Substitute the ID by his coresponding name
#~         if($line =~ m/^(\S*)_allele(\d)\s*(\d*)\s(\S*)\s(\S*)/){
#~             if($2 eq $allele){
#~                 if($c == 0){#Same allele1
#~                     $allele = $2;
#~                     if($5 eq "W"){
#~                         my $position = $3;
#~                         my $score = $4;
#~                         $a1{$position}.=$score;
#~                         $check{$position}.="1";
#~                     }
#~                 }
#~                 elsif($c == 1){#Same allele2
#~                     $allele = $2;
#~                     if($5 eq "W"){
#~                         my $position = $3;
#~                         my $score = $4;
#~                         $a2{$position}.= $score;
#~                         $check{$position}.="2";
#~                     }
#~                 }
#~             }
#~             else{
#~                 if($c == 0){#Other allele
#~                     $c++;
#~                     $allele = $2;
#~                     if($5 eq "W"){
#~                         my $position = $3;
#~                         my $score = $4;
#~                         $a2{$position}.=$score;
#~                         $check{$position}.="2";
#~                     }
#~                 }
#~                 elsif($c == 1){#Other transcript
#~                     compare($netcglyc,$delta,\%a1,\%a2,\%check,$name);
#~                     %a1=();
#~                     %a2=();
#~                     %check=();
#~                     $allele = $2;
#~                     $name = $1;
#~                     if($5 eq "W"){
#~                         my $position = $3;
#~                         my $score = $4;
#~                         $a1{$position}.=$score;
#~                         $check{$position}.="1";
#~                     }
#~                     $c = 0;
#~                 }
#~                 elsif($c == 2){#Initialisation
#~                     $name = $1;
#~                     $allele = $2;
#~                     if($5 eq "W"){
#~                         my $position = $3;
#~                         my $score = $4;
#~                         $a1{$position}.=$score;
#~                         $check{$position}.="1";
#~                     }
#~                     $c = 0;
#~                 }
#~             }
#~         }
#~     }
#~ }
#~ compare($netcglyc,$delta,\%a1,\%a2,\%check,$name);#For the last transcript
#~ $tmp->close();
#~ $netcglyc->close();
#~ }
#~
#~ sub compare($$$$){
#~
#~     my ($netcglyc,$delta,$ref1,$ref2,$ref3,$name) = @_;
#~     my %a1=%$ref1;
#~     my %a2=%$ref2;
#~     my %check=%$ref3;
#~     foreach my $k (keys(%check)){
#~         my $v = $check{$k};
#~         if($v eq "12"){
#~             my $diffscore = $a1{$k}-$a2{$k};
#~             if($diffscore >= $delta){
#~                 print $netcglyc "$name : Signal lost at position $k. Score allele 1 = $a1{$k} - Score allele 2 = $a2{$k}\n";
#~             }
#~             elsif($diffscore <= -$delta){
#~                 print $netcglyc "$name : Signal gain at position $k. Score allele 1 = $a1{$k} - Score allele 2 = $a2{$k}\n";
#~             }
#~         }
#~         elsif($v eq "1"){
#~             print $netcglyc "$name : Signal lost at position $k . Score : $a1{$k}\n";
#~         }
#~         elsif($v eq "2"){
#~             print $netcglyc "$name : Signal gain at position $k. Score : $a2{$k}\n";
#~         }
#~     }
#~ }

1;