#!/usr/bin/perl -w package runmitoprot; use strict; use IO::File; use base 'Exporter'; our @EXPORT = qw/runmitoprot extract_mitoprot_results getMitoprotVariations/; use formatAlleleSeq; #For each sequence, this function will create a new file : Mitoprot work with only one sequence in one file. sub runmitoprot($$$$$$){ my ($delta,$workpath,$outpath,$infile,$execommand,$encodingfile) = @_; my $prog = "mitoprot"; my @args = (); my ($name,$seq,$seqname,$fh_tempmitoprot,$outfile); # get the names of the parameter files my ($command) = $execommand =~ /\/([^\/]+)$/; #~ print "$command\n"; # get the base name of the input file to create the ouput file name ($outfile) = $infile =~ /\/([^\/]+)$/; $outfile =~ s/\..*/_$prog\_results.res/; $outfile = $outpath."/".$outfile; print "OUTFILE: $outfile\n"; my $titlefile = $outpath."/$prog\_title.txt"; # create the symbolic links for the executable to the working directory # otherwise the resulting file cannot be obtained @args = ("ln", "-s", $execommand, "$workpath/."); unless (-e "$workpath/$command") {system(@args) == 0 or die "system @args failed: $?"}; # go to the working directory to run the command chdir "$workpath" or die "cannot change dir rumiptoprot::runmitoprot chdir $workpath : $?"; # get the encoing information my ($h_encodingName,$h_encodingID,$h_name_allele,$h_name_position,$h_name_var_type) = getEncoding($encodingfile); # open teh input and output files my $fh_in = new IO::File $infile || die "Can not open file $infile : $!";#Data my $fh_out = new IO::File ">$workpath/$prog\_results.txt" || die "Can not create the result file ($prog\) $workpath/$prog\_results.txt: $! \n"; my $fh_stderr = new IO::File ">$workpath/$prog\_errors.txt" || die "Can not open file $workpath/$prog\_errors.txt : $!";#Errors file print $fh_stderr "\nErrors for Mitoprot: A seq must begin by a M. The folowing sequences are untreated.\n"; my $i = 1; my $k = 0; while($name = <$fh_in>){ # check teh fasta format if($name =~ /^>(.*)/){ $seqname = $1; # short version of teh name ex: _seq $fh_tempmitoprot = new IO::File ">$workpath/temp-$prog.fasta" || die "Can not create $workpath/temp-$prog.fasta: $! \n"; print $fh_tempmitoprot $name;#print the name of the SNP $seq = <$fh_in>; if($seq !~ /^M/){#A seq must begin by a M in Mitoprot II print $fh_stderr "$name"; $k++; next; } print $fh_tempmitoprot $seq;#Print the sequence of the SNP $fh_tempmitoprot->close(); # RUN the program in the working dir # th output file is localized where the command is launched (!?) @args = ("$workpath/$command", "$workpath/temp-$prog.fasta"); system(@args) == 0 or die "system @args failed: $?"; # # Placing the output file in the working directory # @args = ("mv", "temp-mitoprot.mitoprot", "$workpath/."); # system(@args) == 0 or die "system @args failed: $?"; # @args = ("rm", "-f", "$workpath/temp-mitoprot.fasta"); # system(@args) == 0 or die "system @args failed: $?"; # generate an ouput file with results from mitoprot concatenated extract_mitoprot_results($fh_out,"$workpath/temp-$prog.$prog",$seqname,$h_encodingID); $i++; } else{ print "Error file format not correct runmitoprot::runmitoprot: $infile\n"; } } $fh_in->close(); $fh_out->close(); $fh_stderr->close(); ############################################################# # compare results for the 2 alleles and interpret the results getMitoprotVariations("$workpath/$prog\_results.txt",$delta,$outfile,$titlefile); print "$k errors found. Consult $workpath/$prog\_errors.txt\n"; } # Concatenation sub extract_mitoprot_results($$$$){ my ($fh_out, $resfile,$seqname,$h_encodingID) = @_; my $score; my $found = 0; my $fh_in = new IO::File $resfile || die "Can not open the output file of mitoprot $resfile : $! \n"; while(<$fh_in>){ if($_ =~ m/^DFM\s*:\s*.*\s*(\d\.\d*)/){ # Search a line with DFM (Discriminant function for mitochondrial proteins) $score = $1; } if($_ =~ m/This protein is probably imported in mitochondria./){ # Check if the protein is imported to the mitochondria. $found = 1; # $found is true } } if(defined $h_encodingID->{$seqname}){ if($found == 1){ # $found = true => this protein is imported to mitochondria. if(defined($score)){ print $fh_out ">$h_encodingID->{$seqname}\timported\n"."$score\n"; } } else{ if(defined($score)){ print $fh_out ">$h_encodingID->{$seqname}\tnot imported\n"."$score\n"; } } } else{ print "PB: $seqname is not a short name found to be encoded: function runmitoprot::runmitoprot\n"; exit(12); } $fh_in->close(); } #Find the transcript who win/lost mitochondrial signal sub getMitoprotVariations($$$$){ my ($temp_resufile,$delta,$outfile,$titlefile) = @_; my $fh_in = new IO::File $temp_resufile || die "Can not create the result file (mitoprot) $temp_resufile: $! \n"; my $fh_out = new IO::File ">$outfile" || die "Can not create the file $outfile : $! \n"; my $fh_title = new IO::File ">$titlefile" || die "Can not open $titlefile : $! \n"; print $fh_title ("Sequence Name\tCase\tscore allele1\tscore allele2\n"); $fh_title->close(); my ($oname, $allele1, $allele2,$score,$line,$seq_name); my $found = 0; my $vers = 0; while(<$fh_in>){ if($_ =~ m/>(.*)_allele.*?(imported|not imported)/){ my $type = $2; my $name = $1; if($type eq "imported"){ $found++; } else{ if($found == 0){ $vers++; } } $line = <$fh_in>;#Next line : score. $allele2 = $line; chomp $allele2; # same procedure for SNP and INDELS!!!! if((defined($oname)) && ($oname eq $name)){ # If previous name already exist (2 alleles) if($found == 2){ # If the 2 alleles are imported $score = $allele1 - $allele2;#Difference between the 2 alleles if($score >= $delta){ print $fh_out "$oname\tloss?\t$allele1\t$allele2\n";# - scores: allele1=$allele1 allele2=$allele2\n";#Mean : allele 1 have better signal than allele 2 } if($score <= -$delta){ print $fh_out "$oname\tgain?\t$allele1\t$allele2\n";# - scores: allele1=$allele1 allele2=$allele2\n";#Mean : allele 2 have better signal } $found = 0; } elsif($found == 1){#If 1 allele is non imported if($vers == 1){ print $fh_out "$oname\tgain\t$allele1\t$allele2\n"; } else{ print $fh_out "$oname\tloss\t$allele1\t$allele2\n"; } $vers = 0; $found = 0; } } else{#new name (first allele) $allele1 = $line; chomp $allele1; } $oname = $name; } } $fh_in->close(); $fh_out->close(); } 1;