#! /usr/bin/perl -w =pod =head1 NAME runDisorder package =head1 SYNOPSIS runDisorder::disorder_extract_result($workpath, $outpath, $infile, $execommand, $encodingfile, $param); =head1 OPTIONS $workpath : work directory where DisEMBL is launched $outpath : output directory $infile : protein fasta path $execommand : program path $encodingfile : path of encoding file $param : program parameters =head1 DESCRIPTION This script use fasta of proteins involved by non synonymous coding SNP to launch DisEMBL and predict effect of mutation in the intrinseq protein disorder. =head1 VERSION Version 1 =head1 DATE 23/04/2012 =head1 AUTHORS Rachel Legendre (rlegendre@jouy.inra.fr) =cut package runDisorder; use strict; use Bio::EnsEMBL::Registry; use Data::Dumper; use Getopt::Long; use Pod::Usage; use formatAlleleSeq; use base 'Exporter'; our @EXPORT = qw/disorder_extract_result/; #$param par default sub disorder_extract_result ($$$$$$){ my ($workpath, $outpath, $infile, $execommand, $encodingfile, $param) = @_; my ($fh_in,$fh_out,$fh_temp_file,$allele,$name,$line,$basis_name,$disorder_result_file); my @alt =(); my @ref = (); my %pos_allele1 = (); my %pos_allele2 = (); my $prog = "disembl"; my @args = (); my ($seqname,$outfile, $id, %result); # get the names of the parameter files my ($file) = $infile =~ /\/([^\/]+)$/; my ($command) = $execommand =~ /\/([^\/]+)$/; #~ $param |= "8 8 4 1.2 1.4 1.2"; my $temp_file = $workpath."/".$prog."temp.dis"; # final output file # get the base name of the input file to create the ouput file name ($disorder_result_file) = $infile =~ /\/([^\/]+)$/; $disorder_result_file =~ s/\..*/_$prog\_results.res/; $disorder_result_file = $outpath."/".$disorder_result_file; # create the symbolic links for the executable to the working directory # otherwise the resulting file cannot be obtained @args = ("ln", "-s", $execommand, "$workpath/."); #~ print "@args\n"; unless (-e "$workpath/$command") {`ln -s $execommand* $workpath/.`}; # copy input file to working directory @args = ("cp", "$infile", "$workpath/."); unless (-e "$workpath/$file") {system(@args) == 0 or die "system @args failed: $?"}; # go to the working directory to run the command chdir "$workpath" or die "cannot change dir disorder::disorder chdir $workpath : $?"; # get the encoing information my ($h_encodingName,$h_encodingID,$h_name_allele,$h_name_position,$h_name_var_type) = getEncoding($encodingfile); # RUN the program in the working dir # th output file is localized where the command is launched (!?) #~ @args = ("python2.6 $command ", "$param ", "$workpath/$file ", "> $temp_file"); #~ `export PYTHONPATH=/usr/local/bioinfo/src/biopython/latest; python $command $param $workpath/$file > $temp_file`; `python $command $param $workpath/$file > $temp_file`; my $fh_title = new IO::File ">$outpath/$prog\_title.txt" || die "Can not open $outpath/$prog\_title.txt : $! \n"; print $fh_title ("Sequence Name\tConsequence\tA1_info\tA2_info\n"); $fh_title->close(); $fh_temp_file = new IO::File $temp_file, "r" || die "Can not open $temp_file : $!"; $fh_out = new IO::File ">$disorder_result_file" || die "Can not create the result file ($prog) $outpath/$prog\_results.txt: $! \n"; while (my $li = <$fh_temp_file>){ chomp($li); # we get the encoding name $id = $li ; $id =~ s/> (_seq[0-9]*)(_REM465\s).*$/$1/g; $seqname = $id; if(defined $h_encodingID->{$seqname}){ if($h_name_var_type->{$seqname} eq "snp"){ ### ($basis_name,$allele) = $h_encodingID->{$seqname} =~ /(.*)_(allele\d)$/; if($h_name_allele->{$seqname} eq "allele1"){ $pos_allele1{$basis_name} = $seqname; } elsif($h_name_allele->{$seqname} eq "allele2"){ $pos_allele2{$basis_name} = $seqname; } else{ print "PB: $name :$h_name_allele->{$name}\n"; exit(12); } $h_encodingID->{$seqname} =~ s/(.*)_allele\d/$1/g; if ($li =~ /_seq[0-9]*[13579]_REM465/){ my $res = $li; $res =~ s/> _seq[0-9]*[13579](_REM465\s)(\d+\-\d+(,\s.*)?)$/$2/g; if ($res =~ m/seq/) { $res = "0"; }else{ $res =~ s/\s//g; } @ref = split(",",$res); $result{$h_encodingID->{$seqname}}{'ref'} = join(", ", @ref); }elsif ($li =~ /(_seq[0-9]*[02468]_REM465)/) { my $res = $li; $res =~ s/> _seq[0-9]*[02468](_REM465\s)(\d+\-\d+(,\s.*)?)$/$2/g; if ($res =~ m/seq/) { $res = "0"; }else{ $res =~ s/\s//g; } @alt = split(",",$res); $result{$h_encodingID->{$seqname}}{'alt'} = join(", ", @alt); } if (defined $result{$h_encodingID->{$seqname}}) { my $n = @ref; my $m = @alt; ## If mutated protein get more disorder than wild-type protein if ($n < $m){ $result{$h_encodingID->{$seqname}}{'cons'} = "create disorder"; ## If wild-type protein get more disorder than mutated protein }elsif ($n > $m){ $result{$h_encodingID->{$seqname}}{'cons'} = "remove disorder"; ## If disorder domains number are equal }else{ my $list_ref = join(";", @ref); my $list_alt = join(";", @alt); # If disorder domains list are equal if ($list_ref eq $list_alt){ $result{$h_encodingID->{$seqname}}{'cons'} = "none"; }else{ $result{$h_encodingID->{$seqname}}{'cons'} = "move limits disorder"; } } } } } } $fh_temp_file->close(); #write results in outfile foreach my $id_seq (keys %result){ print $fh_out "$id_seq\t$result{$id_seq}{'cons'}\t$result{$id_seq}{'ref'}\t$result{$id_seq}{'alt'}\n"; #~ foreach my $res (keys %{$result{$id_seq}}){ #~ print $fh_out $result{$id_seq}{'cons'}; #~ print $fh_out $result{$id_seq}{'ref'}; #~ print $fh_out $result{$id_seq}{'alt'}; #~ } #~ print $fh_out "\n"; } $fh_out->close(); } 1;