#!/usr/bin/perl -w package runESEfinder; use Bio::Seq; use Bio::PrimarySeq; use Bio::SeqFeatureI; use Bio::Tools::Analysis::DNA::ESEfinder; use strict; use IO::File; use base 'Exporter'; our @EXPORT = qw/runESEfinder ESEfind/; use formatAlleleSeq; # http://search.cpan.org/~cjfields/BioPerl-1.6.901/Bio/Tools/Analysis/DNA/ESEfinder.pm #For each sequence, this function will create a new file sub runESEfinder($$$$$$){ my ($delta,$workpath,$outpath,$infile,$encodingfile,$BASE_SEQ_NAME) = @_; my @args = (); my ($name,$seq,$seqname,$fh_temp,$outfile,$len); print "infile:$infile\n"; my $prog = "ESEfinder"; my $result_file = "$outpath/$prog\_output.txt"; my $input_file = "temp-".$prog.".fasta"; my $output_file = "temp-".$prog.".".$prog; # get the base name of the input file to create the ouput file name ($outfile) = $infile =~ /\/([^\/]+)$/; $outfile =~ s/\..*/_$prog\_results.res/; $outfile = $outpath."/".$outfile; print "OUTFILE: $outfile\n"; # get the encoing information my ($h_encodingName,$h_encodingID,$h_name_allele,$h_name_position,$h_name_var_type) = getEncoding($encodingfile); # open the input and output files my $fh_in = new IO::File $infile || die "Can not open file $infile : $!";#Data my $fh_out = new IO::File ">>result_file" || die "Can not create the result file $result_file : $! \n"; while($name = <$fh_in>){ # check the fasta format print "$name"; if($name =~ /^>.*/){ ## Temporary sequence: # protein sequence to temporary file / 1 sequence per file $fh_temp = new IO::File ">$workpath/temp-$prog.fasta" || die "Can not create $workpath/temp-$prog.fasta: $! \n"; print $fh_temp $name;#print the name of the SNP $seq = <$fh_in>; print $fh_temp $seq;#Print the sequence of the SNP $fh_temp->close(); # back transeq : from protein sequence back to transcript sequence (DNA) @args = ("backtranseq", "-sequence" , "$workpath/temp-$prog.fasta", "-auto", "", "-outfile", "$workpath/temp_adn-$prog.fasta"); system("backtranseq -sequence $workpath/temp-$prog.fasta -auto -outfile $workpath/temp_adn-$prog.fasta") ;# == 0 or die "system @args failed runyinoyang: $?"; # FUNCTION to get the ESEfinder prediction concerning the transcript (DNA!!!) sequence ESEfind("$workpath/temp_adn-$prog.fasta",$fh_out,$h_name_position,); } } } sub ESEfind($$$){ my ($infile,$fh_out,$h_name_position) = @_; my ($sequence, $name); # get the sequence my $fh_in = new IO::File "$infile" or die "cannot open file $infile, : $!\n"; $/=">"; while(<$fh_in>){ ($name,$sequence) = $_=~ /([^\n]+)\n([^>]+)/; } $sequence =~ s/\n//g; $fh_in->close(); #~ my ($name) = <$fh_in>; #~ chomp($name); #~ $name =~ s/>//; #~ print "NAME:*$name*\n"; #~ my ($sequence) = <$fh_in>; #~ print "sequence:$_ *$sequence*\n"; #~ print "$sequence\n"; # create the sequence object my $seq; # a Bio::PrimarySeqI or Bio::SeqI object #~ $seq = Bio::PrimarySeq->new( #~ -primary_id => 'test', #~ -seq=>'atgcatgctaggtgtgtgttttgtgggttgtactagctagtgat'. #~ -alphabet=>'dna'); $seq = Bio::PrimarySeq->new( -primary_id => $name, -seq=> $sequence. -alphabet=>'dna'); my $ese_finder = Bio::Tools::Analysis::DNA::ESEfinder->new(-seq => $seq); # run ESEfinder prediction on a DNA sequence $ese_finder->run(); die "Could not get a result" unless $ese_finder->status =~ /^COMPLETED/; print $ese_finder->result; # print raw prediction to STDOUT #~ foreach my $feat ( $ese_finder->result('Bio::SeqFeatureI') ) { # do something to SeqFeature # e.g. print as GFF #~ print $feat->gff_string, "\n"; #~ # or store within the sequence - if it is a Bio::SeqI #~ $seq->add_SeqFeature($feat) #~ } }