#!/usr/bin/perl -w

=pod

=head1  NAME
formatSNPeffect-protseq.pl
 
=head1  SYNOPSIS

formatSNPeffect-protseq.pl --infile <file> --outdir <dir> --species <name> --registry_file <file>

=head1  OPTIONS

  --infile string, the path of the input file SNP_PREDICTOR_EFFECT FORMAT FILE
  --outdir string, the path of the output directory DIRECTORY
  --species string, species name from ENSEMBL names (e.g. cow, sus scrofa...) STRING
  --registry_file string, registry file for the local Ensembl database FILE WITH PATH
 
=head1 DESCRIPTION

formatSNPeffect-protseq.pl - This program is part of a pipeline of programs for SNP annotation. It formats the rsID into Ensembl format.
							

WARNING:
THE FASTA FILES SHOULD HAVE FORMAT: name.fasta
ALL FILES MUST HAVE ONLY 1 EXTENSION: .txt or .tab....


=head1 DATE

07/06/2012

=head1 AUTHORS

Sabrina Rodriguez

=cut

use strict;

# find the absolute path to the local library
use FindBin;
# return the absolute path to the local library
use lib "$FindBin::RealBin/../lib";
#~ use lib '/usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/lib';

use Getopt::Long;
use Pod::Usage;
use formatAlleleSeq;


#perl formatRSIDtoENSEMBL.pl -infile /home/sigenae/work/Sabrina/RSID_cow_test/test2.txt -registry_file /usr/local/bioinfo/src/ensembl-api/variant_effect_predictor.registry -species cow -outdir /home/sigenae/work/Sabrina/test_rs 

############################ OPTIONS / PARAMETERS ############################

# Option specifications handed to Getopt::Long. Every option carries the
# "=s" suffix, i.e. it requires a string value.
my @getopt_args = qw(
    -infile=s
    -registry_file=s
    -species=s
    -outdir=s
);

# Receives the parsed command-line values, keyed by option name.
my %options;

# Fall back to the usage text when the command line cannot be parsed.
GetOptions( \%options, @getopt_args ) or usage();

# usage()
#
# Print this script's POD documentation and terminate with exit status 1.
#
# Takes no arguments and never returns to the caller.
#
# The documentation is rendered in-process through Pod::Usage rather than
# by exec'ing "pod2text $0":
#   - no shell is involved, so a script path containing spaces or shell
#     metacharacters cannot break (or alter) the command;
#   - the exit status is reliably 1, whereas a successful exec replaced
#     the process and reported pod2text's own status (normally 0);
#   - no external pod2text executable has to be present in PATH.
sub usage {
    pod2usage(
        -exitval   => 1,    # signal a usage error to the calling shell
        -verbose   => 2,    # print the complete manual, as pod2text did
        -noperldoc => 1,    # format with Pod::Text, do not spawn perldoc
    );
}

# All four options are mandatory: show the usage text as soon as one of
# them is absent from the parsed command line.
for my $required (qw(infile registry_file species outdir)) {
    usage() unless exists $options{$required};
}

############################ PROGRAM ############################

# Pull the option values into named variables in a single hash slice.
my ( $infile, $outdir, $registry_file, $species )
    = @options{qw(infile outdir registry_file species)};

# Delegate the actual work to getEnsemblformat_from_rsID, which is not
# defined in this script (presumably exported by formatAlleleSeq -- confirm).
getEnsemblformat_from_rsID( $infile, $outdir, $registry_file, $species );
