#!/usr/bin/perl -w

=pod

=head1  NAME
extract_netoglyc_results.pl
 
=head1  SYNOPSIS

extract_netoglyc_results.pl --infile <file> --encodingfile <file> --outputfile <file> --delta <value> --base_name <string>

=head1  OPTIONS

  --infile string, the path to the protein sequence query file - !!!the sequence name should be the short name!!!
  --encodingfile string, the path of the file containing the encoded protein names (long name "..._allele1" <-> short name "_seq")
  --outputfile string, the path of the output file  for parsed results
  --delta string, value between 0 and 1. Discriminant for the score comparison. (0.5 per default)
  --base_name string, "base name" for sequence shorter name version (_seq)

=head1 DESCRIPTION

extract_netoglyc_results.pl - This program is part of a pipeline of programs for SNP annotation.
							It uses a program called netoglyc and predicts O-glycosylation sites in mammalian protein on amino acid
							http://www.cbs.dtu.dk/services/NetOGlyc/
							if the 2 alleles of a protein show a different signal then there is loss or gain of signal.
							if allele1 has a prediction and allele2 doesn't, then there is loss of signal.
							if allele2 has a prediction and allele1 doesn't, then there is gain of signal.
							Moreover, the difference in score between allele1 and allele2 is measured. If that difference is greater
							than the delta set by the user, then los? or gain? is retrieved.

=head1 DATE

20/02/2012

=head1 AUTHORS

Sabrina Rodriguez

=cut

use strict;

# find the absolute path to the local library
use FindBin;
# return the absolute path to the local library
use lib "$FindBin::RealBin/../lib";
#~ use lib '/usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/lib';

use Getopt::Long;
use Pod::Usage;
use formatAlleleSeq;
use runnetoglyc;

#~ /usr/local/bioinfo/src/netOglyc/current/netOglyc /home/sigenae/work/Sabrina/fic/res/list_snps_coded6_1.fasta > /home/sigenae/work/Sabrina/fic/netoglyc_result.txt

#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile /home/sigenae/work/Sabrina/fic/netoglyc_result.txt --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/fic/saved_names.txt --outputfile /home/sigenae/work/Sabrina/fic/netoglyc_formatted.txt --base_name "_seq"

#~ /usr/local/bioinfo/src/netOglyc/current/netOglyc /home/sigenae/work/Sabrina/protseq_netoglycPB_input/SNP_proteins_encoded3000_1.fasta > /home/sigenae/work/Sabrina/fic/netoglycPB_result.txt

#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile /home/sigenae/work/Sabrina/fic/netoglycPB_result.txt --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/fic/saved_names.txt --outputfile /home/sigenae/work/Sabrina/fic/netoglycPB.res --base_name "_seq"


#~ /usr/local/bioinfo/src/netOglyc/current/netOglyc /home/sigenae/work/Sabrina/protseq_Horse/LESS-4000/SNP_proteins_encoded3000_1_l4000.fasta > /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_result.txt

#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_result.txt --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/protseq_Horse/names_encoding.txt --outputfile /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_parsed.res --base_name "_seq"

#~ /usr/local/bioinfo/src/netOglyc/current/netOglyc /home/sigenae/work/Sabrina/protseq_Horse/protNseqSizemin/list_snps_coded50_736.fasta > /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_result.txt

#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_result.txt --delta 0.5 --encodingfile /home/sigenae/work/Sabrina/protseq_Horse/names_encoding.txt --outputfile /home/sigenae/work/Sabrina/protseq_Horse/netoglyc_parsed.res --base_name "_seq"

#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile=/work/sigenae/vmergatisdev/output_repository/SR_netoglyc/162_default/output_netoglyc/SNP_proteins_encoded50_3.SR_netoglyc.res --encodingfile=/work/sigenae/vmergatisdev/output_repository/SR_formatSNPeffect-protseq/96_default/names_encoding.txt --delta=0.5 --base_name=_seq  --outputfile=/home/sigenae/work/Sabrina/TEST_suite/netoglyc_res.txt

# perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/extract_netoglyc_results.pl --infile=/home/sigenae/work/Sabrina/horse_outputs//SNP_proteins_encoded50_3.SR_netoglyc.res --encodingfile=/work/sigenae/vmergatisdev/output_repository/SR_formatSNPeffect-protseq/162_default/names_encoding.txt --delta=0.5 --base_name=_seq --outputfile=/home/sigenae/work/Sabrina/horse_outputs/SNP_proteins_encoded50_3.SR_netoglyc_parsed.res 

############################ OPTIONS / PARAMETERS ############################

# Option specifications handed to Getopt::Long. Each option carries the
# "=s" suffix, i.e. it expects a string value on the command line.
my @getopt_args = qw(
  -infile=s
  -encodingfile=s
  -outputfile=s
  -delta=s
  -base_name=s
);

# Values parsed from the command line, keyed by option name.
my %options;

# GetOptions returns false when the command line cannot be parsed; in that
# case print the help text and stop.
usage() unless GetOptions( \%options, @getopt_args );

# Print this script's POD documentation with pod2text, then terminate with
# exit status 1 so that callers can detect the usage error.
#
# pod2text is run as a child process with the list form of system():
#   - unlike exec, system returns control to this script, so the exit(1)
#     below is actually reached and the failure status is reported;
#   - the list form bypasses the shell, so a script path containing spaces
#     or shell metacharacters is passed to pod2text unchanged.
#
# Takes no arguments and never returns.
sub usage {
  if ( system( 'pod2text', $0 ) != 0 ) {
    # pod2text is missing or failed: still tell the user what went wrong.
    warn "Unable to display the documentation with pod2text; see the POD section of $0\n";
  }
  exit( 1 );
}

# Every option except --delta is mandatory: print the help text and stop as
# soon as one of them is missing from the command line.
for my $required ( 'infile', 'encodingfile', 'outputfile', 'base_name' ) {
  usage() if ( !exists $options{$required} );
}

# The OPTIONS section of the POD documents --delta as "0.5 per default", so
# fall back to that value instead of rejecting a command line that omits it.
$options{'delta'} = 0.5 if ( !exists $options{'delta'} );

############################ PROGRAM ############################

my $encodingfile = $options{'encodingfile'};
my $infile = $options{'infile'};
my $outputfile = $options{'outputfile'};
my $delta = $options{'delta'};
my $base_name = $options{'base_name'};

# Progress marker written to standard output once the options are accepted.
print "OK\n";

# Hand the validated options over to the extraction routine.
# NOTE(review): netoglyc_extract_results is not defined in this file; it is
# presumably exported by one of the local modules loaded above (runnetoglyc
# or formatAlleleSeq) - confirm.
netoglyc_extract_results($delta,$infile,$encodingfile,$outputfile,$base_name);



