#!/usr/bin/perl -w

# usage : perl indexation_fasta.pl <input_fasta> <output_report> <output_fasta_indexed> <project_name>
# Sarah Maman - 13/09/2013 - Mis en place dans le cadre du projet PhyloFish
# Copyright (C) 2013 INRA
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

use strict;
use File::Basename;

# Command line: <input_fasta> <output_report> <output_fasta_indexed> <project_name>
if (@ARGV < 4) {
    die "usage: perl $0 <input_fasta> <output_report> <output_fasta_indexed> <project_name>\n";
}

my $input_fasta          = $ARGV[0];   # reference FASTA to index
my $output_report        = $ARGV[1];   # destination of the HTML report
my $output_fasta_indexed = $ARGV[2];   # destination of the indexed FASTA copy
my $NOM                  = $ARGV[3];   # project name, used as a directory name

# The indexing algorithm is chosen from the size (in bytes) of the reference FASTA.
my $tailleFichier        = (stat($input_fasta))[7];
die "Cannot stat $input_fasta: $!\n" unless defined $tailleFichier;

# Numeric dataset id taken from an output path of the form
# ".../galaxy_dataset_<id>.<ext>". Fall back to an empty string when the path
# has another shape, so that building $ALL does not emit an
# "uninitialized value" warning on STDERR.
my ($nb1) = ($output_fasta_indexed=~/galaxy_dataset_(\d+)\.\S+$/);
$nb1 = '' unless defined $nb1;

# Tag shared by every file produced by this run:
# <day>-<month>_<hour>h<min>mn<sec>_<dataset id>secindexPHYLO
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
my $ALL =  $mday."-".($mon+1)."_".$hour."h".$min."mn".$sec."_".$nb1."secindexPHYLO";

# Absolute tool paths, so that the commands keep working when $PATH is lost
# after a Galaxy restart.
my $BWA = '/usr/local/bioinfo/bin/bwa';
my $SAMTOOLS = '/usr/local/bioinfo/bin/samtools';

# Scratch variables holding the shell commands run further down.
my $cmd1 = '';
my $cmd = '';

# control: abort the job when the last external command failed.
# Reads the child status left in $? by the most recent system call; when it is
# non-zero, prints an error message on STDERR and exits with status 1.
# Returns to the caller when $? is 0.
sub control ()
{
    return if $? == 0;

    print STDERR "Incident. Fin du job. Veuillez relancer votre outil\n";
    exit(1);
}


####################################################################################
#									           #			
#		INDEXATION FASTA REF						   #
#                                                                                  #
####################################################################################
# Index the reference FASTA with BWA. The command runs in the foreground (do
# not put it in the background): the copy step that follows needs the index
# files to exist.
# The construction algorithm depends on the size of the FASTA file:
#   - larger than 10 MB : "-a bwtsw"
#   - otherwise         : "-a is"
# (see the "index" section of the BWA manual for the limits of each algorithm).
# Nothing is run when "<input_fasta>.ann" already exists, i.e. when the FASTA
# has already been indexed.

if (! -e "${input_fasta}.ann") {
    if (defined $tailleFichier && $tailleFichier > 10485760) {    # 10 MB
        $cmd1 = "$BWA index -a bwtsw \"$input_fasta\"  >> ./bwaindex.log 2>&1";
        print STDOUT "\nIndexation  gros fichier.\n\n";
    }
    else {
        $cmd1 = "$BWA index -a is \"$input_fasta\"  >> ./bwaindex.log 2>&1";
        print STDOUT "\nIndexation  petit fichier.\n\n";
    }

    # Echo the command on STDOUT for the biologists reading the job output.
    print STDOUT "\nIndexation : $cmd1 \n\n";

    # Check $? right after system, before anything else can overwrite it.
    system $cmd1;
    control();
}
else {
    print STDOUT "\nIndexation  deja realisee en amont.\n\n";
}

# Stage the FASTA and its BWA index files under /work/galaxy-dev/<project>/.
# Every staged file shares the "indexed_fasta_$ALL.fasta" prefix; the index
# files keep their BWA suffix, e.g. for dataset_13233.dat:
#   dataset_13233.dat.bwt  dataset_13233.dat.pac  dataset_13233.dat.ann
#   dataset_13233.dat.amb  dataset_13233.dat.sa
{
    my $project_dir  = "/work/galaxy-dev/$NOM";
    my $staged_fasta = "$project_dir/indexed_fasta_$ALL.fasta";

    # Create the project directory on first use; an existing one is reused.
    if (! -d $project_dir && ! mkdir($project_dir)) {
        print STDERR "Cannot create $project_dir: $!\n";
    }

    # List-form system: no shell is involved, so paths containing spaces or
    # shell metacharacters are passed to cp untouched.
    my $copy = sub {
        my ($from, $to) = @_;
        system('cp', '-a', $from, $to) == 0
            or print STDERR "Cannot copy $from to $to\n";
    };

    # Copy the FASTA itself unless this exact staged file is already there.
    $copy->($input_fasta, $staged_fasta) if ! -e $staged_fasta;

    # Copy the five index files written next to the input FASTA.
    for my $suffix (qw(bwt pac ann amb sa)) {
        $copy->("${input_fasta}.$suffix", "$staged_fasta.$suffix");
    }
}



####################################################################################
#									           #			
#		OUTPUT avec le resultat de l INDEXATION                            #
#                                                                                  #
####################################################################################
# Write the HTML report into the current directory. The handle is closed
# before leaving this block so that the whole report is on disk when the file
# is moved to its final location further down.
# NOTE(review): $input_fasta and $NOM are written into the page without HTML
# escaping.
{
    my $report_name = "results_indexation_$ALL.html";

    open(my $html_fh, ">", $report_name) or die "Cannot create $report_name: $!";
    print {$html_fh} "<br><br>";
    print {$html_fh} "<br>Fasta file to index : $input_fasta <br>";
    print {$html_fh} "<br>Project name : $NOM <br>";
    print {$html_fh} "Fasta indexation : Success.";
    print {$html_fh} "<br><br>";
    # Buffered write errors only show up when the handle is closed.
    close($html_fh) or die "Cannot write $report_name: $!";
}


####################################################################################
#									           #			
#		GALAXY OUTPUT         						   #
#                                                                                  #
####################################################################################
# Hand the results over to Galaxy: move the HTML report to $output_report and
# copy the staged FASTA to $output_fasta_indexed. The output of mv / cp goes
# to a log file in the current directory; a failure is reported on STDERR.
{
    # Quote one word for /bin/sh (single quotes, embedded quotes escaped).
    my $sh_quote = sub {
        my ($word) = @_;
        $word =~ s/'/'\\''/g;
        return "'$word'";
    };

    my $report_name  = "results_indexation_$ALL.html";
    my $staged_fasta = "/work/galaxy-dev/$NOM/indexed_fasta_$ALL.fasta";

    if (! -e $report_name) {
        print STDERR "No indexation report. \n";
    }
    else {
        # "> file 2>&1" is the portable sh spelling; ">& file" is a bash/csh
        # extension that not every /bin/sh accepts.
        $cmd = "mv " . $sh_quote->($report_name) . " " . $sh_quote->($output_report)
             . " > ./cp_indexation_html.log 2>&1";
        system($cmd) == 0
            or print STDERR "Cannot move $report_name to $output_report (see ./cp_indexation_html.log)\n";
    }

    if (! -e $staged_fasta) {
        print STDERR "No fasta indexed \n";
    }
    else {
        $cmd = "cp -a " . $sh_quote->($staged_fasta) . " " . $sh_quote->($output_fasta_indexed)
             . " > ./cp_indexation_fa.log 2>&1";
        system($cmd) == 0
            or print STDERR "Cannot copy $staged_fasta to $output_fasta_indexed (see ./cp_indexation_fa.log)\n";
    }
}
