#!/usr/bin/perl

=pod

=head1  NAME

join_annot_results.pl
 
=head1  SYNOPSIS

join_annot_results.pl --infile_dir <directory> --outfile_dir <directory>

=head1  OPTIONS

  --infile_dir string, directory containing all the results and title files from the annotation pipeline
  --outfile_dir string, directory which will contain the result file

=head1 DESCRIPTION

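join_annot_results.pl joins the result files (*_results.txt) and the title
files (*_title.txt) found in --infile_dir into a single tab-delimited file,
final_annotation_results.txt, written to --outfile_dir.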

=head1 VERSION

Version 1

=head1 DATE

19/03/12

=head1 AUTHORS

Sabrina Rodriguez (sabrina.rodriguez@jouy.inra.fr)

=cut


use strict;
use Getopt::Long;
use IO::File;

#use ParamParser;

#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/join_annot_results.pl --infile_dir /home/sigenae/work/Sabrina/RESULTS_ALL --outfile_dir /home/sigenae/work/Sabrina/FINAL

#~ perl /usr/local/bioinfo/src/ergatisdev/current/bin/AnnotationPipelines/bin/join_annot_results.pl --infile_dir /home/sigenae/work/Sabrina/result_horse_all_components --outfile_dir /home/sigenae/work/Sabrina/join_horse

############################ OPTIONS / PARAMETERS ############################

# Both options take a mandatory string value; parsed values end up in %options.
my %options = ();
my @getopt_args = ( '-infile_dir=s', '-outfile_dir=s' );

# an unknown or malformed option shows the documentation and stops
GetOptions( \%options, @getopt_args ) or usage();

# Print the POD documentation of this script, then terminate with status 1.
#
# pod2text is run as a child process instead of being exec'ed: exec replaces
# the current process, so the exit(1) that followed it was never reached and
# a usage error ended with the status of pod2text (normally 0).
# The list form of system() passes the script path as a single argument, so a
# path containing spaces or shell metacharacters is handled correctly.
sub usage {
  if ( system( 'pod2text', $0 ) != 0 ) {
    # pod2text is missing or failed: still tell the user what is expected
    print STDERR "Usage: $0 --infile_dir <directory> --outfile_dir <directory>\n";
  }
  exit( 1 );
}

# Both directories are mandatory: show the documentation if one is missing.
foreach my $required_option ( 'infile_dir', 'outfile_dir' ) {
  usage() unless exists $options{$required_option};
}

my ( $infile_dir, $outfile_dir ) = @options{ 'infile_dir', 'outfile_dir' };

############################ VARIABLES ############################

# check which options are present
# be careful not to misspell the option name

my ($name,$file,$check,$prog,$key,$fh_out,$tab,$i,$title);

# Read the content of the input directory directly instead of running
# "ls $infile_dir" through a shell: the directory name comes from the command
# line and may contain spaces or shell metacharacters, and a failing ls was
# silently treated as an empty directory.
opendir(my $dh_in, $infile_dir) or die "cannot read directory $infile_dir: $!\n";

# hidden entries (".", ".." and dot files) are skipped, as ls does by default
my @tmp = sort grep { !/^\./ } readdir($dh_in);
closedir($dh_in);

# one name per line; only used in the error message about the directory content
my $list = @tmp ? join("\n",@tmp)."\n" : "";
my %file_list = map { $_ => 1 } @tmp;


my $h_title_list = {}; # list of program titles
my $h_seq_list = {}; # list of sequences or SNPs
my $h_prog_list = {}; # list of programs
my $h_info_list = {}; # seq_prog => results

############################ PROGRAM ############################


# get the information
#
# The files are visited in sorted order and must strictly alternate:
# a results file first, then a title file. $check remembers which
# kind is expected next: 0 => results file, 1 => title file.
# Anything breaking this alternation stops the script with exit status 11.
$check = 0;
my $pending_results = ''; # last results file still waiting for its title file
foreach $file (sort keys %file_list){
			
	if($file =~ /results/ and $check == 0){
		
		# get the annotation results
		($h_info_list,$h_seq_list,$h_prog_list) = getAnnotInformation($file,$infile_dir,$h_seq_list,$h_info_list,$h_prog_list); 
		
		$check = 1;
		$pending_results = $file;
	}
	elsif($file =~/title/ and $check == 1){
		
		# get the title
		($h_title_list) = getAnnotTitle($file,$infile_dir,$h_title_list);
		
		$check = 0;
	}
	else{
		
		print "join_annot_results : problem with directory content: $list\n\nNo result or title file for $file\n";
		exit(11);
	}
	
	print ">>$file => $file_list{$file}\n";
}

# A results file coming last in the listing never meets the else branch above:
# without this check its program would have results but no title, and the
# columns of the output file would be shifted.
if($check == 1){
	
	print "join_annot_results : problem with directory content: $list\n\nNo title file for $pending_results\n";
	exit(11);
}



# print the information
#
# Output: one tab-delimited file, final_annotation_results.txt, with
#   - a header line: "Name" followed by every title column of every program,
#     each column being prefixed with the program name;
#   - one line per sequence: its name followed by the results of each program,
#     programs being taken in sorted order.
my $outfile_path = "$outfile_dir/final_annotation_results.txt";

# explicit 'w' mode: the path is never parsed for a mode prefix
$fh_out = IO::File->new($outfile_path, 'w') or die "cannot create file: $outfile_path : $!\n";

# print title
print {$fh_out} "Name";

foreach my $prog_name (sort keys %$h_title_list){
	
	my $prog_title = $h_title_list->{$prog_name};
	
	# every column title of the program gets the program name as prefix
	$prog_title =~ s/\t/\t$prog_name /g;
	
	print {$fh_out} "\t$prog_name $prog_title";
}
print {$fh_out} "\n";

# print results
foreach my $seq_name (sort keys %$h_seq_list){
	
	print {$fh_out} $seq_name;
	
	foreach my $prog_name (sort keys %$h_prog_list){
		
		my $result = $h_info_list->{$seq_name.";".$prog_name};
		
		if(defined $result){
			
			print {$fh_out} "\t$result";
		}
		else{
			
			# No result for this sequence and program: print as many tabs as
			# the program has columns to keep the format. The tabs of the
			# title are counted directly (tr does not modify the string);
			# splitting the title would drop trailing empty columns and
			# produce too few tabs.
			my $prog_title = $h_title_list->{$prog_name};
			my $nb_tabs = defined $prog_title ? ($prog_title =~ tr/\t//) : 0;
			
			print {$fh_out} "\t" x ($nb_tabs + 1);
		}
	}
	
	print {$fh_out} "\n";
}

# buffered write errors (disk full, ...) only show up when closing
$fh_out->close() or die "cannot close file: $outfile_path : $!\n";


###################################### SUBROUTINES #########################################

# get the number of 

# Get the column titles of one program.
#
# Arguments:
#   $titlefile  : name of the title file, expected to end with "<prog>_title.txt"
#   $infile_dir : directory containing the title file
#   $title_list : hash reference, program name => title line
#
# Only the first line of the file is read. Its first tab-delimited field is
# removed and the rest of the line is stored in $title_list under the program
# name taken from the file name. The stored title is also printed on STDOUT.
#
# Returns $title_list.
# Dies if the program name cannot be found in the file name or if the file
# cannot be opened.
sub getAnnotTitle {

	my ($titlefile,$infile_dir,$title_list) = @_;
	
	# get the program name
	my ($prog) = $titlefile =~ /([^_]+)_title\.txt$/;
	defined $prog or die "cannot get the program name from the title file name: $titlefile\n";
	
	# Get the title for that program
	# (explicit read mode: the file name is never parsed for a mode prefix)
	my $path = "$infile_dir/$titlefile";
	my $fh_in = IO::File->new($path, 'r') or die "cannot open file $path:  $! \n";
	my $line = <$fh_in>;
	$fh_in->close();
	
	# an empty file gives an empty title instead of an undefined one
	$line = '' unless defined $line;
	
	# remove the end of line, Unix or DOS
	$line =~ s/\r?\n\z//;
	
	# remove the first column
	$line =~ s/^[^\t]*\t//;
	
	$title_list->{$prog} = $line; # program => title
	
	print "$prog: $title_list->{$prog}\n";
	
	return $title_list;
	
}


# Get the annotation results of one program.
#
# Arguments:
#   $infile       : name of the results file, expected to end with "<prog>_results.txt"
#   $dir          : directory containing the results file
#   $seq_list     : hash reference, name => name, for every name met so far
#   $prog_results : hash reference, "<name>;<prog>" => result line without its first column
#   $prog_list    : hash reference, program name => program name
#
# Each line of the file starts with a name (first tab-delimited field); the
# rest of the line is the result of the program for that name. Blank lines
# are skipped so that they do not create an entry with an empty name.
# Every stored result is also printed on STDOUT.
#
# Returns the list ($prog_results,$seq_list,$prog_list).
# Dies if the program name cannot be found in the file name or if the file
# cannot be opened.
sub getAnnotInformation {
	
	my ($infile,$dir,$seq_list,$prog_results,$prog_list) = @_;
	
	# get the program name
	my ($prog) = $infile =~ /([^_]+)_results\.txt$/;
	defined $prog or die "cannot get the program name from the results file name: $infile\n";
	$prog_list->{$prog} = $prog;
	
	# Get the results for that program
	# (explicit read mode: the file name is never parsed for a mode prefix)
	my $path = "$dir/$infile";
	my $fh_in = IO::File->new($path, 'r') or die "cannot open file $path:  $! \n";
	
	while(defined(my $line = <$fh_in>)){
		
		# remove the end of line, Unix or DOS
		$line =~ s/\r?\n\z//;
		
		next if $line eq '';
		
		# the first column is the name, the rest is the result
		my ($name) = $line =~ /^([^\t]*)/;
		$line =~ s/^[^\t]*\t//;
		
		$seq_list->{$name} = $name;
		
		my $key = $name.";".$prog;
		$prog_results->{$key} = $line;
		
		print "ICI:*$name*$prog* => *$prog_results->{$key}*\n";
	}
	$fh_in->close();
	
	# send the results
	return($prog_results,$seq_list,$prog_list);
}



