#!/usr/bin/perl -w =pod =head1 NAME Compara Module =head1 SYNOPSIS use Compara; =head1 OPTIONS =head1 DESCRIPTION This module calculate conservation scores from Ensembl Compara and gravity mutation scores from BLOSUM62 matrix. =head1 VERSION Version 1 =head1 DATE 20 janvier 2012 =head1 AUTHORS Rachel Legendre (rlegendre@jouy.inra.fr) =cut package conservation; #~ #~ PERL5LIB=${PERL5LIB}:/usr/local/bioinfo/src/ensembl-api/bioperl-live #~ PERL5LIB=${PERL5LIB}:/usr/local/bioinfo/src/ensembl-api/current/ensembl/modules #~ PERL5LIB=${PERL5LIB}:/usr/local/bioinfo/src/ensembl-api/current/ensembl-compara/modules #~ export PERL5LIB use base 'Exporter'; our @EXPORT = qw/getComparaScore/; use strict; use Getopt::Long; use Pod::Usage; use strict; use IO::File; use Bio::EnsEMBL::Registry; #~ use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor; #~ use Bio::EnsEMBL::Compara::GenomicAlignBlock; #~ use Bio::EnsEMBL::Compara::ConservationScore; use Bio::Matrix::IO; sub getComparaScore($$$$$){ my ($inputfile,$species,$outpath,$registry_file,$compara_user) = @_; my ($registry,$seq_region_start,$seq_region_end,$feature,$score,$file_out_path,$name,$iterator,$fh_snp,$fh_out,$indic,$prog,@feat); $prog = "conservation"; #results directory $file_out_path = $outpath."/".$prog."_results.res"; #database connect $registry = 'Bio::EnsEMBL::Registry'; $registry->load_all($registry_file); #~ $registry->load_registry_from_db( #~ -host => 'ensembldb.ensembl.org', #~ -user => 'anonymous' #~ ); ## => GenomicAlignBlocks Objects # Getting the GenomicAlignBlock adaptor: my $genomic_align_block_adaptor = $registry->get_adaptor($compara_user, 'compara', 'GenomicAlignBlock'); # Getting the GenomeDB adaptor: my $genome_db_adaptor = $registry->get_adaptor($compara_user, 'compara', 'GenomeDB'); # Getting the MethodLinkSpeciesSet adaptor: my $method_link_species_set_adaptor = $registry->get_adaptor($compara_user, 'compara', 'MethodLinkSpeciesSet'); # Fetching the MethodLinkSpeciesSet object corresponding to GERP_CONSERVATION_SCORE for mammals my $mlss = $method_link_species_set_adaptor->fetch_by_method_link_type_species_set_name("GERP_CONSERVATION_SCORE", "mammals"); throw("Unable to find method_link_species_set") if (!defined($mlss)); # Getting the slice adaptor my $slice_adaptor = $registry->get_adaptor( $species, 'core', 'Slice'); # Getting the Conservation Score adaptor my $conservation_score_adaptor = $registry->get_adaptor($compara_user, "compara", "ConservationScore"); $fh_snp = new IO::File $inputfile, "r" || die "Can not open $inputfile : $!"; $fh_out = new IO::File $file_out_path, "w" || die "Can not open $file_out_path : $!"; my $fh_title = new IO::File ">$outpath/$prog\_title.txt" || die "Can not open $outpath/$prog\_title.txt : $! \n"; print $fh_title ("Sequence_Name\tGrantham_score\tObserved_score\tExpected_score\tDifference_score\n"); $fh_title->close(); while (<$fh_snp>){ #~ next if ( $_ ~ /^#/); my @line = split("\t", $_); #~ undef $indic; #~ if((defined($$feature{id})) && ($$feature{id} eq $line[0])){#If previous name already exist (severals transcripts exists) #~ $iterator++;#Increment $iterator #~ } #~ else{ #~ $iterator = 1; #~ } #~ print "line[0]:*$line[0]*\n"; my ($prefix) = $line[0] =~ /([^_]+_[^_]+_[^\/]+)/; #~ print "PREFIX: $prefix\n"; $$feature{id} = $prefix."/".$line[2]; #on vérifie que c'est bien un SNP et non un indel next if ( $$feature{id} !~ /\w+_\d+_[ATCG]{1}\/[ATCG]{1}/); @feat = split(":", $line[1]); $$feature{chrom} = $feat[0]; $$feature{pos} = $feat[1]; $$feature{mutation} = $line[2]; $$feature{transcript} = $line[4]; $$feature{amino} = $line[10]; #~ # Getting ref and alt amino-acides my @aa = split ("/", $$feature{amino}); my $ref_aa = $aa[0]; my $mut_aa = $aa[1]; $$feature{position} = $line[9]; $$feature{rsID} = $line[12]; $name = $$feature{id}."_".$$feature{rsID}."_".$$feature{transcript}."[".$$feature{amino}."-".$$feature{position}."]"; #~ $name = $$feature{id}."_".$$feature{rsID}."_".$$feature{transcript}."_transcript".$iterator."[".$$feature{amino}."-".$$feature{position}."]"; print $fh_out "$name\t"; #~ print "$name\t"; $score = get_gravity_score ($ref_aa, $mut_aa); print $fh_out "$score\t"; #~ print "$score\t"; # Fetching boundaries for this transcript my $transcript_adaptor = $registry->get_adaptor( $species, 'Core', 'Transcript' ); my $transcript = $transcript_adaptor->fetch_by_stable_id($$feature{transcript}); $seq_region_start = $$feature{pos}-10; $seq_region_end = $$feature{pos}+10; #~ print $$feature{chrom}."\t".$seq_region_start."\t".$seq_region_end."\n"; # Fetching a Slice object: my $slice = $slice_adaptor->fetch_by_region('toplevel', $$feature{chrom}, $seq_region_start,$seq_region_end); throw("No Slice can be created with coordinates $$feature{chrom}:$seq_region_start-$seq_region_end") if (!$slice); #~ my $seq = $slice->seq(); #~ print "SLICE_SEQ:$seq\n"; #~ my $transcripts = $slice->get_all_Transcripts; #~ foreach $score (@$transcripts) { #~ my $a = $score->stable_id; #~ #~ print "TX:$$feature{transcript} / $a\n"; #~ } my $conservation_scores = $conservation_score_adaptor->fetch_all_by_MethodLinkSpeciesSet_Slice($mlss, $slice, $slice->end-$slice->start+1); $indic = 0; foreach $score (@$conservation_scores) { #~ print "SCORE_POSITION: $score->position\n"; if($score->position == "11"){ printf $fh_out ("%.4f\t%.4f\t%.4f\n", $score->observed_score, $score->expected_score, $score->diff_score) ; #~ printf ("%.4f\t%.4f\t%.4f\n", $score->observed_score, $score->expected_score, $score->diff_score) if $score->position =="11"; $indic = 1; } } if ($indic == 0){ print $fh_out "\t\t\n"; #~ print "*TEST\t\t*\n"; } #~ print "INDIC>>$indic<<\n"; } $fh_snp->close(); $fh_out->close(); } # based on Grantham score matrix "Science-1974-Grantham-862-4.pdf" sub get_gravity_score ($$){ my $ref_aa = shift; my $mut_aa = shift; my @matrix = ( [0,110,145,114,58,99,124,56,142,155,144,112,89,68,46,121,65,80,135,177], [110,0,102,103,71,112,96,125,77,97,77,180,29,43,86,26,96,54,91,101], [145,102,0,98,92,96,82,138,5,22,36,198,99,113,153,107,172,138,15,61], [114,103,98,0,38,27,68,42,95,114,110,169,77,76,91,103,108,93,87,147], [58,71,92,38,0,58,69,59,89,103,92,149,47,42,65,78,85,65,81,128], [99,112,96,27,58,0,64,60,94,113,112,195,86,91,111,106,126,107,84,148], [124,96,82,68,69,64,0,109,29,50,55,192,84,96,133,97,152,121,21,88], [56,125,138,42,59,60,109,0,135,153,147,159,98,87,80,127,94,98,127,184], [142,77,5,95,89,94,29,135,0,21,33,198,94,109,149,102,168,134,10,61], [155,97,22,114,103,113,50,153,21,0,22,205,100,116,158,102,177,140,28,40], [144,77,36,110,92,112,55,147,33,22,0,194,83,99,143,85,160,122,36,37], [112,180,198,169,149,195,192,159,198,205,194,0,174,154,139,202,154,170,196,215], [89,29,99,77,47,86,84,98,94,100,83,174,0,24,68,32,80,40,87,115], [68,43,113,76,42,91,96,87,109,116,99,154,24,0,46,53,61,29,120,130], [46,86,153,91,65,111,133,80,149,158,143,139,68,46,0,94,23,42,142,174], [121,26,107,103,78,106,97,127,102,102,85,202,32,53,94,0,101,56,95,110], [65,96,172,108,85,126,152,94,168,177,160,154,80,61,23,101,0,45,160,180], [80,54,138,93,65,107,121,98,134,140,122,170,40,29,42,56,45,0,126,152], [135,91,15,87,81,84,21,127,10,28,36,196,87,120,142,95,160,126,0,67], [177,101,61,147,128,148,88,184,61,40,37,215,115,130,174,110,180,152,67,0] ); my %header = ("S" => 0, "R" =>1, "L"=>2, "P"=>3, "T"=>4, "A"=>5, "V"=>6, "G"=>7, "I"=>8, "F"=>9, "Y"=>10, "C"=>11, "H"=>12, "Q"=>13, "N"=>14, "K"=>15, "D"=>16, "E"=>17, "M"=>18, "W"=>19); return $matrix[$header{$ref_aa}][$header{$mut_aa}]; } 1;