=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2024] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut
=head1 CONTACT
Please email comments or questions to the public Ensembl
Questions may also be sent to the Ensembl help desk at
=cut
=head1 NAME
Bio::EnsEMBL::ProteinFeature
=head1 SYNOPSIS
my $feature = Bio::EnsEMBL::ProteinFeature->new(
-start => $start,
-end => $end,
-hstart => $hit_start,
-hend => $hit_end,
-hseqname => $hit_name
);
=head1 DESCRIPTION
ProteinFeature objects represent domains or other features of interest
on a peptide sequence.
=head1 METHODS
=cut
$Bio::EnsEMBL::ProteinFeature::VERSION = '113.0.0';
use strict;
=head2 new
Arg [IDESC] : (optional) string An interpro description
Arg [INTERPRO_AC] : (optional) string An interpro accession
Arg [TRANSLATION_ID] : (optional) integer A translation dbID
Arg [...] : named arguments to FeaturePair superclass
Example :
$pf =
Bio::EnsEMBL::ProteinFeature->new( -IDESC => $idesc,
-INTERPRO_AC => $iac,
@fp_args );
Description: Instantiates a Bio::EnsEMBL::ProteinFeature
Returntype : Bio::EnsEMBL::FeaturePair
Exceptions : none
Caller : general
Status : Stable
=cut
sub new {
my ($proto, @args) = @_;
my $class = ref($proto) || $proto;
my $self;
my ($idesc, $ilabel, $interpro_ac, $translation_id, $external_data, $hit_description, $cigar_string, $align_type, $slice) = rearrange(['IDESC', 'ILABEL', 'INTERPRO_AC', 'TRANSLATION_ID', 'EXTERNAL_DATA', 'HDESCRIPTION', 'CIGAR_STRING', 'ALIGN_TYPE', 'SLICE'], @args);
# BaseAlignFeature expects cigar_line or features
if($cigar_string && $align_type){
$self = $class->SUPER::new(@args);
}else{
#call the grand parent directly
$self = $class->Bio::EnsEMBL::FeaturePair::new(@args);
}
# the strand of protein features is always 0
$self->{'strand'} = 0;
$self->{'idesc'} = $idesc || '';
$self->{'ilabel'} = $ilabel || '';
$self->{'interpro_ac'} = $interpro_ac || '';
$self->{'translation_id'} = $translation_id || '';
$self->{'external_data'} = $external_data || '';
$self->{'hit_description'} = $hit_description || '';
$self->{'cigar_string'} = $cigar_string || '';
$self->{'align_type'} = $align_type;
return $self;
}
=head2 strand
Arg [1] : Ignored
Description: Overwrites Bio::EnsEMBL::Feature->strand to not allow
: the strand to be set.
Returntype : int
Status : Stable
=cut
#do not allow the strand to be set
sub strand {
my $self = shift;
return $self->{'strand'};
}
=head2 idesc
Arg [1] : (optional) string The interpro description
Example : print $protein_feature->idesc();
Description: Getter/Setter for the interpro description of this protein
feature.
Returntype : string
Exceptions : none
Caller : general
Status : Stable
=cut
sub idesc {
my $self = shift;
$self->{'idesc'} = shift if (@_);
return $self->{'idesc'};
}
=head2 ilabel
Arg [1] : (optional) string The interpro label
Example : print $protein_feature->ilabel();
Description: Getter/Setter for the interpro label of this protein
feature.
Returntype : string
Exceptions : none
Caller : general
Status : Stable
=cut
sub ilabel {
my $self = shift;
$self->{'ilabel'} = shift if (@_);
return $self->{'ilabel'};
}
=head2 interpro_ac
Arg [1] : (optional) string The interpro accession
Example : print $protein_feature->interpro_ac();
Description: Getter/Setter for the interpro accession of this protein
feature.
Returntype : string
Exceptions : none
Caller : general
Status : Stable
=cut
sub interpro_ac {
my $self = shift;
$self->{'interpro_ac'} = shift if (@_);
return $self->{'interpro_ac'};
}
=head2 translation_id
Arg [1] : (optional) integer The dbID of the translation
Example : print $protein_feature->translation_id();
Description: Getter/Setter for the translation dbID of this protein
feature.
Returntype : string
Exceptions : none
Caller : general
Status : Stable
=cut
sub translation_id {
my $self = shift;
$self->{'translation_id'} = shift if (@_);
return $self->{'translation_id'};
}
sub external_data {
my $self = shift;
$self->{'external_data'} = shift if (@_);
return $self->{'external_data'};
}
=head2 summary_as_hash
Example : $protein_feature_summary = $protein_feature->summary_as_hash();
Description : Retrieves a textual summary of this Protein feature.
Not inherited from Feature.
Returns : hashref of arrays of descriptive strings
Status : Intended for internal use
=cut
sub summary_as_hash {
my $self = shift;
my %summary;
$summary{'type'} = $self->analysis->db;
$summary{'id'} = $self->display_id;
$summary{'start'} = $self->start;
$summary{'end'} = $self->end;
$summary{'interpro'} = $self->interpro_ac;
$summary{'description'} = $self->idesc;
$summary{'hit_start'} = $self->hstart;
$summary{'hit_end'} = $self->hend;
$summary{'cigar_string'} = $self->cigar_string;
$summary{'align_type'} = $self->align_type;
$summary{'hseqname'} = $self->hseqname;
$summary{'translation_id'} = $self->translation_id;
return \%summary;
}
=head2 alignment_strings
Arg [1] : list of string $flags
Example : $pf->alignment_strings
Description: Allows to rebuild the alignment string of both the query and target sequence
using the sequence from translation object and
MD Z String for mismatching positions. Regex : [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)* (Refer: SAM/BAM specification)
eg: MD:Z:96^RHKTDSFVGLMGKRALNS0V14
Returntype : array reference containing 2 strings
the first corresponds to seq
the second corresponds to hseq
Exceptions : none
Caller : general
Status : Stable
=cut
sub alignment_strings {
my $self = shift;
#Translations
my $transl_adaptor = $self->adaptor->db->get_TranslationAdaptor();
my $transl_object = $transl_adaptor->fetch_by_dbID($self->translation_id);
my $seq;
if(defined $transl_object && $transl_object->isa('Bio::EnsEMBL::Translation')) {
$seq = $transl_object->transcript()->translate()->seq();
}
if ($self->align_type eq 'mdtag') {
if(defined $seq && defined $self->cigar_string){
return $self->_mdz_alignment_string($seq,$self->cigar_string);
}else{
warn "sequence or cigar_line not found for " . $self->translation_id;
}
} else {
throw("alignment_strings method not implemented for " . $self->align_type);
}
return;
}
sub transform {
my $self = shift;
$self->throw( "ProteinFeature cant be transformed directly as".
" they are not on EnsEMBL coord system" );
return;
}
=head2 _hit_unit
Arg [1] : none
Description: PRIVATE implementation of abstract superclass method. Returns
1 as the 'unit' used for the hit sequence.
Returntype : int
Exceptions : none
Caller : Bio::EnsEMBL::BaseAlignFeature
Status : Stable
=cut
sub _hit_unit {
return 3;
}
=head2 _query_unit
Arg [1] : none
Description: PRIVATE implementation of abstract superclass method. Returns
3 as the 'unit' used for the query sequence.
Returntype : int
Exceptions : none
Caller : Bio::EnsEMBL::BaseAlignFeature
Status : Stable
=cut
sub _query_unit {
return 3;
}
1;