———————————————————=head1 LICENSE
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
Copyright [2016-2024] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=cut
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
Questions may also be sent to the Ensembl help desk at
=cut
=head1 NAME
Bio::EnsEMBL::AltAlleleGroup
=head1 SYNOPSIS
use Bio::EnsEMBL::AltAlleleGroup;
use Bio::EnsEMBL::DBSQL::AltAlleleGroupAdaptor;
my $aag_adaptor = Bio::EnsEMBL::Registry->get_DBAdaptor("Human","core","AltAlleleGroup");
# For a known Gene, find the reference alternative allele
my $aag = $aag_adaptor->fetch_Group_by_dbID($gene->dbID);
my $reference_gene = $aag->get_representative_Gene;
# Get a list of AltAlleleGroups
my $list = $aag_adaptor->fetch_all_Groups_by_type('HAS_CODING_POTENTIAL');
$list = $aag_adaptor->fetch_all_Groups();
while ($aag = shift @$list) {
$aag->get_all_Genes;
# Do your important things ...
}
# Creating and editing an AltAlleleGroup
my %type_flags = ('IS_MOST_COMMON_ALLELE' => '1','AUTOMATICALLY_ASSIGNED' => '1');
$aag = Bio::EnsEMBL::AltAlleleGroup->new(
-MEMBERS => [ [$gene_id,\%type_flags ] ],
);
$aag->remove_all_members;
$aag->add_member($gene_id,\%type_flags);
my $dbID = $aag_adaptor->store($aag);
=head1 DESCRIPTION
Alt allele groups keep track of which alleles are tied to a particular Gene
They allow related genes to be located. This class allows fetching of both
IDs and fully fledged Gene objects.
AltAlleleGroup members are assigned types to differentiate them by their
origin. These types are set as flags, allowing you to select the union of
types as well as by individual ones.
No flags set denotes a situation of no information.
Valid flags are as follows:
'IS_REPRESENTATIVE',
'IS_MOST_COMMON_ALLELE',
'IN_CORRECTED_ASSEMBLY',
'HAS_CODING_POTENTIAL',
'IN_ARTIFICIALLY_DUPLICATED_ASSEMBLY',
'IN_SYNTENIC_REGION',
'HAS_SAME_UNDERLYING_DNA_SEQUENCE',
'IN_BROKEN_ASSEMBLY_REGION',
'IS_VALID_ALTERNATE',
'SAME_AS_REPRESENTATIVE',
'SAME_AS_ANOTHER_ALLELE',
'MANUALLY_ASSIGNED',
'AUTOMATICALLY_ASSIGNED'
=cut
package
Bio::EnsEMBL::AltAlleleGroup;
use
strict;
use
warnings;
#use constant {
# IS_REPRESENTATIVE => 1,
# IS_MOST_COMMON_ALLELE => 2,
# IN_CORRECTED_ASSEMBLY => 3,
# HAS_CODING_POTENTIAL => 4,
# IN_ARTIFICIALLY_DUPLICATED_ASSEMBLY => 5,
# IN_SYNTENIC_REGION => 6,
# HAS_SAME_UNDERLYING_DNA_SEQUENCE => 7,
# IN_BROKEN_ASSEMBLY_REGION => 8,
# IS_VALID_ALTERNATE => 9,
# SAME_AS_REPRESENTATIVE => 10,
# SAME_AS_ANOTHER_ALLELE => 11,
# MANUALLY_ASSIGNED => 12,
# AUTOMATICALLY_ASSIGNED => 13,
#};
=head2 new
Arg [-MEMBERS]: A list reference of [gene_id,type_flags]
: gene_id is a dbID for Gene (consistent only within one release)
: type_flags is a hash ref of attributes for this member
Example : $aag = Bio::EnsEMBL::AltAlleleGroup->new(
-MEMBERS => [ [1,{$type} ], [2,{$other_type}],[3,{$type}],
);
Description: Creates a new alt-allele group object
Returntype : Bio::EnsEMBL::AltAlleleGroup
Exceptions : none
Caller : general
=cut
sub
new {
my
$caller
=
shift
;
my
$class
=
ref
(
$caller
) ||
$caller
;
my
$self
=
$class
->SUPER::new(
@_
);
my
(
$list
) = rearrange( [
'MEMBERS'
],
@_
);
$self
->{
'MEMBERS'
} =
$list
|| [];
return
$self
;
}
=head2 add_member
Arg [1] : Gene dbID
Arg [2] : Type List, used for assigning type flags of this member, see Description above
Description : Adds a record of one new member to the AltAlleleGroup. Once a
change is made, this must be persisted to the database with
AltAlleleGroupAdaptor->store or ->update
Example : $aag->add_member(1040032,$types_hash);
$aaga->update($aag); # updating the whole group is necessary.
=cut
sub
add_member {
my
(
$self
,
$gene_id
,
$type_hash
) =
@_
;
if
(!
$self
->contains_member(
$gene_id
)) {
push
(@{
$self
->{MEMBERS}}, [
$gene_id
, {}]);
$self
->set_attribs(
$gene_id
,
$type_hash
);
}
return
;
}
=head2 get_all_members_with_type
Arg [1] : String The type to search members by
Description : Loops through the internal members array returning all
attributes of the same type as what has been specified
Example : my $members = $aag->get_all_members_with_type('IS_VALID_ALTERNATE');
=cut
sub
get_all_members_with_type {
my
$self
=
shift
;
my
$type
=
shift
;
my
@filtered_members
;
my
$members
=
$self
->{
'MEMBERS'
};
foreach
my
$member
(
@$members
) {
if
(
exists
(
$member
->[1]->{
$type
})) {
push
@filtered_members
,
$member
;
}
}
return
\
@filtered_members
;
}
=head2 attribs
Arg [1] : Int gene id to record attributes against
Description : Returns all known attributes of the given gene id. Attributes
are returned as a HashRef but is a copy of the interally
held attribute list
Returntype : HashRef copy of all the given id's attributes
Example : $aag->attribs(10, 'IS_VALID_ALTERNATE');
$aag->attribs(10, [ 'IS_VALID_ALTERNATE' ]);
$aag->attribs(10, {IS_VALID_ALTERNATE => 1});
=cut
sub
attribs {
my
(
$self
,
$gene_id
) =
@_
;
assert_integer(
$gene_id
,
'gene_id'
);
foreach
my
$member
(@{
$self
->{MEMBERS}}) {
if
(
$member
->[0] ==
$gene_id
) {
my
$attribs
=
$member
->[1];
return
{ %{
$attribs
} };
# make a copy. never leak
}
}
return
{};
}
=head2 set_attribs
Arg [1] : Int gene id to set attributes against
Arg [2] : ArrayRef/HashRef/Scalar The attribute you wish to record
Description : Adds the given type to the specified gene id in this group. You
can specify the type using an ArrayRef, HashRef or a single scalar
Example : $aag->attribs(10, 'IS_VALID_ALTERNATE');
$aag->attribs(10, [ 'IS_VALID_ALTERNATE' ]);
$aag->attribs(10, {IS_VALID_ALTERNATE => 1});
=cut
sub
set_attribs {
my
(
$self
,
$gene_id
,
$attribs
) =
@_
;
assert_integer(
$gene_id
,
'gene_id'
);
my
$current_attribs
=
$self
->attribs(
$gene_id
);
if
(check_ref(
$attribs
,
'ARRAY'
)) {
#Loop & add
$current_attribs
->{
uc
(
$_
)} = 1
for
@{
$attribs
};
}
elsif
(check_ref(
$attribs
,
'HASH'
)) {
#loop through the keys adding them in
foreach
my
$key
(
keys
%{
$attribs
}) {
$current_attribs
->{
uc
(
$key
)} = 1;
}
}
#Simple scalar value so just add it in
else
{
$current_attribs
->{
uc
(
$attribs
)} = 1;
}
foreach
my
$member
(@{
$self
->{MEMBERS}}) {
if
(
$member
->[0] ==
$gene_id
) {
$member
->[1] =
$current_attribs
;
}
}
return
;
}
=head2 remove_attribs
Arg [1] : Int gene id to retrieve attributes against
Arg [2] : ArrayRef/HashRef/Scalar The attribute you wish to remove
Description : Removes the given type from this group against the specified
gene identifier
Example : $aag->remove_attribs(10, 'IS_VALID_ALTERNATE');
$aag->remove_attribs(10, [ 'IS_VALID_ALTERNATE' ]);
$aag->remove_attribs(10, {IS_VALID_ALTERNATE => 1});
=cut
sub
remove_attribs {
my
(
$self
,
$gene_id
,
$attribs
) =
@_
;
assert_integer(
$gene_id
,
'gene_id'
);
my
@to_remove
;
if
(check_ref(
$attribs
,
'ARRAY'
)) {
@to_remove
=
map
{
uc
(
$_
) } @{
$attribs
};
}
elsif
(check_ref(
$attribs
,
'HASH'
)) {
@to_remove
=
map
{
uc
(
$_
) }
keys
%{
$attribs
};
}
#Simple scalar value so just add it in
else
{
@to_remove
=
uc
(
$attribs
);
}
foreach
my
$member
(@{
$self
->{MEMBERS}}) {
if
(
$member
->[0] ==
$gene_id
) {
my
$current_attribs
=
$member
->[1];
delete
$current_attribs
->{
$_
}
for
@to_remove
;
}
}
return
;
}
=head2 remove_member
Arg [1] : Int gene id to retrieve attributes against
Arg [2] : ArrayRef/HashRef/Scalar The attribute you wish to remove
Description : Removes the given member from this group. Any changes
must be persisted back to the database via update() or
store() methods in Bio::EnsEMBL::DBSQL::AltAlleleGroupAdaptor.
Example : $aag->remove_member(10);
=cut
sub
remove_member {
my
(
$self
,
$gene_id
) =
@_
;
assert_integer(
$gene_id
,
'gene_id'
);
my
$members
=
$self
->{MEMBERS};
my
$size
=
scalar
(@{
$members
});
for
(
my
$i
= 0;
$i
<
$size
;
$i
++) {
my
$current_id
=
$members
->[
$i
]->[0];
#If this was the ID then splice it out of the array and exit
if
(
$current_id
==
$gene_id
) {
splice
(@{
$members
},
$i
, 1);
last
;
}
}
return
;
}
=head2 contains_member
Arg [1] : Int gene id to retrieve attributes against
Description : Searches through the members list looking for the
specified gene id. Returns true if it was found
or false if not.
Returntype : Boolean indicating if the given gene id is held in this group
Example : $aag->contains_member(10);
=cut
sub
contains_member {
my
(
$self
,
$gene_id
) =
@_
;
assert_integer(
$gene_id
,
'gene_id'
);
foreach
my
$member
(@{
$self
->{MEMBERS}}) {
if
(
$member
->[0] ==
$gene_id
) {
return
1;
}
}
return
0;
}
=head2 remove_all_members
Description : Remove members from this object, but NOT the database. See
AltAlleleGroupAdaptor->remove() to remove the group from the
database
=cut
sub
remove_all_members {
my
$self
=
shift
;
$self
->{
'MEMBERS'
} = [];
return
;
}
=head2 rep_Gene_id
Arg[1] : Optional - set a new representative Gene id for the group
Description : Reports or sets the representative Gene for this AltAlleleGroup
If you wish to remove the representative status of all genes without
setting a new one, see unset_rep_Gene_id
Returntype : Integer or undef if none set
=cut
sub
rep_Gene_id {
my
$self
=
shift
;
my
$new_id
=
shift
;
my
$list
=
$self
->{
'MEMBERS'
};
my
$change
;
foreach
my
$allele
(
@$list
) {
my
(
$gene_id
,
$type
) =
@$allele
;
if
(
exists
(
$type
->{IS_REPRESENTATIVE}) && !
defined
(
$new_id
) ) {
return
$gene_id
;
}
if
(
$new_id
) {
unless
(
$gene_id
==
$new_id
) {
delete
(
$allele
->[1]->{IS_REPRESENTATIVE})}
else
{
$allele
->[1]->{IS_REPRESENTATIVE} = 1;
$change
=
$new_id
;
}
}
}
if
(
$change
) {
$self
->{
'MEMBERS'
} =
$list
;
return
$new_id
;
}
elsif
(
$new_id
&& !
$change
) {
my
$db_id
=
$self
->dbID() ||
'unknown'
;
throw(
"Requested representative gene ID was not set because it is not in this AltAlleleGroup, ID $db_id"
);
}
else
{
warning(
"No representative allele currently set for this AltAlleleGroup"
);
return
;
}
}
=head2 unset_rep_Gene_id
Description : Removes the representative Gene flag from this AltAlleleGroup.
This action is not possible through rep_Gene_id due to
validation of inputs.
=cut
sub
unset_rep_Gene_id {
my
$self
=
shift
;
my
$list
=
$self
->{
'MEMBERS'
};
foreach
my
$allele
(
@$list
) {
delete
(
$allele
->[1]->{IS_REPRESENTATIVE});
}
$self
->{
'MEMBERS'
} =
$list
;
return
;
}
=head2 get_all_Gene_ids
Arg[1] : Boolean - Do not include representative gene in list of ids.
Arg[2] : ArrayRef - Can contain dbIDs or Gene objects to exclude from the returned list
Description : fetches all the Gene dbIDs within the allele group. It can also
be used to list those ids that are not the representative Gene.
Returntype : ArrayRef of gene dbIDs
=cut
sub
get_all_Gene_ids {
my
$self
=
shift
;
my
$all_but_rep
=
shift
;
my
$excluded_genes
=
shift
;
my
$list
=
$self
->{
'MEMBERS'
};
my
%gene_exclusions
;
if
(
$excluded_genes
) {
assert_ref(
$excluded_genes
,
'ARRAY'
,
'excluded genes'
);
foreach
my
$gene
(@{
$excluded_genes
}) {
my
$gene_id
= (
ref
(
$gene
)) ?
$gene
->dbID() :
$gene
;
$gene_exclusions
{
$gene_id
} =
$gene_id
;
}
}
my
@gene_ids
;
foreach
my
$allele
(
@$list
) {
my
(
$gene_id
,
$type
) =
@$allele
;
if
(
$all_but_rep
&&
$type
->{IS_REPRESENTATIVE}) {
next
;}
if
(
exists
$gene_exclusions
{
$gene_id
}) {
next
; }
push
@gene_ids
,
$gene_id
;
}
return
[
sort
{
$a
<=>
$b
}
@gene_ids
];
}
=head2 get_representative_Gene
Description : Used to fetch a Gene object which has been marked as the
representative Gene for this alt allele group.
Returntype : Bio::EnsEMBL::Gene object which is the representative gene
=cut
sub
get_representative_Gene {
my
$self
=
shift
;
my
$ga
=
$self
->adaptor->db->get_GeneAdaptor;
return
$ga
->fetch_by_dbID(
$self
->rep_Gene_id);
}
=head2 get_all_Genes
Arg[1] : Boolean - Do not include representative gene in list of ids.
Arg[2] : ArrayRef - Can contain dbIDs or Gene objects to exclude from the returned list
Description : Fetches all the Gene objects within the allele group. It can also
be used to list those Genes that are not the representative Gene.
Returntype : ArrayRef of Bio::EnsEMBL::Gene objects
=cut
sub
get_all_Genes {
my
(
$self
,
$all_but_rep
,
$excluded_genes
) =
@_
;
my
$gene_ids
=
$self
->get_all_Gene_ids(
$all_but_rep
,
$excluded_genes
);
return
$self
->adaptor()->db()->get_GeneAdaptor()->fetch_all_by_dbID_list(
$gene_ids
);
}
=head2 get_all_Genes_types
Arg[1] : Boolean - Do not include representative gene in list of ids.
Arg[2] : ArrayRef - Can contain dbIDs or Gene objects to exclude from the returned list
Description : Fetches all the Gene objects within the allele group and their
associcated attributes. It can also be used to list those
Genes that are not the representative Gene.
Returntype : ArrayRef. 2 dimensional holding [Bio::EnsEMBL::Gene, {attribute_hash}]
=cut
sub
get_all_Genes_types {
my
(
$self
,
$all_but_rep
,
$excluded_genes
) =
@_
;
my
$gene_ids
=
$self
->get_all_Gene_ids(
$all_but_rep
,
$excluded_genes
);
my
$ga
=
$self
->adaptor()->db()->get_GeneAdaptor();
my
@output
;
my
$members
=
$self
->{MEMBERS};
foreach
my
$allele
(@{
$members
}) {
my
(
$gene_id
,
$attribs
) =
@$allele
;
if
(
$all_but_rep
&&
$attribs
->{IS_REPRESENTATIVE}) {
next
;}
my
$gene
=
$ga
->fetch_by_dbID(
$gene_id
);
my
%attribs_copy
= %{
$attribs
};
push
(
@output
, [
$gene
, \
%attribs_copy
])
}
return
\
@output
;
}
=head2 size
Description : Returns the current size of this group in members
Returntype : Int the size of the current alt allele group
=cut
sub
size {
my
$self
=
shift
;
my
$list
=
$self
->{
'MEMBERS'
};
return
scalar
(
@$list
);
}
=head2 get_all_members
Description : Retrieves all of the information about all members. Be aware
that this emits the interal data structure so direct modification
should be done with caution.
Returntype : ArrayRef of id and type list: [gene_id,type]
Caller : AltAlleleGroupAdaptor->store
=cut
sub
get_all_members {
my
$self
=
shift
;
my
$members
=
$self
->{
'MEMBERS'
};
return
$members
;
}
1;