Hide Show 18 lines of Pod
Hide Show 49 lines of Pod
# Release version string of this adaptor module.
$Bio::EnsEMBL::DBSQL::GeneAdaptor::VERSION = '113.0.0';

# Parent class: method resolution falls through to BaseFeatureAdaptor.
our @ISA = ('Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor');
# Returns the [table_name, alias] pairs this adaptor selects from: the gene
# table plus xref and external_db (the two tables named in _left_join()).
sub _tables {
    my @gene        = qw(gene g);
    my @xref        = qw(xref x);
    my @external_db = qw(external_db exdb);

    return (\@gene, \@xref, \@external_db);
}
# Returns the ordered list of SELECT expressions for a gene row.  The order
# matters: _objs_from_sth() binds its result variables positionally in the
# same sequence.  The two date columns are wrapped by the connection's
# from_date_to_seconds() helper before being selected.
sub _columns {
    my ($self) = @_;

    my $created_date  = $self->db()->dbc()->from_date_to_seconds('g.created_date');
    my $modified_date = $self->db()->dbc()->from_date_to_seconds('g.modified_date');

    return (
        qw(
            g.gene_id
            g.seq_region_id
            g.seq_region_start
            g.seq_region_end
            g.seq_region_strand
            g.analysis_id
            g.biotype
            g.display_xref_id
            g.description
            g.source
            g.is_current
            g.canonical_transcript_id
            g.stable_id
            g.version
        ),
        $created_date,
        $modified_date,
        qw(
            x.display_label
            x.dbprimary_acc
            x.description
            x.version
            exdb.db_name
            exdb.status
            exdb.db_release
            exdb.db_display_name
            x.info_type
            x.info_text
        )
    );
}
# Returns [table_name, join_condition] pairs describing how xref and
# external_db are attached to the gene row.  A gene's display xref is
# reached through g.display_xref_id, and the xref's database through
# x.external_db_id.
sub _left_join {
    my @xref_join        = ('xref',        'x.xref_id = g.display_xref_id');
    my @external_db_join = ('external_db', 'exdb.external_db_id = x.external_db_id');

    return (\@xref_join, \@external_db_join);
}
Hide Show 12 lines of Pod
# Lists the internal ids of the gene table by delegating to the inherited
# _list_dbIDs() helper.
#   $ordered - passed through unchanged as the helper's third argument.
# Returns whatever _list_dbIDs() returns.
sub list_dbIDs {
    my $self    = shift;
    my $ordered = shift;

    # The second argument (column name) is left undefined on purpose, as in
    # the original call; list_stable_ids() passes "stable_id" there instead.
    return $self->_list_dbIDs('gene', undef, $ordered);
}
Hide Show 11 lines of Pod
# Lists the stable ids of the gene table by asking the inherited
# _list_dbIDs() helper for the "stable_id" column.
# Returns whatever _list_dbIDs() returns.
sub list_stable_ids {
    my $self = shift;

    return $self->_list_dbIDs('gene', 'stable_id');
}
# Lists the seq_region ids referenced by the gene table, via the inherited
# _list_seq_region_ids() helper.
sub list_seq_region_ids {
    my ($self) = @_;

    return $self->_list_seq_region_ids('gene');
}
Hide Show 15 lines of Pod
# Fetches a single current gene whose display xref label equals $label.
#   $label - display label to match exactly.
# Returns the gene, or undef when nothing matches.  When several genes
# match, the first one located on a reference slice wins; if none is on a
# reference slice the first match is returned.
sub fetch_by_display_label {
    my ($self, $label) = @_;

    $self->bind_param_generic_fetch($label, SQL_VARCHAR);
    my @candidates =
        @{ $self->generic_fetch("x.display_label = ? AND g.is_current = 1") };

    # Zero or one hit: no tie-breaking needed (and no slice lookup is made).
    return undef          if !@candidates;
    return $candidates[0] if @candidates == 1;

    # Several hits: prefer the first gene sitting on a reference slice.
    foreach my $candidate (@candidates) {
        return $candidate if $candidate->slice->is_reference;
    }

    return $candidates[0];
}
Hide Show 13 lines of Pod
# Fetches every current gene whose display xref label equals $label.
#   $label - display label to match exactly.
# Returns the array reference produced by generic_fetch().
sub fetch_all_by_display_label {
    my ($self, $label) = @_;

    $self->bind_param_generic_fetch($label, SQL_VARCHAR);

    my $genes = $self->generic_fetch("x.display_label = ? AND g.is_current = 1");
    return $genes;
}
Hide Show 18 lines of Pod
# Fetches the current gene with the given stable id.
#   $stable_id - stable id, optionally carrying a ".<version>" suffix.
# Returns the gene, or undef when none is found.
#
# The id is first looked up verbatim.  Only if that finds nothing AND the id
# contains a '.' after its first character is it split at the last '.' and
# retried through fetch_by_stable_id_version().
sub fetch_by_stable_id {
    my ($self, $stable_id) = @_;

    my $constraint = "g.stable_id = ? AND g.is_current = 1";
    $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);
    my ($gene) = @{ $self->generic_fetch($constraint) };
    return $gene if defined($gene);

    # rindex() returns -1 (a true value) when there is no '.', so the result
    # must be compared explicitly.  Testing it for truth made ids without a
    # dot fall into the version lookup with a truncated id and the whole id
    # as "version".  A dot at position 0 would leave an empty stable id, so
    # it is skipped too (as before).
    my $vindex = rindex($stable_id, '.');
    if ($vindex > 0) {
        $gene = $self->fetch_by_stable_id_version(
            substr($stable_id, 0, $vindex),
            substr($stable_id, $vindex + 1)
        );
    }

    return $gene;
}
Hide Show 20 lines of Pod
# Fetches the current gene matching both a stable id and a version.
#   $stable_id - stable id without version suffix.
#   $version   - version; must consist of digits only.
# Returns the first matching gene (undef if none).  Returns nothing at all
# (bare return) when $version is not purely numeric.
sub fetch_by_stable_id_version {
    my ($self, $stable_id, $version) = @_;

    # Reject non-numeric versions before touching the database.
    return if $version !~ /^\d+$/;

    $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);
    $self->bind_param_generic_fetch($version,   SQL_INTEGER);

    my ($gene) = @{
        $self->generic_fetch(
            "g.stable_id = ? AND g.version = ? AND g.is_current = 1")
    };

    return $gene;
}
Hide Show 21 lines of Pod
# Fetches all current genes from the given source(s).
#   $source - passed to source_constraint() to build the WHERE clause.
# Returns a new array reference holding the fetched genes.
sub fetch_all_by_source {
    my ($self, $source) = @_;

    my $constraint = $self->source_constraint($source);

    # Copy the result so the caller gets its own array, as before.
    return [ @{ $self->generic_fetch($constraint) } ];
}
Hide Show 14 lines of Pod
# Builds the SQL constraint restricting genes to current ones from the
# given source(s).
#   $sources          - value(s) handed to generate_in_constraint().
#   $inline_variables - forwarded to generate_in_constraint() unchanged.
# Returns the constraint string.
sub source_constraint {
    my ($self, $sources, $inline_variables) = @_;

    my $in_statement = $self->generate_in_constraint(
        $sources, 'g.source', SQL_VARCHAR, $inline_variables);

    return "g.is_current = 1" . " and $in_statement";
}
Hide Show 15 lines of Pod
# Counts current genes from the given source(s).
#   $source - passed to source_constraint() to build the WHERE clause.
# Returns whatever generic_count() returns for that constraint.
sub count_all_by_source {
    my $self   = shift;
    my $source = shift;

    return $self->generic_count($self->source_constraint($source));
}
Hide Show 21 lines of Pod
# Fetches all current genes of the given biotype(s).
#   $biotype - passed to biotype_constraint() to build the WHERE clause.
# Returns a new array reference holding the fetched genes.
sub fetch_all_by_biotype {
    my ($self, $biotype) = @_;

    my $constraint = $self->biotype_constraint($biotype);

    # Copy the result so the caller gets its own array, as before.
    return [ @{ $self->generic_fetch($constraint) } ];
}
Hide Show 14 lines of Pod
# Builds the SQL constraint restricting genes to current ones of the given
# biotype(s).
#   $biotypes         - value(s) handed to generate_in_constraint().
#   $inline_variables - forwarded to generate_in_constraint() unchanged.
# Returns the constraint string.
sub biotype_constraint {
    my ($self, $biotypes, $inline_variables) = @_;

    my $in_statement = $self->generate_in_constraint(
        $biotypes, 'g.biotype', SQL_VARCHAR, $inline_variables);

    return "g.is_current = 1" . " and $in_statement";
}
Hide Show 15 lines of Pod
# Counts current genes of the given biotype(s).
#   $biotype - passed to biotype_constraint() to build the WHERE clause.
# Returns whatever generic_count() returns for that constraint.
sub count_all_by_biotype {
    my $self    = shift;
    my $biotype = shift;

    return $self->generic_count($self->biotype_constraint($biotype));
}
# Fetches every current gene except those with biotype "LRG_gene".
# Returns a new array reference holding the fetched genes.
sub fetch_all {
    my $self = shift;

    my $not_lrg_and_current = 'g.biotype != "LRG_gene" and g.is_current = 1';

    # Copy the result so the caller gets its own array, as before.
    return [ @{ $self->generic_fetch($not_lrg_and_current) } ];
}
Hide Show 15 lines of Pod
# Fetches all genes sharing a stable id.  Unlike fetch_by_stable_id() there
# is no is_current filter, so non-current rows are included too.
#   $stable_id - stable id to match exactly.
# Returns the array reference produced by generic_fetch().
sub fetch_all_versions_by_stable_id {
    my $self      = shift;
    my $stable_id = shift;

    $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);

    return $self->generic_fetch("g.stable_id = ?");
}
Hide Show 18 lines of Pod
# Fetches the gene owning a transcript that contains the current exon with
# the given stable id.
#   $stable_id - exon stable id.
#   $version   - accepted for interface compatibility; not used by the query.
# Returns the gene, or undef when no such exon/gene exists.
sub fetch_by_exon_stable_id {
    my ($self, $stable_id, $version) = @_;

    my $sql = qq(
        SELECT t.gene_id
          FROM transcript as t,
               exon_transcript as et,
               exon as e
         WHERE t.transcript_id = et.transcript_id
           AND et.exon_id = e.exon_id
           AND e.stable_id = ?
           AND e.is_current = 1
    );

    my $sth = $self->prepare($sql);
    $sth->bind_param(1, $stable_id, SQL_VARCHAR);
    $sth->execute();

    my ($dbID) = $sth->fetchrow_array();

    # Only the first row is read, so the handle may still hold pending rows;
    # release it explicitly, as the other single-row lookups in this file do.
    $sth->finish();

    return undef if (!defined($dbID));

    my $gene = $self->fetch_by_dbID($dbID);

    return $gene;
}
Hide Show 17 lines of Pod
# Fetches all genes having a current transcript whose translation carries a
# protein feature linked to the given InterPro accession, restricted to
# this adaptor's species.
#   $domain - InterPro accession; required (throws when false).
# Returns the result of fetch_all_by_dbID_list() for the matching gene ids.
sub fetch_all_by_domain {
    my $self   = shift;
    my $domain = shift;

    if (!$domain) {
        throw("domain argument is required");
    }

    my $sth = $self->prepare(
        qq(
SELECT tr.gene_id
FROM interpro i,
protein_feature pf,
transcript tr,
translation tl,
seq_region sr,
coord_system cs
WHERE cs.species_id = ?
AND cs.coord_system_id = sr.coord_system_id
AND sr.seq_region_id = tr.seq_region_id
AND tr.is_current = 1
AND tr.transcript_id = tl.transcript_id
AND tl.translation_id = pf.translation_id
AND pf.hit_name = i.id
AND i.interpro_ac = ?
GROUP BY tr.gene_id)
    );

    $sth->bind_param(1, $self->species_id(), SQL_VARCHAR);
    $sth->bind_param(2, $domain,             SQL_VARCHAR);
    $sth->execute();

    my $rows = $sth->fetchall_arrayref();
    $sth->finish();

    # Each row has a single column: the gene id.
    my @gene_ids = map { $_->[0] } @{$rows};

    return $self->fetch_all_by_dbID_list(\@gene_ids);
}
Hide Show 27 lines of Pod
# Fetches the genes on a slice that are linked to a given external database.
#   $slice            - slice to fetch genes from.
#   $logic_name       - forwarded to fetch_all_by_Slice().
#   $load_transcripts - forwarded to fetch_all_by_Slice().
#   $db_name          - external database name to look up.
# Returns an array reference of genes.  When the external database is
# unknown, a warning listing the available names is issued and an empty
# array reference is returned.
sub fetch_all_by_Slice_and_external_dbname_link {
    my ($self, $slice, $logic_name, $load_transcripts, $db_name) = @_;

    my $dbentry_adaptor = $self->db()->get_DBEntryAdaptor();

    # Resolve the name to external_db ids (third argument as in the original
    # call: 'ignore release').
    my $external_db_ids =
        $dbentry_adaptor->get_external_db_ids($db_name, undef, 'ignore release');

    if (scalar(@{$external_db_ids}) == 0) {
        my $external_db_names = $dbentry_adaptor->get_distinct_external_dbs();
        my $available = join("\n", map { "\t${_}" } @{$external_db_names});
        warning(
            sprintf(
                "Could not find external database "
                    . "'%s' in the external_db table\n"
                    . "Available are:\n%s",
                $db_name, $available
            )
        );
        return [];
    }

    # Collect the ids of every gene linked to any of the matching databases.
    my %is_linked;
    foreach my $external_db_id (@{$external_db_ids}) {
        my @gene_ids =
            $dbentry_adaptor->list_gene_ids_by_external_db_id($external_db_id);
        foreach my $gene_id (@gene_ids) {
            $is_linked{$gene_id} = 1;
        }
    }

    # Fetch everything on the slice, then keep only the linked genes.
    my $genes = $self->fetch_all_by_Slice($slice, $logic_name, $load_transcripts);
    my @genes_passed = grep { exists $is_linked{ $_->dbID() } } @{$genes};

    return \@genes_passed;
}
Hide Show 25 lines of Pod
# Fetches the current genes overlapping a slice, optionally preloading their
# transcripts in bulk.
#   $slice            - slice to fetch genes from.
#   $logic_name       - forwarded to fetch_all_by_Slice_constraint().
#   $load_transcripts - when true, transcripts are fetched in one go and
#                       attached to the genes.
#   $source           - optional gene source filter.
#   $biotype          - optional biotype filter.
# Returns an array reference of genes.
# Throws if a preloaded transcript cannot be transferred onto $slice.
sub fetch_all_by_Slice {
    my ($self, $slice, $logic_name, $load_transcripts, $source, $biotype) = @_;

    my $constraint = 'g.is_current = 1';

    if (defined($source)) {
        # The source used to be pasted between single quotes verbatim, which
        # breaks on embedded quotes and permits SQL injection.  Build it the
        # same way as the biotype filter below instead.
        # NOTE(review): relies on generate_in_constraint() quoting inlined
        # values, as the biotype filter already assumes - confirm.
        $constraint .= " and "
            . $self->generate_in_constraint($source, 'g.source', SQL_VARCHAR, 1);
    }
    if (defined($biotype)) {
        my $inline_variables = 1;
        $constraint .= " and "
            . $self->generate_in_constraint($biotype, 'g.biotype', SQL_VARCHAR,
                                            $inline_variables);
    }

    my $genes =
        $self->SUPER::fetch_all_by_Slice_constraint($slice, $constraint, $logic_name);

    # Nothing more to do unless transcript preloading was requested.
    if (!$load_transcripts || @$genes < 1) {
        return $genes;
    }

    # If the first gene already carries a transcript array, assume the whole
    # list was preloaded earlier and hand it back as is.
    if (exists($genes->[0]->{'_transcript_array'})) {
        return $genes;
    }

    # Find the extent spanned by all genes; it may reach beyond the slice.
    my ($min_start, $max_end);
    foreach my $g (@$genes) {
        if (!defined($min_start) || $g->seq_region_start() < $min_start) {
            $min_start = $g->seq_region_start();
        }
        if (!defined($max_end) || $g->seq_region_end() > $max_end) {
            $max_end = $g->seq_region_end();
        }
    }

    # Use the original slice when it covers every gene, otherwise build an
    # extended slice covering the full gene extent.
    my $ext_slice;
    if ($min_start >= $slice->start() && $max_end <= $slice->end()) {
        $ext_slice = $slice;
    }
    else {
        my $sa = $self->db()->get_SliceAdaptor();
        $ext_slice = $sa->fetch_by_region(
            $slice->coord_system->name(),
            $slice->seq_region_name(),
            $min_start,
            $max_end,
            $slice->strand(),
            $slice->coord_system->version()
        );
    }

    # Map transcript ids to their owning gene objects.
    my %g_hash = map { ($_->dbID(), $_) } @{$genes};
    my $g_id_str = join(',', keys(%g_hash));

    my $sth = $self->prepare("SELECT gene_id, transcript_id "
            . "FROM transcript "
            . "WHERE gene_id IN ($g_id_str)");
    $sth->execute();

    my ($g_id, $tr_id);
    $sth->bind_columns(\($g_id, $tr_id));

    my %tr_g_hash;
    while ($sth->fetch()) {
        $tr_g_hash{$tr_id} = $g_hash{$g_id};
    }

    # Without any transcript the constraint below would read
    # "t.transcript_id IN ()", which is not valid SQL.
    if (!%tr_g_hash) {
        return $genes;
    }

    my $ta = $self->db()->get_TranscriptAdaptor();
    my $transcripts = $ta->fetch_all_by_Slice(
        $ext_slice, 1, undef,
        sprintf("t.transcript_id IN (%s)",
                join(',', sort { $a <=> $b } keys(%tr_g_hash)))
    );

    # Attach each transcript to its gene, moving it onto the caller's slice
    # when it was fetched from the extended slice.
    foreach my $tr (@{$transcripts}) {
        if (!exists($tr_g_hash{ $tr->dbID() })) {
            next;
        }

        my $new_tr;
        if ($slice != $ext_slice) {
            $new_tr = $tr->transfer($slice);
            if (!defined($new_tr)) {
                throw("Unexpected. "
                        . "Transcript could not be transfered onto Gene slice.");
            }
        }
        else {
            $new_tr = $tr;
        }

        $tr_g_hash{ $tr->dbID() }->add_Transcript($new_tr);
    }

    return $genes;
}
Hide Show 16 lines of Pod
# Counts the current genes overlapping a slice.
#   $slice   - slice to count genes on.
#   $biotype - optional biotype filter, built by biotype_constraint().
#   $source  - optional gene source filter.
# Returns whatever count_by_Slice_constraint() returns.
sub count_all_by_Slice {
    my ($self, $slice, $biotype, $source) = @_;

    my $constraint = 'g.is_current = 1';

    if (defined($source)) {
        # The source used to be pasted between single quotes verbatim, which
        # breaks on embedded quotes and permits SQL injection.
        # NOTE(review): relies on generate_in_constraint() quoting values
        # when its inline flag is set - confirm.
        $constraint .= " and "
            . $self->generate_in_constraint($source, 'g.source', SQL_VARCHAR, 1);
    }
    if (defined($biotype)) {
        $constraint .= " and " . $self->biotype_constraint($biotype);
    }

    return $self->count_by_Slice_constraint($slice, $constraint);
}
Hide Show 19 lines of Pod
# Fetches the gene owning the transcript with the given internal id.
#   $trans_id - transcript dbID.
# Returns the gene, or undef when the transcript does not exist.
sub fetch_by_transcript_id {
    my $self     = shift;
    my $trans_id = shift;

    my $sth = $self->prepare(
        qq(
SELECT tr.gene_id
FROM transcript tr
WHERE tr.transcript_id = ?
)
    );
    $sth->bind_param(1, $trans_id, SQL_INTEGER);
    $sth->execute();

    my ($gene_dbID) = $sth->fetchrow_array();
    $sth->finish();

    if (!defined $gene_dbID) {
        return undef;
    }

    my $gene = $self->fetch_by_dbID($gene_dbID);
    return $gene;
}
Hide Show 15 lines of Pod
# Fetches the gene owning the current transcript with the given stable id.
#   $trans_stable_id - transcript stable id.
# Returns the gene, or undef when no current transcript matches.
sub fetch_by_transcript_stable_id {
    my $self            = shift;
    my $trans_stable_id = shift;

    my $sth = $self->prepare(
        qq(
SELECT gene_id
FROM transcript
WHERE stable_id = ?
AND is_current = 1
)
    );
    $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR);
    $sth->execute();

    my ($gene_dbID) = $sth->fetchrow_array();
    $sth->finish;

    if (!defined $gene_dbID) {
        return undef;
    }

    my $gene = $self->fetch_by_dbID($gene_dbID);
    return $gene;
}
Hide Show 14 lines of Pod
# Fetches the gene owning the current transcript whose translation has the
# given stable id.
#   $translation_stable_id - translation stable id.
# Returns the gene, or undef when no translation matches.
sub fetch_by_translation_stable_id {
    my $self                  = shift;
    my $translation_stable_id = shift;

    my $sth = $self->prepare(
        qq(
SELECT tr.gene_id
FROM transcript tr,
translation tl
WHERE tl.stable_id = ?
AND tr.transcript_id = tl.transcript_id
AND tr.is_current = 1
)
    );
    $sth->bind_param(1, $translation_stable_id, SQL_VARCHAR);
    $sth->execute();

    my ($gene_dbID) = $sth->fetchrow_array();
    $sth->finish;

    return undef if !defined $gene_dbID;

    return $self->fetch_by_dbID($gene_dbID);
}
Hide Show 28 lines of Pod
# Fetches the genes associated with an external name (xref).
#   $external_name    - external identifier to look up.
#   $external_db_name - forwarded to list_gene_ids_by_extids().
#   $override         - forwarded to list_gene_ids_by_extids().
# Returns an array reference of genes: those on reference slices first,
# followed by the others, each group in the order the ids were listed.
sub fetch_all_by_external_name {
    my ($self, $external_name, $external_db_name, $override) = @_;

    my $entryAdaptor = $self->db->get_DBEntryAdaptor();

    my @ids = $entryAdaptor->list_gene_ids_by_extids(
        $external_name, $external_db_name, $override);

    my %genes_by_dbIDs =
        map { ($_->dbID(), $_) } @{ $self->fetch_all_by_dbID_list(\@ids) };

    my (@reference, @non_reference);
    foreach my $id (@ids) {
        my $gene = $genes_by_dbIDs{$id};

        # An id for which no gene came back used to leave an undef in the
        # list, and calling ->slice() on it was fatal.  Skip it instead.
        next if !defined($gene);

        if ($gene->slice()->is_reference()) {
            push(@reference, $gene);
        }
        else {
            push(@non_reference, $gene);
        }
    }

    return [ @reference, @non_reference ];
}
Hide Show 10 lines of Pod
# Fetches all genes whose description matches an SQL LIKE pattern.  No
# is_current filter is applied.
#   $description - pattern bound to "g.description LIKE ?".
# Returns the array reference produced by generic_fetch().
sub fetch_all_by_description {
    my $self        = shift;
    my $description = shift;

    $self->bind_param_generic_fetch($description, SQL_VARCHAR);

    return $self->generic_fetch("g.description LIKE ?");
}
Hide Show 25 lines of Pod
# Fetches all genes annotated with a GO term or any of its descendants.
#   $term - a Bio::EnsEMBL::OntologyTerm from the 'GO' ontology.
# Returns a new array reference of genes, fetched by ascending gene id.
# Throws when $term is not a GO term (assert_ref() checks its class first).
sub fetch_all_by_GOTerm {
    my $self = shift;
    my $term = shift;

    assert_ref($term, 'Bio::EnsEMBL::OntologyTerm');
    throw('Argument is not a GO term') if $term->ontology() ne 'GO';

    my $entryAdaptor = $self->db->get_DBEntryAdaptor();

    # Accessions of the term itself followed by those of its descendants.
    my @accessions = map { $_->accession() } ($term, @{ $term->descendants() });

    # Gather gene ids over all accessions, de-duplicated through hash keys.
    my %unique_dbIDs;
    foreach my $accession (@accessions) {
        my @ids = $entryAdaptor->list_gene_ids_by_extids($accession, 'GO');
        foreach my $dbID (@ids) {
            $unique_dbIDs{$dbID} = 1;
        }
    }

    my @sorted_dbIDs = sort { $a <=> $b } keys(%unique_dbIDs);

    # Copy the result so the caller gets its own array, as before.
    return [ @{ $self->fetch_all_by_dbID_list(\@sorted_dbIDs) } ];
}
Hide Show 21 lines of Pod
# Fetches all genes linked to an external (ontology) database through a
# given linkage type.
#   $db_name      - external database name; defaults to 'GO' when undefined.
#   $linkage_type - linkage type; required (throws when undefined).
# Returns the result of fetch_all_by_dbID_list(), or an empty array
# reference (after a warning) when the database name is unknown.
sub fetch_all_by_ontology_linkage_type {
    my $self         = shift;
    my $db_name      = shift;
    my $linkage_type = shift;

    $db_name = 'GO' unless defined $db_name;
    throw("No linkage type given") unless defined $linkage_type;

    my $dbentry_adaptor = $self->db->get_DBEntryAdaptor();

    my $external_db_ids =
        $dbentry_adaptor->get_external_db_ids($db_name, undef, 'ignore release');

    if (scalar(@{$external_db_ids}) == 0) {
        warning(
            sprintf(
                "Could not find external database '%s' in the external_db table",
                $db_name
            )
        );
        return [];
    }

    # Gather gene ids over all matching databases, de-duplicated through
    # hash keys.
    my %unique_dbIDs;
    foreach my $external_db_id (@{$external_db_ids}) {
        my @gene_ids = $dbentry_adaptor->list_gene_ids_by_external_db_id(
            $external_db_id, $linkage_type);
        foreach my $gene_id (@gene_ids) {
            $unique_dbIDs{$gene_id} = 1;
        }
    }

    return $self->fetch_all_by_dbID_list([ keys %unique_dbIDs ]);
}
Hide Show 28 lines of Pod
# Fetches all genes annotated with the GO term of the given accession (or
# its descendants) by resolving the accession to a term and delegating to
# fetch_all_by_GOTerm().
#   $accession - must start with "GO:" (throws otherwise).
sub fetch_all_by_GOTerm_accession {
    my $self      = shift;
    my $accession = shift;

    throw('Argument is not a GO term accession') if $accession !~ /^GO:/;

    # The ontology term adaptor is looked up through the registry under the
    # 'Multi' species / 'Ontology' group.
    my $goAdaptor =
        Bio::EnsEMBL::Registry->get_adaptor('Multi', 'Ontology', 'OntologyTerm');
    my $term = $goAdaptor->fetch_by_accession($accession);

    return $self->fetch_all_by_GOTerm($term);
}
Hide Show 25 lines of Pod
# Fetches the other genes in the alt-allele group of a gene.
#   $gene - a Bio::EnsEMBL::Gene (throws otherwise).
#   $warn - when true, warn if the gene belongs to no alt-allele group.
# Returns the result of the group's get_all_Genes(undef, [$gene]) call, or
# an empty array reference when the gene has no dbID or no group.
sub fetch_all_alt_alleles {
    my ($self, $gene, $warn) = @_;

    unless (ref($gene) && $gene->isa('Bio::EnsEMBL::Gene')) {
        throw('Bio::EnsEMBL::Gene argument is required');
    }

    my $gene_id = $gene->dbID();
    unless ($gene_id) {
        warning('Cannot retrieve alternate alleles for gene without dbID');
        return [];
    }

    my $aaga = $self->db->get_adaptor('AltAlleleGroup');
    my $aag  = $aaga->fetch_by_gene_id($gene->dbID);

    if (!$aag) {
        warning("Supplied gene has no alternative alleles") if $warn;
        return [];
    }

    # The second argument names the gene(s) passed to get_all_Genes alongside
    # the group lookup; here that is the query gene itself.
    return $aag->get_all_Genes(undef, [$gene]);
}
Hide Show 11 lines of Pod
# Tells whether a gene is the representative of its alt-allele group.
#   $gene_id - gene dbID.
# Returns 1 when the gene belongs to no alt-allele group or is its group's
# representative gene, 0 otherwise.
sub is_ref {
    my ($self, $gene_id) = @_;

    my $aag = $self->db->get_adaptor('AltAlleleGroup')->fetch_by_gene_id($gene_id);

    # A gene outside any group counts as a reference gene.
    return 1 if !defined($aag);

    # The original ended with a throw() that could never run, because both
    # branches of its if/else returned; that dead statement is dropped.
    return ($aag->rep_Gene_id == $gene_id) ? 1 : 0;
}
Hide Show 20 lines of Pod
# Stores a list of genes as one alt-allele group.  A warning marks this
# entry point as unsupported in favour of AltAlleleGroupAdaptor::store().
#   $genes - array reference of Bio::EnsEMBL::Gene objects (throws otherwise).
# Returns the result of the AltAlleleGroup adaptor's store(), or nothing
# (after a warning) when fewer than two genes are given or when the number
# of genes on a reference slice is not exactly one.
sub store_alt_alleles {
    my ($self, $genes) = @_;

    warning(
        "Unsupported. Switch to using AltAlleleGroupAdaptor::store() and AltAlleleGroups"
    );

    # The original test read "!ref($genes) eq 'ARRAY'", which negates first
    # and then compares a boolean with 'ARRAY' - it could never be true.
    if (ref($genes) ne 'ARRAY') {
        throw('List reference of Bio::EnsEMBL::Gene argument expected.');
    }

    my $num_genes = scalar(@{$genes});
    if ($num_genes < 2) {
        # Guard the empty list: there is no first gene to ask for a dbID.
        my $first_id = $num_genes ? $genes->[0]->dbID() : undef;
        $first_id = 'none' if !defined($first_id);
        warning(
            'At least 2 genes must be provided to construct alternative alleles (gene id: '
                . $first_id
                . '). Ignoring.');
        return;
    }

    # Each member record is [gene dbID, { type flags }].
    my @allele_list;
    foreach my $gene (@{$genes}) {
        # Start from a truly empty hash; "my %type = {}" stored a
        # stringified hash reference as a bogus key.
        my %type;
        if ($gene->slice->is_reference()) {
            $type{'IS_REPRESENTATIVE'} = 1;
        }
        push(@allele_list, [ $gene->dbID, \%type ]);
    }

    my $aag = Bio::EnsEMBL::AltAlleleGroup->new(-MEMBERS => \@allele_list,);

    if (scalar(@{ $aag->get_all_members_with_type('IS_REPRESENTATIVE') }) != 1) {
        warning(
            'Inappropriate number of alternative alleles on the reference sequence. Ignoring.'
        );
        return;
    }

    my $aaga = $self->db->get_adaptor('AltAlleleGroup');
    return $aaga->store($aag);
}
Hide Show 20 lines of Pod
# Stores a gene, together with its xrefs, transcripts and attributes, in
# the database this adaptor is attached to.
#   $gene                           - Bio::EnsEMBL::Gene to store (throws
#                                     otherwise).
#   $ignore_release                 - forwarded to DBEntryAdaptor::store();
#                                     defaults to 1 when undefined.
#   $skip_recalculating_coordinates - forwarded to TranscriptAdaptor::store().
#   $skip_exon_sf                   - forwarded to swap_exons() when
#                                     identical exons are merged.
# Returns the dbID of the gene (the existing one when already stored).
# Throws when the gene has no analysis.
sub store {
    my ($self, $gene, $ignore_release, $skip_recalculating_coordinates,
        $skip_exon_sf) = @_;

    if (!ref $gene || !$gene->isa('Bio::EnsEMBL::Gene')) {
        throw("Must store a gene object, not a $gene");
    }
    if (!defined($ignore_release)) {
        $ignore_release = 1;
    }

    my $db = $self->db();

    # Already stored in this database: nothing to do.
    if ($gene->is_stored($db)) {
        return $gene->dbID();
    }

    $gene->recalculate_coordinates();

    my $analysis = $gene->analysis();
    throw("Genes must have an analysis object.") if (!defined($analysis));

    # Store the analysis first when it is not in this database yet.
    my $analysis_id;
    if ($analysis->is_stored($db)) {
        $analysis_id = $analysis->dbID();
    }
    else {
        $analysis_id = $db->get_AnalysisAdaptor->store($analysis);
    }

    my $type = $gene->get_Biotype->name;

    my $is_current = $gene->is_current;
    $is_current = 1 unless (defined($is_current));

    # _pre_store() may hand back a different gene object; remember the
    # caller's object and transcripts so their ids can be filled in later.
    my $original             = $gene;
    my $original_transcripts = $gene->get_all_Transcripts();

    my $seq_region_id;
    ($gene, $seq_region_id) = $self->_pre_store($gene);

    # Columns bound through placeholders, in bind order.
    my @columns = qw(
        biotype
        analysis_id
        seq_region_id
        seq_region_start
        seq_region_end
        seq_region_strand
        description
        source
        is_current
        canonical_transcript_id
    );

    # "Canned" columns get their value written straight into the statement:
    # it is the expression returned by from_seconds_to_date().
    my @canned_columns;
    my @canned_values;

    if (defined($gene->stable_id)) {
        push @columns, 'stable_id', 'version';

        my $created  = $self->db->dbc->from_seconds_to_date($gene->created_date());
        my $modified = $self->db->dbc->from_seconds_to_date($gene->modified_date());

        if ($created) {
            push @canned_columns, 'created_date';
            push @canned_values,  $created;
        }
        if ($modified) {
            push @canned_columns, 'modified_date';
            push @canned_values,  $modified;
        }
    }

    my $columns = join(', ', @columns, @canned_columns);
    my $values  = join(', ', ('?') x @columns, @canned_values);
    my $store_gene_sql = qq(
        INSERT INTO gene ( $columns )
        VALUES ( $values )
    );

    my $sth = $self->prepare($store_gene_sql);
    $sth->bind_param(1,  $type,                SQL_VARCHAR);
    $sth->bind_param(2,  $analysis_id,         SQL_INTEGER);
    $sth->bind_param(3,  $seq_region_id,       SQL_INTEGER);
    $sth->bind_param(4,  $gene->start(),       SQL_INTEGER);
    $sth->bind_param(5,  $gene->end(),         SQL_INTEGER);
    $sth->bind_param(6,  $gene->strand(),      SQL_TINYINT);
    $sth->bind_param(7,  $gene->description(), SQL_LONGVARCHAR);
    $sth->bind_param(8,  $gene->source(),      SQL_VARCHAR);
    $sth->bind_param(9,  $is_current,          SQL_TINYINT);

    # The canonical transcript is not known yet; 0 is a placeholder that is
    # replaced once the transcripts have been stored.
    $sth->bind_param(10, 0, SQL_TINYINT);

    if (defined($gene->stable_id)) {
        $sth->bind_param(11, $gene->stable_id, SQL_VARCHAR);
        $sth->bind_param(12, $gene->version,   SQL_INTEGER);
    }

    $sth->execute();
    $sth->finish();

    my $gene_dbID = $self->last_insert_id('gene_id', undef, 'gene');

    # Store the xrefs/object xref mappings.
    my $dbEntryAdaptor = $db->get_DBEntryAdaptor();
    foreach my $dbe (@{ $gene->get_all_DBEntries }) {
        $dbEntryAdaptor->store($dbe, $gene_dbID, "Gene", $ignore_release);
    }

    # Make transcripts share one object per distinct exon (same hashkey).
    my %exons;
    foreach my $trans (@{ $gene->get_all_Transcripts }) {
        foreach my $e (@{ $trans->get_all_Exons }) {
            my $key = $e->hashkey();
            if (exists $exons{$key}) {
                $trans->swap_exons($e, $exons{$key}, $skip_exon_sf);
            }
            else {
                $exons{$key} = $e;
            }
        }
    }

    my $transcript_adaptor = $db->get_TranscriptAdaptor();
    my $transcripts        = $gene->get_all_Transcripts();

    my $new_canonical_transcript_id;
    for my $i (0 .. $#{$transcripts}) {
        my $new = $transcripts->[$i];
        my $old = $original_transcripts->[$i];

        my $new_dbID = $transcript_adaptor->store($new, $gene_dbID, $analysis_id,
                                                  $skip_recalculating_coordinates);

        # Re-fetch the stored transcript and copy its ids/adaptors onto the
        # caller's original objects.
        $new = $transcript_adaptor->fetch_by_dbID($new_dbID);
        if ($new) {
            # The first canonical transcript encountered wins.
            if (!defined($new_canonical_transcript_id) && $new->is_canonical()) {
                $new_canonical_transcript_id = $new->dbID();
            }

            $old->dbID($new->dbID());
            $old->adaptor($new->adaptor());

            if ($new->translation) {
                $old->translation->dbID($new->translation()->dbID);
                $old->translation->adaptor($new->translation()->adaptor);
            }
        }
    }

    # Replace the placeholder written at insert time.
    if (defined($new_canonical_transcript_id)) {
        my $canonical_sth = $self->prepare(
            q(
        UPDATE gene
        SET canonical_transcript_id = ?
        WHERE gene_id = ?)
        );
        $canonical_sth->bind_param(1, $new_canonical_transcript_id, SQL_INTEGER);
        $canonical_sth->bind_param(2, $gene_dbID,                   SQL_INTEGER);
        $canonical_sth->execute();
        $canonical_sth->finish();

        $transcript_adaptor->update_canonical_attribute($new_canonical_transcript_id);
    }

    # Link the display xref, but only when it exists in the database.
    if (my $display_xref = $gene->display_xref) {
        my $dxref_id;
        if ($display_xref->is_stored($db)) {
            $dxref_id = $display_xref->dbID();
        }
        else {
            $dxref_id = $dbEntryAdaptor->exists($display_xref);
        }

        if (defined($dxref_id)) {
            my $xref_sth = $self->prepare(
                "UPDATE gene SET display_xref_id = ? WHERE gene_id = ?");
            $xref_sth->bind_param(1, $dxref_id,  SQL_INTEGER);
            $xref_sth->bind_param(2, $gene_dbID, SQL_INTEGER);
            $xref_sth->execute();
            $xref_sth->finish();

            # The original repeated these two assignments twice in a row.
            $display_xref->dbID($dxref_id);
            $display_xref->adaptor($dbEntryAdaptor);
        }
        else {
            warning("Display_xref "
                    . $display_xref->dbname() . ":"
                    . $display_xref->display_id()
                    . " is not stored in database.\n"
                    . "Not storing relationship to this gene.");
            $display_xref->dbID(undef);
            $display_xref->adaptor(undef);
        }
    }

    # Store gene attributes, then mark the caller's object as stored.
    my $attr_adaptor = $db->get_AttributeAdaptor();
    $attr_adaptor->store_on_Gene($gene_dbID, $gene->get_all_Attributes);

    $original->adaptor($self);
    $original->dbID($gene_dbID);

    return $gene_dbID;
}
Hide Show 16 lines of Pod
# Removes a gene and what hangs off it: its xref links, alt_allele rows,
# attributes and transcripts, and finally the gene row itself.
#   $gene - a stored Bio::EnsEMBL::Gene (throws when not a gene).
# Returns nothing.  Issues a warning and does nothing when the gene is not
# stored in this database.  On success the gene's dbID and adaptor are
# cleared.
sub remove {
    my ($self, $gene) = @_;

    unless (ref($gene) && $gene->isa('Bio::EnsEMBL::Gene')) {
        throw("Bio::EnsEMBL::Gene argument expected.");
    }

    unless ($gene->is_stored($self->db())) {
        warning("Cannot remove gene "
                . $gene->dbID()
                . ". Is not stored in "
                . "this database.");
        return;
    }

    # Detach every xref from this gene.
    my $dbe_adaptor = $self->db()->get_DBEntryAdaptor();
    for my $dbe (@{ $gene->get_all_DBEntries() }) {
        $dbe_adaptor->remove_from_object($dbe, $gene, 'Gene');
    }

    # Drop the gene's alt_allele rows.
    my $alt_allele_sth = $self->prepare("DELETE FROM alt_allele WHERE gene_id = ?");
    $alt_allele_sth->bind_param(1, $gene->dbID, SQL_INTEGER);
    $alt_allele_sth->execute();
    $alt_allele_sth->finish();

    # Drop the gene's attributes.
    my $attrib_adaptor = $self->db->get_AttributeAdaptor;
    $attrib_adaptor->remove_from_Gene($gene);

    # Remove every transcript of the gene.
    my $transcriptAdaptor = $self->db->get_TranscriptAdaptor();
    for my $trans (@{ $gene->get_all_Transcripts() }) {
        $transcriptAdaptor->remove($trans);
    }

    # Finally remove the gene row itself.
    my $gene_sth = $self->prepare("DELETE FROM gene WHERE gene_id = ? ");
    $gene_sth->bind_param(1, $gene->dbID, SQL_INTEGER);
    $gene_sth->execute();
    $gene_sth->finish();

    # The object no longer corresponds to a database row.
    $gene->dbID(undef);
    $gene->adaptor(undef);

    return;
}
Hide Show 16 lines of Pod
# Lists the InterPro entries found on the translations of a gene's current
# transcripts.
#   $gene_stable_id - gene stable id.
# Returns an array reference of "accession:description" strings, one per
# distinct InterPro accession, in the order first seen.
sub get_Interpro_by_geneid {
    my $self           = shift;
    my $gene_stable_id = shift;

    my $sql = qq(
SELECT i.interpro_ac,
x.description
FROM transcript t,
translation tl,
protein_feature pf,
interpro i,
xref x,
gene g
WHERE g.stable_id = ?
AND t.gene_id = g.gene_id
AND t.is_current = 1
AND tl.transcript_id = t.transcript_id
AND tl.translation_id = pf.translation_id
AND i.id = pf.hit_name
AND i.interpro_ac = x.dbprimary_acc);

    my $sth = $self->prepare($sql);
    $sth->bind_param(1, $gene_stable_id, SQL_VARCHAR);
    $sth->execute;

    my @out;
    my %seen;
    while (my $row = $sth->fetchrow_arrayref()) {
        my $interpro_ac = $row->[0];

        # Report each accession only once.
        next if $seen{$interpro_ac};
        $seen{$interpro_ac} = 1;

        push(@out, $interpro_ac . ":" . $row->[1]);
    }

    return \@out;
}
Hide Show 14 lines of Pod
# Writes a gene's mutable properties back to its existing row: stable id,
# biotype, analysis, display xref, description, is_current, canonical
# transcript and version.  The row is selected by the gene's dbID.
#   $gene - Bio::EnsEMBL::Gene to update (throws otherwise).
# When the gene has a canonical transcript, the transcript adaptor is also
# asked to update the canonical attribute, given the previous canonical
# transcript id read from the database.
sub update {
    my ($self, $gene) = @_;

    if (!defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene')) {
        throw("Must update a gene object, not a $gene");
    }

    # Remember the canonical transcript currently recorded for this gene.
    my $sth = $self->prepare(
        "SELECT canonical_transcript_id FROM gene WHERE gene_id=?");
    $sth->execute($gene->dbID());
    my ($old_canonical_transcript_id) = $sth->fetchrow_array();
    $sth->finish();

    my $update_gene_sql = qq(
        UPDATE gene
           SET stable_id = ?,
               biotype = ?,
               analysis_id = ?,
               display_xref_id = ?,
               description = ?,
               is_current = ?,
               canonical_transcript_id = ?,
               version = ?
         WHERE gene_id = ?
    );

    # Only a display xref that has a dbID can be referenced; otherwise the
    # column is set to NULL.
    my $display_xref = $gene->display_xref();
    my $display_xref_id;
    if ($display_xref && $display_xref->dbID()) {
        $display_xref_id = $display_xref->dbID();
    }
    else {
        $display_xref_id = undef;
    }

    $sth = $self->prepare($update_gene_sql);

    $sth->bind_param(1, $gene->stable_id(),         SQL_VARCHAR);
    $sth->bind_param(2, $gene->get_Biotype->name,   SQL_VARCHAR);
    $sth->bind_param(3, $gene->analysis->dbID(),    SQL_INTEGER);
    $sth->bind_param(4, $display_xref_id,           SQL_INTEGER);
    $sth->bind_param(5, $gene->description(),       SQL_VARCHAR);
    $sth->bind_param(6, $gene->is_current(),        SQL_TINYINT);

    # 0 stands for "no canonical transcript", matching what store() writes
    # before the transcripts are known.
    if (defined($gene->canonical_transcript())) {
        $sth->bind_param(7, $gene->canonical_transcript()->dbID(), SQL_INTEGER);
    }
    else {
        $sth->bind_param(7, 0, SQL_INTEGER);
    }

    # Bound as SQL_INTEGER, the same type hint store() uses for version
    # (this was SQL_TINYINT before).
    $sth->bind_param(8, $gene->version(), SQL_INTEGER);
    $sth->bind_param(9, $gene->dbID(),    SQL_INTEGER);

    $sth->execute();
    $sth->finish();

    if (defined($gene->canonical_transcript())) {
        my $transcript_adaptor = $self->db()->get_TranscriptAdaptor();
        $transcript_adaptor->update_canonical_attribute(
            $gene->canonical_transcript()->dbID(),
            $old_canonical_transcript_id
        );
    }
}
Hide Show 14 lines of Pod
# Recalculates a gene's coordinates and writes the resulting
# seq_region_start / seq_region_end back to its row (selected by dbID).
#   $gene - gene to update; required (throws when false).
sub update_coords {
    my $self = shift;
    my $gene = shift;

    if (!$gene) {
        throw('Must have a gene to update in order to update it');
    }

    $gene->recalculate_coordinates;

    my $update_sql = qq(
UPDATE gene
SET seq_region_start = ?,
seq_region_end = ?
WHERE gene_id = ?
);

    my $sth = $self->prepare($update_sql);

    # Placeholders, in order: new start, new end, gene id.
    $sth->bind_param(1, $gene->seq_region_start);
    $sth->bind_param(2, $gene->seq_region_end);
    $sth->bind_param(3, $gene->dbID);

    $sth->execute();
}
# Turns the rows of an executed statement handle into gene objects.
#   $sth        - executed statement handle whose columns follow the order
#                 of _columns().
#   $mapper     - optional assembly mapper; when absent but needed, one is
#                 fetched for the destination slice's coordinate system.
#   $dest_slice - optional slice to express the genes relative to.
# Returns an array reference of genes.  Rows that cannot be mapped, or that
# fall outside $dest_slice, are skipped.
sub _objs_from_sth {
    my ($self, $sth, $mapper, $dest_slice) = @_;

    my $sa             = $self->db()->get_SliceAdaptor();
    my $aa             = $self->db()->get_AnalysisAdaptor();
    my $dbEntryAdaptor = $self->db()->get_DBEntryAdaptor();

    my @genes;

    # Per-call caches so each analysis / slice is fetched only once.
    my (%analysis_hash, %slice_hash, %sr_name_hash, %sr_cs_hash);

    # One variable per selected column, in _columns() order.
    my (
        $gene_id,                 $seq_region_id,
        $seq_region_start,        $seq_region_end,
        $seq_region_strand,       $analysis_id,
        $biotype,                 $display_xref_id,
        $gene_description,        $source,
        $is_current,              $canonical_transcript_id,
        $stable_id,               $version,
        $created_date,            $modified_date,
        $xref_display_label,      $xref_primary_acc,
        $xref_description,        $xref_version,
        $external_db,             $external_status,
        $external_release,        $external_db_name,
        $info_type,               $info_text
    );

    $sth->bind_columns(
        \$gene_id,                 \$seq_region_id,
        \$seq_region_start,        \$seq_region_end,
        \$seq_region_strand,       \$analysis_id,
        \$biotype,                 \$display_xref_id,
        \$gene_description,        \$source,
        \$is_current,              \$canonical_transcript_id,
        \$stable_id,               \$version,
        \$created_date,            \$modified_date,
        \$xref_display_label,      \$xref_primary_acc,
        \$xref_description,        \$xref_version,
        \$external_db,             \$external_status,
        \$external_release,        \$external_db_name,
        \$info_type,               \$info_text
    );

    # Properties of the destination slice, read once up front.
    my ($dest_slice_start, $dest_slice_end, $dest_slice_strand);
    my ($dest_slice_length, $dest_slice_cs);
    my ($dest_slice_sr_name, $dest_slice_sr_id);
    my $asma;

    if ($dest_slice) {
        $dest_slice_start   = $dest_slice->start();
        $dest_slice_end     = $dest_slice->end();
        $dest_slice_strand  = $dest_slice->strand();
        $dest_slice_length  = $dest_slice->length();
        $dest_slice_cs      = $dest_slice->coord_system();
        $dest_slice_sr_name = $dest_slice->seq_region_name();
        $dest_slice_sr_id   = $dest_slice->get_seq_region_id();
        $asma               = $self->db->get_AssemblyMapperAdaptor();
    }

  FEATURE:
    while ($sth->fetch()) {

        # Analysis object, cached by id.
        my $analysis = $analysis_hash{$analysis_id} ||=
            $aa->fetch_by_dbID($analysis_id);

        # Translate the stored seq_region id to the internal one.
        $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);

        # Slice of the gene's seq_region, cached together with its name and
        # coordinate system.
        my $slice_key = "ID:" . $seq_region_id;
        my $slice     = $slice_hash{$slice_key};
        if (!$slice) {
            $slice                        = $sa->fetch_by_seq_region_id($seq_region_id);
            $slice_hash{$slice_key}       = $slice;
            $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
            $sr_cs_hash{$seq_region_id}   = $slice->coord_system();
        }

        # Fetch a mapper lazily when the gene lives in a different
        # coordinate system than the destination slice.
        if (   !$mapper
            && $dest_slice
            && !$dest_slice_cs->equals($slice->coord_system))
        {
            $mapper =
                $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
        }

        my $sr_name = $sr_name_hash{$seq_region_id};
        my $sr_cs   = $sr_cs_hash{$seq_region_id};

        # Remap the feature coordinates to another coordinate system.
        if ($mapper) {
            if (defined $dest_slice
                && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper'))
            {
                ($seq_region_id, $seq_region_start, $seq_region_end,
                 $seq_region_strand)
                    = $mapper->map($sr_name, $seq_region_start, $seq_region_end,
                                   $seq_region_strand, $sr_cs, 1, $dest_slice);
            }
            else {
                ($seq_region_id, $seq_region_start, $seq_region_end,
                 $seq_region_strand)
                    = $mapper->fastmap($sr_name, $seq_region_start,
                                       $seq_region_end, $seq_region_strand,
                                       $sr_cs);
            }

            # Skip features that did not map.
            next FEATURE if (!defined($seq_region_id));

            # Slice in the target coordinate system, cached like the others.
            $slice = $slice_hash{ "ID:" . $seq_region_id } ||=
                $sa->fetch_by_seq_region_id($seq_region_id);
        }

        # Convert seq_region coordinates into destination-slice coordinates.
        if (defined($dest_slice)) {
            my $seq_region_len = $dest_slice->seq_region_length();

            if ($dest_slice_strand == 1) {
                # Forward-strand slice: shift by the slice start.
                $seq_region_start = $seq_region_start - $dest_slice_start + 1;
                $seq_region_end   = $seq_region_end - $dest_slice_start + 1;

                if ($dest_slice->is_circular) {
                    if ($seq_region_start > $seq_region_end) {
                        # Feature spans the origin of the circular region.
                        if ($seq_region_end > $dest_slice_start) {
                            $seq_region_start -= $seq_region_len;
                        }
                        if ($seq_region_end < 0) {
                            $seq_region_end += $seq_region_len;
                        }
                    }
                    else {
                        # Slice spans the origin and the feature lies after it.
                        if (   $dest_slice_start > $dest_slice_end
                            && $seq_region_end < 0)
                        {
                            $seq_region_start += $seq_region_len;
                            $seq_region_end   += $seq_region_len;
                        }
                    }
                }
            }
            else {
                # Reverse-strand slice: mirror around the slice end.
                my $start = $dest_slice_end - $seq_region_end + 1;
                my $end   = $dest_slice_end - $seq_region_start + 1;

                if ($dest_slice->is_circular()) {
                    if ($dest_slice_start > $dest_slice_end) {
                        # Slice spans the origin.
                        if ($seq_region_start >= $dest_slice_start) {
                            $end += $seq_region_len;
                            $start += $seq_region_len
                                if $seq_region_end > $dest_slice_start;
                        }
                        elsif ($seq_region_start <= $dest_slice_end) {
                            # nothing to adjust
                        }
                        elsif ($seq_region_end >= $dest_slice_start) {
                            $start += $seq_region_len;
                            $end   += $seq_region_len;
                        }
                        elsif ($seq_region_end <= $dest_slice_end) {
                            $end += $seq_region_len if $end < 0;
                        }
                        elsif ($seq_region_start > $seq_region_end) {
                            $end += $seq_region_len;
                        }
                    }
                    else {
                        if (    $seq_region_start <= $dest_slice_end
                            and $seq_region_end >= $dest_slice_start)
                        {
                            # nothing to adjust
                        }
                        elsif ($seq_region_start > $seq_region_end) {
                            # Feature spans the origin.
                            if ($seq_region_start <= $dest_slice_end) {
                                $start -= $seq_region_len;
                            }
                            elsif ($seq_region_end >= $dest_slice_start) {
                                $end += $seq_region_len;
                            }
                        }
                    }
                }

                $seq_region_start = $start;
                $seq_region_end   = $end;
                $seq_region_strand *= -1;
            }

            # Throw away features off the end of the requested slice or on a
            # different seq_region.
            if (   $seq_region_end < 1
                || $seq_region_start > $dest_slice_length
                || ($dest_slice_sr_id != $seq_region_id))
            {
                next FEATURE;
            }

            $slice = $dest_slice;
        }

        # Build the display xref only when the row actually carries one.
        my $display_xref;
        if (defined $xref_display_label) {
            $display_xref = Bio::EnsEMBL::DBEntry->new_fast(
                {
                    'dbID'            => $display_xref_id,
                    'adaptor'         => $dbEntryAdaptor,
                    'display_id'      => $xref_display_label,
                    'primary_id'      => $xref_primary_acc,
                    'version'         => $xref_version,
                    'description'     => $xref_description,
                    'release'         => $external_release,
                    'dbname'          => $external_db,
                    'db_display_name' => $external_db_name,
                    'info_type'       => $info_type,
                    'info_text'       => $info_text
                }
            );
            $display_xref->status($external_status);
        }

        # Finally, create the new gene.
        push(
            @genes,
            $self->_create_feature_fast(
                'Bio::EnsEMBL::Gene',
                {
                    'analysis'                => $analysis,
                    'biotype'                 => $biotype,
                    'start'                   => $seq_region_start,
                    'end'                     => $seq_region_end,
                    'strand'                  => $seq_region_strand,
                    'adaptor'                 => $self,
                    'slice'                   => $slice,
                    'dbID'                    => $gene_id,
                    'stable_id'               => $stable_id,
                    'version'                 => $version,
                    'created_date'            => $created_date || undef,
                    'modified_date'           => $modified_date || undef,
                    'description'             => $gene_description,
                    'external_name'           => undef,
                    'external_db'             => $external_db,
                    'external_status'         => $external_status,
                    'display_xref'            => $display_xref,
                    'source'                  => $source,
                    'is_current'              => $is_current,
                    'canonical_transcript_id' => $canonical_transcript_id
                }
            )
        );
    }

    return \@genes;
}
Hide Show 12 lines of Pod
# Registers, for every coordinate system that holds genes, the full
# assembly mapping between that system and this species' sequence-level
# coordinate system, by calling register_all() on each mapper.
sub cache_gene_seq_mappings {
    my ($self) = @_;

    # Name of the sequence-level coordinate system of this species.
    # The fragments were concatenated with no space before "AND", leaving
    # the keyword glued to the closing quote of the LIKE pattern.
    my $sql = 'SELECT name '
        . 'FROM coord_system '
        . 'WHERE attrib like "%%sequence_level%%" '
        . 'AND species_id = ?';

    my $sth = $self->prepare($sql);
    $sth->bind_param(1, $self->species_id(), SQL_INTEGER);
    $sth->execute();

    # List assignment: take the first column of the first row explicitly
    # instead of relying on fetchrow_array()'s scalar-context result.
    my ($sequence_level) = $sth->fetchrow_array();
    $sth->finish();

    my $csa = $self->db->get_CoordSystemAdaptor();
    my $ama = $self->db->get_AssemblyMapperAdaptor();

    my $cs1 = $csa->fetch_by_name($sequence_level);

    # Coordinate systems in which gene features are stored.
    my $mcc   = $self->db->get_MetaCoordContainerAdaptor();
    my $csnew = $mcc->fetch_all_CoordSystems_by_feature_type('gene');

    foreach my $cs2 (@$csnew) {
        my $am = $ama->fetch_by_CoordSystems($cs1, $cs2);
        $am->register_all();
    }
}
Hide Show 19 lines of Pod
# Fetches all current genes having an exon supported by an alignment
# feature with the given hit name.
#   $hit_name     - hit name of the supporting feature.
#   $feature_type - 'dna_align_feature' or 'protein_align_feature' (throws
#                   otherwise); also used as the table name in the query.
#   $analysis     - optional analysis object; restricts the supporting
#                   features to that analysis.
# Returns an array reference of genes.
sub fetch_all_by_exon_supporting_evidence {
    my ($self, $hit_name, $feature_type, $analysis) = @_;

    # $feature_type is interpolated into the SQL as a table name, so it must
    # match one of the two names exactly.  The previous pattern,
    # /(dna)|(protein)_align_feature/, accepted any string merely containing
    # "dna" because the alternation was neither grouped nor anchored.
    if (!defined($feature_type)
        || $feature_type !~ /\A(?:dna|protein)_align_feature\z/)
    {
        throw("feature type must be dna_align_feature or protein_align_feature");
    }

    # Empty by default so nothing undefined is interpolated below.
    my ($anal_from, $anal_where) = ('', '');
    if ($analysis) {
        $anal_from  = ", analysis a ";
        $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? ";
    }

    my $sql = qq(
        SELECT DISTINCT(g.gene_id)
          FROM gene g,
               transcript t,
               exon_transcript et,
               supporting_feature sf,
               $feature_type f
               $anal_from
         WHERE g.gene_id = t.gene_id
           AND g.is_current = 1
           AND t.transcript_id = et.transcript_id
           AND et.exon_id = sf.exon_id
           AND sf.feature_id = f.${feature_type}_id
           AND sf.feature_type = ?
           AND f.hit_name=?
           $anal_where
    );

    my $sth = $self->prepare($sql);

    $sth->bind_param(1, $feature_type, SQL_VARCHAR);
    $sth->bind_param(2, $hit_name,     SQL_VARCHAR);
    $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);

    $sth->execute();

    my @genes;
    while (my ($id) = $sth->fetchrow_array) {
        my $gene = $self->fetch_by_dbID($id);
        push(@genes, $gene) if $gene;
    }

    return \@genes;
}
Hide Show 18 lines of Pod
# Fetches all current genes having a transcript supported by an alignment
# feature with the given hit name.
#   $hit_name     - hit name of the supporting feature.
#   $feature_type - 'dna_align_feature' or 'protein_align_feature' (throws
#                   otherwise); also used as the table name in the query.
#   $analysis     - optional analysis object; restricts the supporting
#                   features to that analysis.
# Returns an array reference of genes.
sub fetch_all_by_transcript_supporting_evidence {
    my ($self, $hit_name, $feature_type, $analysis) = @_;

    # $feature_type is interpolated into the SQL as a table name, so it must
    # match one of the two names exactly.  The previous pattern,
    # /(dna)|(protein)_align_feature/, accepted any string merely containing
    # "dna" because the alternation was neither grouped nor anchored.
    if (!defined($feature_type)
        || $feature_type !~ /\A(?:dna|protein)_align_feature\z/)
    {
        throw("feature type must be dna_align_feature or protein_align_feature");
    }

    # Empty by default so nothing undefined is interpolated below.
    my ($anal_from, $anal_where) = ('', '');
    if ($analysis) {
        $anal_from  = ", analysis a ";
        $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? ";
    }

    my $sql = qq(
        SELECT DISTINCT(g.gene_id)
          FROM gene g,
               transcript t,
               transcript_supporting_feature sf,
               $feature_type f
               $anal_from
         WHERE g.gene_id = t.gene_id
           AND g.is_current = 1
           AND t.transcript_id = sf.transcript_id
           AND sf.feature_id = f.${feature_type}_id
           AND sf.feature_type = ?
           AND f.hit_name=?
           $anal_where
    );

    my $sth = $self->prepare($sql);

    $sth->bind_param(1, $feature_type, SQL_VARCHAR);
    $sth->bind_param(2, $hit_name,     SQL_VARCHAR);
    $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);

    $sth->execute();

    my @genes;
    while (my ($id) = $sth->fetchrow_array) {
        my $gene = $self->fetch_by_dbID($id);
        push(@genes, $gene) if $gene;
    }

    return \@genes;
}
# Returns the SQL fragment that orders results by gene id.  Note the
# leading space in the string.
sub _final_clause {
    my $order_by_gene_id = ' ORDER BY g.gene_id';

    return $order_by_gene_id;
}
1;