@ISA
=
qw(Bio::EnsEMBL::DBSQL::BaseAdaptor)
;
my
$CHUNKFACTOR
= 20;
sub
new {
my
(
$class
,
$dbadaptor
) =
@_
;
my
$self
=
$class
->SUPER::new(
$dbadaptor
);
$self
->{
'_asm_mapper_cache'
} = {};
my
$seq_region_cache
=
$self
->db->get_SeqRegionCache();
$self
->{
'sr_name_cache'
} =
$seq_region_cache
->{
'name_cache'
};
$self
->{
'sr_id_cache'
} =
$seq_region_cache
->{
'id_cache'
};
return
$self
;
}
sub
cache_seq_ids_with_mult_assemblys{
my
$self
=
shift
;
my
%multis
;
return
if
(
defined
(
$self
->{
'multi_seq_ids'
}));
$self
->{
'multi_seq_ids'
} = {};
my
$sql
=
qq(
SELECT sra.seq_region_id
FROM seq_region_attrib sra,
attrib_type at,
seq_region sr,
coord_system cs
WHERE sra.attrib_type_id = at.attrib_type_id
AND code = "MultAssem"
AND sra.seq_region_id = sr.seq_region_id
AND sr.coord_system_id = cs.coord_system_id
AND cs.species_id = ?)
;
my
$sth
=
$self
->prepare(
$sql
);
$sth
->bind_param( 1,
$self
->species_id(), SQL_INTEGER );
$sth
->execute();
my
(
$seq_region_id
);
$sth
->bind_columns(\
$seq_region_id
);
while
(
$sth
->fetch()) {
$self
->{
'multi_seq_ids'
}->{
$seq_region_id
} = 1;
}
$sth
->finish;
}
sub
fetch_by_CoordSystems {
my
$self
=
shift
;
my
$cs1
=
shift
;
my
$cs2
=
shift
;
if
(!
ref
(
$cs1
) || !
$cs1
->isa(
'Bio::EnsEMBL::CoordSystem'
)) {
throw(
"cs1 argument must be a Bio::EnsEMBL::CoordSystem."
);
}
if
(!
ref
(
$cs2
) || !
$cs2
->isa(
'Bio::EnsEMBL::CoordSystem'
)) {
throw(
"cs2 argument must be a Bio::EnsEMBL::CoordSystem."
);
}
if
(
$cs1
->is_top_level()) {
return
Bio::EnsEMBL::TopLevelAssemblyMapper->new(
$self
,
$cs1
,
$cs2
);
}
if
(
$cs2
->is_top_level()) {
return
Bio::EnsEMBL::TopLevelAssemblyMapper->new(
$self
,
$cs2
,
$cs1
);
}
my
$csa
=
$self
->db->get_CoordSystemAdaptor();
my
@mapping_path
= @{
$csa
->get_mapping_path(
$cs1
,
$cs2
)};
if
(!
@mapping_path
) {
return
undef
;
}
my
$key
=
join
(
':'
,
map
({
defined
(
$_
)?
$_
->dbID():
"-"
}
@mapping_path
));
my
$asm_mapper
=
$self
->{
'_asm_mapper_cache'
}->{
$key
};
return
$asm_mapper
if
(
$asm_mapper
);
if
(
@mapping_path
== 1) {
throw(
"Incorrect mapping path defined in meta table. "
.
"0 step mapping encountered between:\n"
.
$cs1
->name() .
" "
.
$cs1
->version() .
" and "
.
$cs2
->name .
" "
.
$cs2
->version());
}
if
(
@mapping_path
== 2) {
$asm_mapper
= Bio::EnsEMBL::AssemblyMapper->new(
$self
,
@mapping_path
);
$self
->{
'_asm_mapper_cache'
}->{
$key
} =
$asm_mapper
;
return
$asm_mapper
;
}
if
(
@mapping_path
== 3) {
$asm_mapper
= Bio::EnsEMBL::ChainedAssemblyMapper->new(
$self
,
@mapping_path
);
$self
->{
'_asm_mapper_cache'
}->{
$key
} =
$asm_mapper
;
$key
=
join
(
':'
,
map
({
defined
(
$_
)?
$_
->dbID():
"-"
}
reverse
(
@mapping_path
)));
$self
->{
'_asm_mapper_cache'
}->{
$key
} =
$asm_mapper
;
return
$asm_mapper
;
}
throw(
"Only 1 and 2 step coordinate system mapping is currently\n"
.
"supported. Mapping between "
.
$cs1
->name() .
" "
.
$cs1
->version() .
" and "
.
$cs2
->name() .
" "
.
$cs2
->version() .
" requires "
. (
scalar
(
@mapping_path
)-1) .
" steps."
);
}
sub
register_assembled {
my
$self
=
shift
;
my
$asm_mapper
=
shift
;
my
$asm_seq_region
=
shift
;
my
$asm_start
=
shift
;
my
$asm_end
=
shift
;
if
(!
ref
(
$asm_mapper
) || !
$asm_mapper
->isa(
'Bio::EnsEMBL::AssemblyMapper'
)) {
throw(
"Bio::EnsEMBL::AssemblyMapper argument expected"
);
}
throw(
"asm_seq_region argument expected"
)
if
(!
defined
(
$asm_seq_region
));
throw(
"asm_start argument expected"
)
if
(!
defined
(
$asm_start
));
throw(
"asm_end argument expected"
)
if
(!
defined
(
$asm_end
));
my
$asm_cs_id
=
$asm_mapper
->assembled_CoordSystem->dbID();
my
$cmp_cs_id
=
$asm_mapper
->component_CoordSystem->dbID();
my
@chunk_regions
;
my
(
$start_chunk
,
$end_chunk
);
$start_chunk
=
$asm_start
>>
$CHUNKFACTOR
;
$end_chunk
=
$asm_end
>>
$CHUNKFACTOR
;
if
(
$asm_start
==
$asm_end
+ 1) {
(
$start_chunk
,
$end_chunk
) = (
$end_chunk
,
$start_chunk
);
}
my
$i
;
my
(
$begin_chunk_region
,
$end_chunk_region
);
for
(
$i
=
$start_chunk
;
$i
<=
$end_chunk
;
$i
++) {
if
(
$asm_mapper
->have_registered_assembled(
$asm_seq_region
,
$i
)) {
if
(
defined
(
$begin_chunk_region
)) {
my
$region
= [(
$begin_chunk_region
<<
$CHUNKFACTOR
),
((
$end_chunk_region
+1) <<
$CHUNKFACTOR
)-1];
push
@chunk_regions
,
$region
;
$begin_chunk_region
=
$end_chunk_region
=
undef
;
}
}
else
{
$begin_chunk_region
=
$i
if
(!
defined
(
$begin_chunk_region
));
$end_chunk_region
=
$i
+1;
$asm_mapper
->register_assembled(
$asm_seq_region
,
$i
);
}
}
if
(
defined
(
$begin_chunk_region
)) {
my
$region
= [(
$begin_chunk_region
<<
$CHUNKFACTOR
),
((
$end_chunk_region
+1) <<
$CHUNKFACTOR
) -1];
push
@chunk_regions
,
$region
;
}
return
if
(!
@chunk_regions
);
if
(
$asm_mapper
->size() >
$asm_mapper
->max_pair_count() ) {
$asm_mapper
->flush();
@chunk_regions
= ( [ (
$start_chunk
<<
$CHUNKFACTOR
)
, ((
$end_chunk
+1) <<
$CHUNKFACTOR
)-1 ] );
for
(
my
$i
=
$start_chunk
;
$i
<=
$end_chunk
;
$i
++ ) {
$asm_mapper
->register_assembled(
$asm_seq_region
,
$i
);
}
}
my
$q
=
qq{
SELECT
asm.cmp_start,
asm.cmp_end,
asm.cmp_seq_region_id,
sr.name,
sr.length,
asm.ori,
asm.asm_start,
asm.asm_end
FROM
assembly asm, seq_region sr
WHERE
asm.asm_seq_region_id = ? AND
? <= asm.asm_end AND
? >= asm.asm_start AND
asm.cmp_seq_region_id = sr.seq_region_id AND
sr.coord_system_id = ?
}
;
my
$sth
=
$self
->prepare(
$q
);
foreach
my
$region
(
@chunk_regions
) {
my
(
$region_start
,
$region_end
) =
@$region
;
$sth
->bind_param(1,
$asm_seq_region
,SQL_INTEGER);
$sth
->bind_param(2,
$region_start
,SQL_INTEGER);
$sth
->bind_param(3,
$region_end
,SQL_INTEGER);
$sth
->bind_param(4,
$cmp_cs_id
,SQL_INTEGER);
$sth
->execute();
my
(
$cmp_start
,
$cmp_end
,
$cmp_seq_region_id
,
$cmp_seq_region
,
$ori
,
$asm_start
,
$asm_end
,
$cmp_seq_region_length
);
$sth
->bind_columns(\
$cmp_start
, \
$cmp_end
, \
$cmp_seq_region_id
,
\
$cmp_seq_region
, \
$cmp_seq_region_length
, \
$ori
,
\
$asm_start
, \
$asm_end
);
while
(
$sth
->fetch()) {
next
if
(
$asm_mapper
->have_registered_component(
$cmp_seq_region_id
)
and !
defined
(
$self
->{
'multi_seq_ids'
}->{
$cmp_seq_region_id
}));
$asm_mapper
->register_component(
$cmp_seq_region_id
);
$asm_mapper
->mapper->add_map_coordinates(
$asm_seq_region
,
$asm_start
,
$asm_end
,
$ori
,
$cmp_seq_region_id
,
$cmp_start
,
$cmp_end
);
my
$arr
= [
$cmp_seq_region_id
,
$cmp_seq_region
,
$cmp_cs_id
,
$cmp_seq_region_length
];
$self
->{
'sr_name_cache'
}->{
"$cmp_seq_region:$cmp_cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$cmp_seq_region_id"
} =
$arr
;
}
}
$sth
->finish();
}
sub
_seq_region_name_to_id {
my
$self
=
shift
;
my
$sr_name
=
shift
;
my
$cs_id
=
shift
;
if
(!
defined
(
$sr_name
) or
!
defined
(
$cs_id
)){
throw(
'seq_region_name and coord_system_id args are required'
);
}
my
$arr
=
$self
->{
'sr_name_cache'
}->{
"$sr_name:$cs_id"
};
if
(
$arr
) {
return
$arr
->[0];
}
my
$sth
=
$self
->prepare(
"SELECT seq_region_id, length "
.
"FROM seq_region "
.
"WHERE name = ? AND coord_system_id = ?"
);
$sth
->bind_param(1,
$sr_name
,SQL_VARCHAR);
$sth
->bind_param(2,
$cs_id
,SQL_INTEGER);
$sth
->execute();
my
@row
=
$sth
->fetchrow_array();
unless
(
@row
) {
throw(
"No-existent seq_region [$sr_name] in coord system $cs_id"
);
}
my
@more
=
$sth
->fetchrow_array();
if
(
@more
) {
throw(
"Ambiguous seq_region [$sr_name] in coord system $cs_id"
);
}
my
(
$sr_id
,
$sr_length
) =
@row
;
$sth
->finish();
$arr
= [
$sr_id
,
$sr_name
,
$cs_id
,
$sr_length
];
$self
->{
'sr_name_cache'
}->{
"$sr_name:$cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$sr_id"
} =
$arr
;
return
$sr_id
;
}
sub
_seq_region_id_to_name {
my
$self
=
shift
;
my
$sr_id
=
shift
;
(
$sr_id
) || throw(
'seq_region_is required'
);
my
$arr
=
$self
->{
'sr_id_cache'
}->{
"$sr_id"
};
if
(
$arr
) {
return
$arr
->[1];
}
my
$sth
=
$self
->prepare(
"SELECT name, length ,coord_system_id "
.
"FROM seq_region "
.
"WHERE seq_region_id = ? "
);
$sth
->bind_param(1,
$sr_id
,SQL_INTEGER);
$sth
->execute();
my
@row
=
$sth
->fetchrow_array();
if
(!
$sth
->rows() == 1) {
throw(
"non-existant seq_region [$sr_id]"
);
}
my
(
$sr_name
,
$sr_length
,
$cs_id
) =
@row
;
$sth
->finish();
$arr
= [
$sr_id
,
$sr_name
,
$cs_id
,
$sr_length
];
$self
->{
'sr_name_cache'
}->{
"$sr_name:$cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$sr_id"
} =
$arr
;
return
$sr_name
;
}
sub
register_component {
my
$self
=
shift
;
my
$asm_mapper
=
shift
;
my
$cmp_seq_region
=
shift
;
if
(!
ref
(
$asm_mapper
) || !
$asm_mapper
->isa(
'Bio::EnsEMBL::AssemblyMapper'
)) {
throw(
"Bio::EnsEMBL::AssemblyMapper argument expected"
);
}
if
(!
defined
(
$cmp_seq_region
)) {
throw(
"cmp_seq_region argument expected"
);
}
my
$cmp_cs_id
=
$asm_mapper
->component_CoordSystem()->dbID();
my
$asm_cs_id
=
$asm_mapper
->assembled_CoordSystem()->dbID();
return
if
(
$asm_mapper
->have_registered_component(
$cmp_seq_region
)
and !
defined
(
$self
->{
'multi_seq_ids'
}->{
$cmp_seq_region
}));
my
$q
=
qq{
SELECT
asm.asm_start,
asm.asm_end,
asm.asm_seq_region_id,
sr.name,
sr.length
FROM
assembly asm, seq_region sr
WHERE
asm.cmp_seq_region_id = ? AND
asm.asm_seq_region_id = sr.seq_region_id AND
sr.coord_system_id = ?
}
;
my
$sth
=
$self
->prepare(
$q
);
$sth
->bind_param(1,
$cmp_seq_region
,SQL_INTEGER);
$sth
->bind_param(2,
$asm_cs_id
,SQL_INTEGER);
$sth
->execute();
my
@rows
=
$sth
->fetchrow_array();
if
(
$sth
->rows() == 0) {
$asm_mapper
->register_component(
$cmp_seq_region
);
$sth
->finish();
return
;
}
my
@more
=
$sth
->fetchrow_array();
if
(
$sth
->rows() != 1) {
$sth
->finish();
throw(
"Multiple assembled regions for single "
.
"component region cmp_seq_region_id=[$cmp_seq_region]\n"
.
"Remember that multiple mappings use the \#-operaator"
.
" in the meta-table (i.e. chromosome:EquCab2\#contig\n"
);
}
my
(
$asm_start
,
$asm_end
,
$asm_seq_region_id
,
$asm_seq_region
,
$asm_seq_region_length
) =
@rows
;
my
$arr
= [
$asm_seq_region_id
,
$asm_seq_region
,
$asm_cs_id
,
$asm_seq_region_length
];
$self
->{
'sr_name_cache'
}->{
"$asm_seq_region:$asm_cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$asm_seq_region_id"
} =
$arr
;
$sth
->finish();
$self
->register_assembled(
$asm_mapper
,
$asm_seq_region_id
,
$asm_start
,
$asm_end
);
}
sub
register_chained {
my
$self
=
shift
;
my
$casm_mapper
=
shift
;
my
$from
=
shift
;
my
$seq_region_id
=
shift
;
my
$ranges
=
shift
;
my
$to_slice
=
shift
;
my
$to_seq_region_id
;
if
(
defined
(
$to_slice
)){
if
(
$casm_mapper
->first_CoordSystem()->equals(
$casm_mapper
->last_CoordSystem())){
return
$self
->_register_chained_special(
$casm_mapper
,
$from
,
$seq_region_id
,
$ranges
,
$to_slice
);
}
$to_seq_region_id
=
$to_slice
->get_seq_region_id();
if
(!
defined
(
$to_seq_region_id
)){
die
"Could not get seq_region_id for to_slice"
.
$to_slice
->seq_region_name.
"\n"
;
}
}
my
(
$start_name
,
$start_mid_mapper
,
$start_cs
,
$start_registry
,
$end_name
,
$end_mid_mapper
,
$end_cs
,
$end_registry
);
if
(
$from
eq
'first'
) {
$start_name
=
'first'
;
$start_mid_mapper
=
$casm_mapper
->first_middle_mapper();
$start_cs
=
$casm_mapper
->first_CoordSystem();
$start_registry
=
$casm_mapper
->first_registry();
$end_mid_mapper
=
$casm_mapper
->last_middle_mapper();
$end_cs
=
$casm_mapper
->last_CoordSystem();
$end_registry
=
$casm_mapper
->last_registry();
$end_name
=
'last'
;
}
elsif
(
$from
eq
'last'
) {
$start_name
=
'last'
;
$start_mid_mapper
=
$casm_mapper
->last_middle_mapper();
$start_cs
=
$casm_mapper
->last_CoordSystem();
$start_registry
=
$casm_mapper
->last_registry();
$end_mid_mapper
=
$casm_mapper
->first_middle_mapper();
$end_cs
=
$casm_mapper
->first_CoordSystem();
$end_registry
=
$casm_mapper
->first_registry();
$end_name
=
'first'
;
}
else
{
throw(
"Invalid from argument: [$from], must be 'first' or 'last'"
);
}
my
$combined_mapper
=
$casm_mapper
->first_last_mapper();
my
$mid_cs
=
$casm_mapper
->middle_CoordSystem();
my
$mid_name
=
'middle'
;
my
$csa
=
$self
->db->get_CoordSystemAdaptor();
if
( !
defined
$mid_cs
) {
$start_mid_mapper
=
$combined_mapper
;
}
my
@path
;
if
(
defined
$mid_cs
) {
@path
= @{
$csa
->get_mapping_path(
$start_cs
,
$mid_cs
)};
}
else
{
@path
= @{
$csa
->get_mapping_path(
$start_cs
,
$end_cs
)};
}
if
(
@path
!= 2 &&
defined
(
$path
[1] )) {
my
$path
=
join
(
','
,
map
({
$_
->name .
' '
.
$_
->version}
@path
));
my
$len
=
scalar
(
@path
) - 1;
throw(
"Unexpected mapping path between start and intermediate "
.
"coord systems ("
.
$start_cs
->name .
" "
.
$start_cs
->version .
" and "
.
$mid_cs
->name .
" "
.
$mid_cs
->version .
")."
.
"\nExpected path length 1, got $len. "
.
"(path=$path)"
);
}
my
$sth
;
my
(
$asm_cs
,
$cmp_cs
);
$asm_cs
=
$path
[0];
$cmp_cs
=
$path
[-1];
my
$asm2cmp
= (
<<ASMCMP);
SELECT
asm.cmp_start,
asm.cmp_end,
asm.cmp_seq_region_id,
sr.name,
sr.length,
asm.ori,
asm.asm_start,
asm.asm_end
FROM
assembly asm, seq_region sr
WHERE
asm.asm_seq_region_id = ? AND
? <= asm.asm_end AND
? >= asm.asm_start AND
asm.cmp_seq_region_id = sr.seq_region_id AND
sr.coord_system_id = ?
ASMCMP
my
$cmp2asm
= (
<<CMPASM);
SELECT
asm.asm_start,
asm.asm_end,
asm.asm_seq_region_id,
sr.name,
sr.length,
asm.ori,
asm.cmp_start,
asm.cmp_end
FROM
assembly asm, seq_region sr
WHERE
asm.cmp_seq_region_id = ? AND
? <= asm.cmp_end AND
? >= asm.cmp_start AND
asm.asm_seq_region_id = sr.seq_region_id AND
sr.coord_system_id = ?
CMPASM
my
$asm2cmp_sth
;
my
$cmp2asm_sth
;
if
(
defined
(
$to_slice
)){
my
$to_cs
=
$to_slice
->coord_system;
if
(
$asm_cs
->equals(
$to_cs
)){
$asm2cmp_sth
=
$self
->prepare(
$asm2cmp
);
$cmp2asm_sth
=
$self
->prepare(
$cmp2asm
.
" AND asm.asm_seq_region_id = $to_seq_region_id"
);
}
elsif
(
$cmp_cs
->equals(
$to_cs
)){
$asm2cmp_sth
=
$self
->prepare(
$asm2cmp
.
" AND asm.cmp_seq_region_id = $to_seq_region_id"
);
$cmp2asm_sth
=
$self
->prepare(
$cmp2asm
);
}
else
{
$asm2cmp_sth
=
$self
->prepare(
$asm2cmp
);
$cmp2asm_sth
=
$self
->prepare(
$cmp2asm
);
}
}
else
{
$asm2cmp_sth
=
$self
->prepare(
$asm2cmp
);
$cmp2asm_sth
=
$self
->prepare(
$cmp2asm
);
}
$sth
= (
$asm_cs
->equals(
$start_cs
)) ?
$asm2cmp_sth
:
$cmp2asm_sth
;
my
$mid_cs_id
;
if
(
defined
$mid_cs
) {
$mid_cs_id
=
$mid_cs
->dbID();
}
else
{
$mid_cs_id
=
$end_cs
->dbID();
}
my
@mid_ranges
;
my
@start_ranges
;
foreach
my
$range
(
@$ranges
) {
my
(
$start
,
$end
) =
@$range
;
$sth
->bind_param(1,
$seq_region_id
,SQL_INTEGER);
$sth
->bind_param(2,
$start
,SQL_INTEGER);
$sth
->bind_param(3,
$end
,SQL_INTEGER);
$sth
->bind_param(4,
$mid_cs_id
,SQL_INTEGER);
$sth
->execute();
my
(
$mid_start
,
$mid_end
,
$mid_seq_region_id
,
$mid_seq_region
,
$mid_length
,
$ori
,
$start_start
,
$start_end
);
$sth
->bind_columns(\
$mid_start
, \
$mid_end
, \
$mid_seq_region_id
,
\
$mid_seq_region
, \
$mid_length
, \
$ori
, \
$start_start
,
\
$start_end
);
while
(
$sth
->fetch()) {
if
(
defined
$mid_cs
) {
$start_mid_mapper
->add_map_coordinates
(
$seq_region_id
,
$start_start
,
$start_end
,
$ori
,
$mid_seq_region_id
,
$mid_start
,
$mid_end
);
}
else
{
if
(
$from
eq
"first"
) {
$combined_mapper
->add_map_coordinates
(
$seq_region_id
,
$start_start
,
$start_end
,
$ori
,
$mid_seq_region_id
,
$mid_start
,
$mid_end
);
}
else
{
$combined_mapper
->add_map_coordinates
(
$mid_seq_region_id
,
$mid_start
,
$mid_end
,
$ori
,
$seq_region_id
,
$start_start
,
$start_end
);
}
}
my
$arr
= [
$mid_seq_region_id
,
$mid_seq_region
,
$mid_cs_id
,
$mid_length
];
$self
->{
'sr_name_cache'
}->{
"$mid_seq_region:$mid_cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$mid_seq_region_id"
} =
$arr
;
push
@mid_ranges
,[
$mid_seq_region_id
,
$mid_seq_region
,
$mid_start
,
$mid_end
];
push
@start_ranges
, [
$seq_region_id
,
$start_start
,
$start_end
];
if
(
$start_start
<
$start
||
$start_end
>
$end
) {
$start_registry
->check_and_register(
$seq_region_id
,
$start_start
,
$start_end
);
}
}
$sth
->finish();
}
if
( !
defined
$mid_cs
) {
for
my
$range
(
@mid_ranges
) {
$end_registry
->check_and_register(
$range
->[0],
$range
->[2],
$range
->[3] );
}
return
;
}
@path
= @{
$csa
->get_mapping_path(
$mid_cs
,
$end_cs
)};
if
(
@path
== 2 || (
@path
== 3 && !
defined
$path
[1])) {
$asm_cs
=
$path
[0];
$cmp_cs
=
$path
[-1];
}
else
{
my
$path
=
join
(
','
,
map
({
$_
->name .
' '
.
$_
->version}
@path
));
my
$len
=
scalar
(
@path
)-1;
throw(
"Unexpected mapping path between intermediate and last"
.
"coord systems ("
.
$mid_cs
->name .
" "
.
$mid_cs
->version .
" and "
.
$end_cs
->name .
" "
.
$end_cs
->version .
")."
.
"\nExpected path length 1, got $len. "
.
"(path=$path)"
);
}
if
(
defined
(
$to_slice
)){
my
$to_cs
=
$to_slice
->coord_system;
if
(
$asm_cs
->equals(
$to_cs
)){
$asm2cmp_sth
=
$self
->prepare(
$asm2cmp
);
$cmp2asm_sth
=
$self
->prepare(
$cmp2asm
.
" AND asm.asm_seq_region_id = $to_seq_region_id"
);
}
elsif
(
$cmp_cs
->equals(
$to_cs
)){
$asm2cmp_sth
=
$self
->prepare(
$asm2cmp
.
" AND asm.cmp_seq_region_id = $to_seq_region_id"
);
$cmp2asm_sth
=
$self
->prepare(
$cmp2asm
);
}
else
{
$asm2cmp_sth
=
$self
->prepare(
$asm2cmp
);
$cmp2asm_sth
=
$self
->prepare(
$cmp2asm
);
}
}
$sth
= (
$asm_cs
->equals(
$mid_cs
)) ?
$asm2cmp_sth
:
$cmp2asm_sth
;
my
$end_cs_id
=
$end_cs
->dbID();
foreach
my
$mid_range
(
@mid_ranges
) {
my
(
$mid_seq_region_id
,
$mid_seq_region
,
$start
,
$end
) =
@$mid_range
;
$sth
->bind_param(1,
$mid_seq_region_id
,SQL_INTEGER);
$sth
->bind_param(2,
$start
,SQL_INTEGER);
$sth
->bind_param(3,
$end
,SQL_INTEGER);
$sth
->bind_param(4,
$end_cs_id
,SQL_INTEGER);
$sth
->execute();
my
(
$end_start
,
$end_end
,
$end_seq_region_id
,
$end_seq_region
,
$end_length
,
$ori
,
$mid_start
,
$mid_end
);
$sth
->bind_columns(\
$end_start
, \
$end_end
, \
$end_seq_region_id
,
\
$end_seq_region
, \
$end_length
, \
$ori
, \
$mid_start
,
\
$mid_end
);
while
(
$sth
->fetch()) {
$end_mid_mapper
->add_map_coordinates
(
$end_seq_region_id
,
$end_start
,
$end_end
,
$ori
,
$mid_seq_region_id
,
$mid_start
,
$mid_end
);
my
$arr
= [
$end_seq_region_id
,
$end_seq_region
,
$end_cs_id
,
$end_length
];
$self
->{
'sr_name_cache'
}->{
"$end_seq_region:$end_cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$end_seq_region_id"
} =
$arr
;
$end_registry
->check_and_register(
$end_seq_region_id
,
$end_start
,
$end_end
);
}
$sth
->finish();
}
_build_combined_mapper(\
@start_ranges
,
$start_mid_mapper
,
$end_mid_mapper
,
$combined_mapper
,
$start_name
);
return
;
}
sub
_register_chained_special {
my
$self
=
shift
;
my
$casm_mapper
=
shift
;
my
$from
=
shift
;
my
$seq_region_id
=
shift
;
my
$ranges
=
shift
;
my
$to_slice
=
shift
;
my
$found
= 0;
my
$sth
=
$self
->prepare("SELECT
asm.cmp_start,
asm.cmp_end,
asm.cmp_seq_region_id,
sr.name,
sr.
length
,
asm.ori,
asm.asm_start,
asm.asm_end
FROM
assembly asm, seq_region sr
WHERE
asm.asm_seq_region_id = ? AND
? <= asm.asm_end AND
? >= asm.asm_start AND
asm.cmp_seq_region_id = sr.seq_region_id AND
sr.coord_system_id = ? AND
asm.cmp_seq_region_id = ?");
my
(
$start_name
,
$start_mid_mapper
,
$start_cs
,
$start_registry
,
$end_name
,
$end_mid_mapper
,
$end_cs
,
$end_registry
);
if
(
$from
eq
'first'
) {
$start_name
=
'first'
;
$start_mid_mapper
=
$casm_mapper
->first_middle_mapper();
$start_cs
=
$casm_mapper
->first_CoordSystem();
$start_registry
=
$casm_mapper
->first_registry();
$end_mid_mapper
=
$casm_mapper
->last_middle_mapper();
$end_cs
=
$casm_mapper
->last_CoordSystem();
$end_registry
=
$casm_mapper
->last_registry();
$end_name
=
'last'
;
}
elsif
(
$from
eq
'last'
) {
$start_name
=
'last'
;
$start_mid_mapper
=
$casm_mapper
->last_middle_mapper();
$start_cs
=
$casm_mapper
->last_CoordSystem();
$start_registry
=
$casm_mapper
->last_registry();
$end_mid_mapper
=
$casm_mapper
->first_middle_mapper();
$end_cs
=
$casm_mapper
->first_CoordSystem();
$end_registry
=
$casm_mapper
->first_registry();
$end_name
=
'first'
;
}
else
{
throw(
"Invalid from argument: [$from], must be 'first' or 'last'"
);
}
my
$combined_mapper
=
$casm_mapper
->first_last_mapper();
my
$mid_cs
=
$casm_mapper
->middle_CoordSystem();
my
$mid_name
=
'middle'
;
my
$csa
=
$self
->db->get_CoordSystemAdaptor();
if
( !
defined
$mid_cs
) {
$start_mid_mapper
=
$combined_mapper
;
}
my
@path
;
if
(
defined
$mid_cs
) {
@path
= @{
$csa
->get_mapping_path(
$start_cs
,
$mid_cs
)};
}
else
{
@path
= @{
$csa
->get_mapping_path(
$start_cs
,
$end_cs
)};
}
if
( !
defined
$mid_cs
) {
$start_mid_mapper
=
$combined_mapper
;
}
if
(
@path
!= 2 &&
defined
(
$path
[1] )) {
my
$path
=
join
(
','
,
map
({
$_
->name .
' '
.
$_
->version}
@path
));
my
$len
=
scalar
(
@path
) - 1;
throw(
"Unexpected mapping path between start and intermediate "
.
"coord systems ("
.
$start_cs
->name .
" "
.
$start_cs
->version .
" and "
.
$mid_cs
->name .
" "
.
$mid_cs
->version .
")."
.
"\nExpected path length 1, got $len. "
.
"(path=$path)"
);
}
my
(
$asm_cs
,
$cmp_cs
);
$asm_cs
=
$path
[0];
$cmp_cs
=
$path
[-1];
$combined_mapper
=
$casm_mapper
->first_last_mapper();
$mid_cs
=
$casm_mapper
->middle_CoordSystem();
$mid_name
=
'middle'
;
$csa
=
$self
->db->get_CoordSystemAdaptor();
my
$mid_cs_id
;
if
( !
defined
$mid_cs
) {
$start_mid_mapper
=
$combined_mapper
;
}
else
{
$mid_cs_id
=
$mid_cs
->dbID();
}
my
@mid_ranges
;
my
@start_ranges
;
my
$to_cs
=
$to_slice
->coord_system;
foreach
my
$direction
(1, 0){
my
$id1
;
my
$id2
;
if
(
$direction
){
$id1
=
$seq_region_id
;
$id2
=
$to_slice
->get_seq_region_id();
}
else
{
$id2
=
$seq_region_id
;
$id1
=
$to_slice
->get_seq_region_id();
}
foreach
my
$range
(
@$ranges
) {
my
(
$start
,
$end
) =
@$range
;
$sth
->bind_param(1,
$id1
,SQL_INTEGER);
$sth
->bind_param(2,
$start
,SQL_INTEGER);
$sth
->bind_param(3,
$end
,SQL_INTEGER);
$sth
->bind_param(4,
$to_cs
->dbID,SQL_INTEGER);
$sth
->bind_param(5,
$id2
,SQL_INTEGER);
$sth
->execute();
my
(
$mid_start
,
$mid_end
,
$mid_seq_region_id
,
$mid_seq_region
,
$mid_length
,
$ori
,
$start_start
,
$start_end
);
$sth
->bind_columns(\
$mid_start
, \
$mid_end
, \
$mid_seq_region_id
,
\
$mid_seq_region
, \
$mid_length
, \
$ori
, \
$start_start
,
\
$start_end
);
while
(
$sth
->fetch()) {
$found
= 1;
if
(
defined
$mid_cs
) {
$start_mid_mapper
->add_map_coordinates
(
$id1
,
$start_start
,
$start_end
,
$ori
,
$mid_seq_region_id
,
$mid_start
,
$mid_end
);
}
else
{
if
(
$from
eq
"first"
) {
if
(
$direction
){
$combined_mapper
->add_map_coordinates
(
$id1
,
$start_start
,
$start_end
,
$ori
,
$mid_seq_region_id
,
$mid_start
,
$mid_end
);
}
else
{
$combined_mapper
->add_map_coordinates
(
$mid_seq_region_id
,
$mid_start
,
$mid_end
,
$ori
,
$id1
,
$start_start
,
$start_end
);
}
}
else
{
if
(
$direction
){
$combined_mapper
->add_map_coordinates
(
$mid_seq_region_id
,
$mid_start
,
$mid_end
,
$ori
,
$id1
,
$start_start
,
$start_end
);
}
else
{
$combined_mapper
->add_map_coordinates
(
$id1
,
$start_start
,
$start_end
,
$ori
,
$mid_seq_region_id
,
$mid_start
,
$mid_end
);
}
}
}
my
$arr
= [
$mid_seq_region_id
,
$mid_seq_region
,
$mid_cs_id
,
$mid_length
];
$self
->{
'sr_name_cache'
}->{
"$mid_seq_region:$mid_cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$mid_seq_region_id"
} =
$arr
;
push
@mid_ranges
,[
$mid_seq_region_id
,
$mid_seq_region
,
$mid_start
,
$mid_end
];
push
@start_ranges
, [
$id1
,
$start_start
,
$start_end
];
if
(
$start_start
<
$start
||
$start_end
>
$end
) {
$start_registry
->check_and_register(
$id1
,
$start_start
,
$start_end
);
}
}
$sth
->finish();
}
if
(
$found
){
if
( !
defined
$mid_cs
) {
for
my
$range
(
@mid_ranges
) {
$end_registry
->check_and_register(
$range
->[0],
$range
->[2],
$range
->[3] );
}
return
;
}
}
}
}
sub
register_all {
my
$self
=
shift
;
my
$mapper
=
shift
;
my
$asm_cs_id
=
$mapper
->assembled_CoordSystem()->dbID();
my
$cmp_cs_id
=
$mapper
->component_CoordSystem()->dbID();
my
$q
=
qq{
SELECT
asm.cmp_start,
asm.cmp_end,
asm.cmp_seq_region_id,
cmp_sr.name,
cmp_sr.length,
asm.ori,
asm.asm_start,
asm.asm_end,
asm.asm_seq_region_id,
asm_sr.name,
asm_sr.length
FROM
assembly asm, seq_region asm_sr, seq_region cmp_sr
WHERE
asm.cmp_seq_region_id = cmp_sr.seq_region_id AND
asm.asm_seq_region_id = asm_sr.seq_region_id AND
cmp_sr.coord_system_id = ? AND
asm_sr.coord_system_id = ?
}
;
my
$sth
=
$self
->prepare(
$q
);
$sth
->bind_param(1,
$cmp_cs_id
,SQL_INTEGER);
$sth
->bind_param(2,
$asm_cs_id
,SQL_INTEGER);
$sth
->execute();
my
(
$cmp_start
,
$cmp_end
,
$cmp_seq_region_id
,
$cmp_seq_region
,
$cmp_length
,
$ori
,
$asm_start
,
$asm_end
,
$asm_seq_region_id
,
$asm_seq_region
,
$asm_length
);
$sth
->bind_columns(\
$cmp_start
, \
$cmp_end
, \
$cmp_seq_region_id
,
\
$cmp_seq_region
, \
$cmp_length
, \
$ori
,
\
$asm_start
, \
$asm_end
, \
$asm_seq_region_id
,
\
$asm_seq_region
, \
$asm_length
);
my
%asm_registered
;
while
(
$sth
->fetch()) {
$mapper
->register_component(
$cmp_seq_region_id
);
$mapper
->mapper->add_map_coordinates(
$asm_seq_region_id
,
$asm_start
,
$asm_end
,
$ori
,
$cmp_seq_region_id
,
$cmp_start
,
$cmp_end
);
my
$arr
= [
$cmp_seq_region_id
,
$cmp_seq_region
,
$cmp_cs_id
,
$cmp_length
];
$self
->{
'sr_name_cache'
}->{
"$cmp_seq_region:$cmp_cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$cmp_seq_region_id"
} =
$arr
;
if
(!
$asm_registered
{
$asm_seq_region_id
}) {
$asm_registered
{
$asm_seq_region_id
} = 1;
my
$end_chunk
=
$asm_length
>>
$CHUNKFACTOR
;
for
(
my
$i
= 0;
$i
<=
$end_chunk
;
$i
++) {
$mapper
->register_assembled(
$asm_seq_region_id
,
$i
);
}
$arr
= [
$asm_seq_region_id
,
$asm_seq_region
,
$asm_cs_id
,
$asm_length
];
$self
->{
'sr_name_cache'
}->{
"$asm_seq_region:$asm_cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$asm_seq_region_id"
} =
$arr
;
}
}
$sth
->finish();
return
;
}
sub
register_all_chained {
my
$self
=
shift
;
my
$casm_mapper
=
shift
;
my
$first_cs
=
$casm_mapper
->first_CoordSystem();
my
$mid_cs
=
$casm_mapper
->middle_CoordSystem();
my
$last_cs
=
$casm_mapper
->last_CoordSystem();
my
$start_mid_mapper
=
$casm_mapper
->first_middle_mapper();
my
$end_mid_mapper
=
$casm_mapper
->last_middle_mapper();
my
$combined_mapper
=
$casm_mapper
->first_last_mapper();
my
@ranges
;
my
$sth
=
$self
->prepare(
'SELECT
asm.cmp_start,
asm.cmp_end,
asm.cmp_seq_region_id,
sr_cmp.name,
sr_cmp.
length
,
asm.ori,
asm.asm_start,
asm.asm_end,
asm.asm_seq_region_id,
sr_asm.name,
sr_asm.
length
FROM
assembly asm, seq_region sr_asm, seq_region sr_cmp
WHERE
sr_asm.seq_region_id = asm.asm_seq_region_id AND
sr_cmp.seq_region_id = asm.cmp_seq_region_id AND
sr_asm.coord_system_id = ? AND
sr_cmp.coord_system_id = ?');
my
$csa
=
$self
->db()->get_CoordSystemAdaptor();
my
@path
;
if
( !
defined
$mid_cs
) {
@path
= @{
$csa
->get_mapping_path(
$first_cs
,
$last_cs
) };
if
( !
defined
(
$path
[1] ) ) {
splice
(
@path
, 1, 1 );
}
}
else
{
@path
= @{
$csa
->get_mapping_path(
$first_cs
,
$mid_cs
) };
if
( !
defined
(
$path
[1] ) ) {
splice
(
@path
, 1, 1 );
}
}
if
(
@path
!= 2 ) {
my
$path
=
join
(
','
,
map
( {
$_
->name .
' '
.
$_
->version }
@path
) );
my
$len
=
scalar
(
@path
) - 1;
throw(
"Unexpected mapping path between start and intermediate "
.
"coord systems ("
.
$first_cs
->name .
" "
.
$first_cs
->version .
" and "
.
$mid_cs
->name .
" "
.
$mid_cs
->version .
")."
.
"\nExpected path length 1, got $len. "
.
"(path=$path)"
);
}
my
(
$asm_cs
,
$cmp_cs
) =
@path
;
$sth
->{mysql_use_result} = 1;
$sth
->bind_param(1,
$asm_cs
->dbID,SQL_INTEGER);
$sth
->bind_param(2,
$cmp_cs
->dbID,SQL_INTEGER);
$sth
->execute();
my
(
$mid_start
,
$mid_end
,
$mid_seq_region_id
,
$mid_seq_region
,
$mid_length
,
$ori
,
$start_start
,
$start_end
,
$start_seq_region_id
,
$start_seq_region
,
$start_length
);
if
(
$asm_cs
->equals(
$first_cs
)) {
$sth
->bind_columns(\
$mid_start
, \
$mid_end
, \
$mid_seq_region_id
,
\
$mid_seq_region
, \
$mid_length
, \
$ori
, \
$start_start
,
\
$start_end
, \
$start_seq_region_id
, \
$start_seq_region
,
\
$start_length
);
}
else
{
$sth
->bind_columns(\
$start_start
, \
$start_end
, \
$start_seq_region_id
,
\
$start_seq_region
, \
$start_length
, \
$ori
,
\
$mid_start
, \
$mid_end
, \
$mid_seq_region_id
,
\
$mid_seq_region
, \
$mid_length
);
}
my
(
$mid_cs_id
,
$start_cs_id
,
$registry
,
$mapper
);
if
( !
defined
$mid_cs
) {
$mid_cs_id
=
$last_cs
->dbID();
$start_cs_id
=
$first_cs
->dbID();
$mapper
=
$combined_mapper
;
}
else
{
$mid_cs_id
=
$mid_cs
->dbID();
$start_cs_id
=
$first_cs
->dbID();
$mapper
=
$start_mid_mapper
;
}
$registry
=
$casm_mapper
->first_registry();
while
(
$sth
->fetch()) {
$mapper
->add_map_coordinates
(
$start_seq_region_id
,
$start_start
,
$start_end
,
$ori
,
$mid_seq_region_id
,
$mid_start
,
$mid_end
);
push
(
@ranges
, [
$start_seq_region_id
,
$start_start
,
$start_end
] );
$registry
->check_and_register(
$start_seq_region_id
, 1,
$start_length
);
if
( !
defined
$mid_cs
) {
$casm_mapper
->last_registry()->check_and_register
(
$mid_seq_region_id
,
$mid_start
,
$mid_end
);
}
my
$arr
= [
$mid_seq_region_id
,
$mid_seq_region
,
$mid_cs_id
,
$mid_length
];
$self
->{
'sr_name_cache'
}->{
"$mid_seq_region:$mid_cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$mid_seq_region_id"
} =
$arr
;
$arr
= [
$start_seq_region_id
,
$start_seq_region
,
$start_cs_id
,
$start_length
];
$self
->{
'sr_name_cache'
}->{
"$start_seq_region:$start_cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$start_seq_region_id"
} =
$arr
;
}
if
( !
defined
$mid_cs
) {
return
;
}
@path
= @{
$csa
->get_mapping_path(
$last_cs
,
$mid_cs
) };
if
(
defined
(
$mid_cs
) ) {
if
( !
defined
(
$path
[1] ) ) {
splice
(
@path
, 1, 1 );
}
}
if
(
@path
!= 2 ) {
my
$path
=
join
(
','
,
map
( {
$_
->name .
' '
.
$_
->version }
@path
) );
my
$len
=
scalar
(
@path
) - 1;
throw(
"Unexpected mapping path between intermediate and last "
.
"coord systems ("
.
$last_cs
->name .
" "
.
$last_cs
->version .
" and "
.
$mid_cs
->name .
" "
.
$mid_cs
->version .
")."
.
"\nExpected path length 1, got $len. "
.
"(path=$path)"
);
}
(
$asm_cs
,
$cmp_cs
) =
@path
;
$sth
->bind_param(1,
$asm_cs
->dbID,SQL_INTEGER);
$sth
->bind_param(2,
$cmp_cs
->dbID,SQL_INTEGER);
$sth
->execute();
my
(
$end_start
,
$end_end
,
$end_seq_region_id
,
$end_seq_region
,
$end_length
);
if
(
$asm_cs
->equals(
$mid_cs
)) {
$sth
->bind_columns(\
$end_start
, \
$end_end
, \
$end_seq_region_id
,
\
$end_seq_region
, \
$end_length
, \
$ori
,
\
$mid_start
, \
$mid_end
, \
$mid_seq_region_id
,
\
$mid_seq_region
, \
$mid_length
);
}
else
{
$sth
->bind_columns(\
$mid_start
, \
$mid_end
, \
$mid_seq_region_id
,
\
$mid_seq_region
, \
$mid_length
, \
$ori
, \
$end_start
,
\
$end_end
, \
$end_seq_region_id
, \
$end_seq_region
,
\
$end_length
);
}
my
$end_cs_id
=
$last_cs
->dbID();
$registry
=
$casm_mapper
->last_registry();
while
(
$sth
->fetch()) {
$end_mid_mapper
->add_map_coordinates
(
$end_seq_region_id
,
$end_start
,
$end_end
,
$ori
,
$mid_seq_region_id
,
$mid_start
,
$mid_end
);
$registry
->check_and_register(
$end_seq_region_id
, 1,
$end_length
);
my
$arr
= [
$end_seq_region_id
,
$end_seq_region
,
$end_cs_id
,
$end_length
];
$self
->{
'sr_name_cache'
}->{
"$end_seq_region:$end_cs_id"
} =
$arr
;
$self
->{
'sr_id_cache'
}->{
"$end_seq_region_id"
} =
$arr
;
}
_build_combined_mapper( \
@ranges
,
$start_mid_mapper
,
$end_mid_mapper
,
$combined_mapper
,
"first"
);
return
;
}
sub
_build_combined_mapper {
my
$ranges
=
shift
;
my
$start_mid_mapper
=
shift
;
my
$end_mid_mapper
=
shift
;
my
$combined_mapper
=
shift
;
my
$start_name
=
shift
;
my
$mid_name
=
"middle"
;
foreach
my
$range
(
@$ranges
) {
my
(
$seq_region_id
,
$start
,
$end
) =
@$range
;
my
$sum
= 0;
my
@initial_coords
=
$start_mid_mapper
->map_coordinates(
$seq_region_id
,
$start
,
$end
,1,
$start_name
);
foreach
my
$icoord
(
@initial_coords
) {
if
(
$icoord
->isa(
'Bio::EnsEMBL::Mapper::Gap'
)) {
$sum
+=
$icoord
->
length
();
next
;
}
my
@final_coords
=
$end_mid_mapper
->map_coordinates(
$icoord
->id,
$icoord
->start,
$icoord
->end,
$icoord
->strand,
$mid_name
);
foreach
my
$fcoord
(
@final_coords
) {
if
(
$fcoord
->isa(
'Bio::EnsEMBL::Mapper::Coordinate'
)) {
my
$total_start
=
$start
+
$sum
;
my
$total_end
=
$total_start
+
$fcoord
->
length
- 1;
my
$ori
=
$fcoord
->strand();
if
(
$start_name
eq
'first'
) {
$combined_mapper
->add_map_coordinates(
$seq_region_id
,
$total_start
,
$total_end
,
$ori
,
$fcoord
->id(),
$fcoord
->start(),
$fcoord
->end());
}
else
{
$combined_mapper
->add_map_coordinates(
$fcoord
->id(),
$fcoord
->start(),
$fcoord
->end(),
$ori
,
$seq_region_id
,
$total_start
,
$total_end
);
}
}
$sum
+=
$fcoord
->
length
();
}
}
}
}
sub
seq_regions_to_ids {
my
$self
=
shift
;
my
$coord_system
=
shift
;
my
$seq_regions
=
shift
;
my
$cs_id
=
$coord_system
->dbID();
my
@out
;
foreach
my
$sr
(
@$seq_regions
) {
my
$arr
=
$self
->{
'sr_name_cache'
}->{
"$sr:$cs_id"
};
if
(
$arr
) {
push
(
@out
,
$arr
->[0] );
}
else
{
push
@out
,
$self
->_seq_region_name_to_id(
$sr
,
$cs_id
);
}
}
return
\
@out
;
}
sub
seq_ids_to_regions {
my
$self
=
shift
;
my
$seq_region_ids
=
shift
;
my
@out
;
foreach
my
$sr
(
@$seq_region_ids
) {
my
$arr
=
$self
->{
'sr_id_cache'
}->{
"$sr"
};
if
(
$arr
) {
push
(
@out
,
$arr
->[1] );
}
else
{
push
@out
,
$self
->_seq_region_id_to_name(
$sr
);
}
}
return
\
@out
;
}
sub
delete_cache{
my
(
$self
) =
@_
;
%{
$self
->{
'sr_name_cache'
}} = ();
%{
$self
->{
'sr_id_cache'
}} = ();
foreach
my
$key
(
keys
%{
$self
->{
'_asm_mapper_cache'
}}){
$self
->{
'_asm_mapper_cache'
}->{
$key
}->flush();
}
%{
$self
->{
'_asm_mapper_cache'
}} = ();
return
;
}
1;