Hide Show 45 lines of Pod
# Package version. It is assigned twice and the second, purely numeric value
# is the one in effect at runtime.
# NOTE(review): the first assignment is presumably kept for tools that scan
# the source for the developer-release string -- confirm before removing it.
$Bio::EnsEMBL::DBSQL::SequenceAdaptor::VERSION = '112.0_54';
$Bio::EnsEMBL::DBSQL::SequenceAdaptor::VERSION = '112.054';

# Populate @EXPORT with the names DBI lists under its 'sql_types' export tag
# (the SQL_* constants used by the statements in this file).
our @EXPORT = @{ $DBI::EXPORT_TAGS{'sql_types'} };
Hide Show 13 lines of Pod
# Constructor.
#
#   $caller      - class name, or an existing instance whose class is reused
#   $db          - handed unchanged to the parent class constructor
#   $chunk_power - forwarded to _init_seq_instance()
#   $cache_size  - forwarded to _init_seq_instance()
#
# Returns the newly built adaptor after _populate_seq_region_edits() has run.
sub new {
    my $caller = shift;
    my ($db, $chunk_power, $cache_size) = @_;

    # Support both Class->new(...) and $instance->new(...).
    my $class = ref($caller) || $caller;

    my $self = $class->SUPER::new($db);
    $self->_init_seq_instance($chunk_power, $cache_size);
    $self->_populate_seq_region_edits();

    return $self;
}
Hide Show 28 lines of Pod
# Fetch the sequence underlying a slice, or a sub-range of it.
#
#   $slice  - Bio::EnsEMBL::Slice or Bio::EnsEMBL::LRGSlice (required)
#   $start  - start relative to the slice; defaults to 1
#   $end    - end relative to the slice; for non-circular slices it defaults
#             to the slice length
#   $strand - 1 or -1; any false value is treated as 1
#
# Returns a reference to the sequence string. Stretches not covered by the
# sequence-level projection are filled with 'N'.
#
# Throws when the slice argument is missing or of the wrong class, when
# start > end, when no normalized projection can be retrieved, or when a
# projected segment yields no sequence.
sub fetch_by_Slice_start_end_strand {
    my ($self, $slice, $start, $end, $strand) = @_;

    unless (ref($slice)
        && ($slice->isa("Bio::EnsEMBL::Slice") || $slice->isa('Bio::EnsEMBL::LRGSlice')))
    {
        throw("Slice argument is required.");
    }

    $start = 1 unless defined($start);

    # Open-ended, inverted or negative coordinates (or a slice whose own
    # start lies beyond its end) get special treatment on circular regions.
    my $unusual_coords = !defined($end)
        || $start > $end
        || $start < 0
        || $end < 0
        || $slice->start > $slice->end;

    if ($unusual_coords && $slice->is_circular) {
        if (!defined($end) || $start > $end) {
            return $self->_fetch_by_Slice_start_end_strand_circular($slice, $start, $end, $strand);
        }

        # Negative coordinates are shifted up by the region length.
        $end   += $slice->seq_region_length if defined($end) && $end < 0;
        $start += $slice->seq_region_length if $start < 0;

        if ($slice->start > $slice->end) {
            return $self->_fetch_by_Slice_start_end_strand_circular(
                $slice, $slice->start, $slice->end, $strand);
        }
    }

    if (!defined($end) && !$slice->is_circular) {
        $end = $slice->end() - $slice->start() + 1;
    }

    throw("Start must be less than or equal to end.") if $start > $end;

    $strand ||= 1;

    # Resize the slice so that it spans exactly [$start, $end].
    my $grow_right = $end - $slice->length();
    my $grow_left  = 1 - $start;
    $slice = $slice->expand($grow_left, $grow_right) if $grow_right || $grow_left;

    my $slice_adaptor = $slice->adaptor();
    my @normalized = @{ $slice_adaptor->fetch_normalized_slice_projection($slice) };

    if (!@normalized) {
        throw('Could not retrieve normalized Slices. Database contains '
            . 'incorrect assembly_exception information.');
    }

    # Anything other than a single projection onto the slice itself means
    # the sequence has to be stitched together from the projected slices.
    if (@normalized != 1 || $normalized[0]->[2] != $slice) {
        my $stitched;
        for my $piece (@normalized) {
            my $symlink_slice = $piece->[2];
            $stitched .= ${ $self->fetch_by_Slice_start_end_strand($symlink_slice, 1, undef, 1) };
        }
        reverse_comp(\$stitched) if $strand == -1;
        return \$stitched;
    }

    # Project onto the sequence-level coordinate system and assemble.
    my $seqlevel   = $self->db->get_CoordSystemAdaptor()->fetch_sequence_level();
    my @projection = @{ $slice->project($seqlevel->name(), $seqlevel->version()) };

    my $seq     = '';
    my $covered = 0;    # slice coordinate of the last base assembled so far

    for my $segment (@projection) {
        my ($seg_start, $seg_end, $seq_slice) = @$segment;

        # Pad the hole between the previous segment and this one.
        my $hole = $seg_start - $covered - 1;
        $seq .= 'N' x $hole if $hole;

        my $seq_region_id = $slice_adaptor->get_seq_region_id($seq_slice);
        my $chunk =
            ${ $self->_fetch_seq($seq_region_id, $seq_slice->start, $seq_slice->length()) };

        if (!defined $chunk) {
            throw('No sequence found for seq_region ' . $seq_region_id . ':' . $seq_slice->start);
        }

        reverse_comp(\$chunk) if $seq_slice->strand == -1;

        $seq .= $chunk;
        $covered = $seg_end;
    }

    # Pad whatever remains after the last projected segment.
    my $tail = $slice->length - $covered;
    $seq .= 'N' x $tail if $tail;

    # Final safety net: top up with N until the slice length is reached.
    if (length($seq) != $slice->length()) {
        $seq .= 'N' x ($slice->length() - length($seq));
    }

    # Apply cached '_rna_edit' substitutions registered for this seq_region.
    if (defined($self->{_rna_edits_cache})
        and defined($self->{_rna_edits_cache}->{ $slice->get_seq_region_id }))
    {
        $self->_rna_edit($slice, \$seq);
    }

    reverse_comp(\$seq) if $strand == -1;

    return \$seq;
}
Hide Show 6 lines of Pod
# Always returns 1, whatever arguments are passed.
# NOTE(review): presumably a capability hook that other adaptors override --
# confirm against callers.
sub can_access_Slice {
    my $accessible = 1;
    return $accessible;
}
# Fetch sequence for a slice on a circular seq_region.
#
#   $slice  - Bio::EnsEMBL::Slice (checked with assert_ref)
#   $start  - start relative to the slice; defaults to 1
#   $end    - end relative to the slice; defaults to
#             $slice->end - $slice->start + 1
#   $strand - 1 or -1; any false value is treated as 1
#
# Returns a reference to the sequence string.
#
# Throws when no normalized projection can be retrieved or when a projected
# segment yields no sequence.
sub _fetch_by_Slice_start_end_strand_circular {
    my ($self, $slice, $start, $end, $strand) = @_;

    assert_ref($slice, 'Bio::EnsEMBL::Slice');

    $strand ||= 1;
    $start = 1 unless defined($start);
    $end = $slice->end() - $slice->start() + 1 unless defined($end);

    # A request that wraps around the origin is fetched as two halves
    # (always on strand 1) and then joined.
    if ($start > $end && $slice->is_circular()) {
        my $midpoint = $slice->seq_region_length - $slice->start + 1;

        my $seq1 =
            ${ $self->_fetch_by_Slice_start_end_strand_circular($slice, 1, $midpoint, 1) };
        my $seq2 =
            ${ $self->_fetch_by_Slice_start_end_strand_circular(
                $slice, $midpoint + 1, $slice->length(), 1) };

        my $seq = $slice->strand > 0 ? "$seq1$seq2" : "$seq2$seq1";

        reverse_comp(\$seq) if ($strand == -1);

        return \$seq;
    }

    # Resize the slice to span exactly [$start, $end]; on the reverse strand
    # the two expansion amounts swap sides.
    my $right_expand = $end - $slice->length();
    my $left_expand  = 1 - $start;

    if ($right_expand || $left_expand) {
        $slice = $slice->strand > 0
            ? $slice->expand($left_expand, $right_expand)
            : $slice->expand($right_expand, $left_expand);
    }

    my $slice_adaptor = $slice->adaptor();
    my @symproj = @{ $slice_adaptor->fetch_normalized_slice_projection($slice) };

    if (@symproj == 0) {
        throw('Could not retrieve normalized Slices. Database contains '
            . 'incorrect assembly_exception information.');
    }

    # Anything other than a single projection onto the slice itself means
    # the sequence has to be stitched together from the projected slices.
    if (@symproj != 1 || $symproj[0]->[2] != $slice) {
        my $seq;
        foreach my $segment (@symproj) {
            my $symlink_slice = $segment->[2];
            $seq .= ${ $self->fetch_by_Slice_start_end_strand($symlink_slice, 1, undef, 1) };
        }
        if ($strand == -1) {
            reverse_comp(\$seq);
        }
        return \$seq;
    }

    # Project onto the sequence-level coordinate system and assemble.
    my $csa        = $self->db->get_CoordSystemAdaptor();
    my $seqlevel   = $csa->fetch_sequence_level();
    my @projection = @{ $slice->project($seqlevel->name(), $seqlevel->version()) };

    my $seq   = '';
    my $total = 0;    # slice coordinate of the last base assembled so far

    foreach my $segment (@projection) {
        my ($seg_start, $seg_end, $seq_slice) = @{$segment};

        # Pad the hole between the previous segment and this one.
        my $gap = $seg_start - $total - 1;
        if ($gap) {
            $seq .= 'N' x $gap;
        }

        my $seq_region_id = $slice_adaptor->get_seq_region_id($seq_slice);

        my $tmp_seq =
            ${ $self->_fetch_seq($seq_region_id, $seq_slice->start(), $seq_slice->length()) };

        # Fail loudly, as fetch_by_Slice_start_end_strand does. Otherwise the
        # missing bases would be replaced by trailing N padding further down
        # and everything after this segment would be shifted.
        if (!defined $tmp_seq) {
            throw('No sequence found for seq_region ' . $seq_region_id . ':' . $seq_slice->start);
        }

        if ($seq_slice->strand == -1) {
            reverse_comp(\$tmp_seq);
        }

        $seq .= $tmp_seq;
        $total = $seg_end;
    }

    # Pad whatever remains after the last projected segment.
    my $gap = $slice->length() - $total;
    if ($gap) {
        $seq .= 'N' x $gap;
    }

    # Final safety net: top up with N until the slice length is reached.
    if (length($seq) != $slice->length()) {
        $seq .= 'N' x ($slice->length() - length($seq));
    }

    # Apply cached '_rna_edit' substitutions registered for this seq_region.
    if (defined($self->{_rna_edits_cache})
        && defined($self->{_rna_edits_cache}->{ $slice->get_seq_region_id }))
    {
        $self->_rna_edit($slice, \$seq);
    }

    # Honour the requested strand on this path too; the wrapped and
    # symlinked paths above already do.
    reverse_comp(\$seq) if ($strand == -1);

    return \$seq;
}
Hide Show 7 lines of Pod
# Apply the cached '_rna_edit' substitutions for a slice's seq_region to a
# sequence string, in place.
#
#   $slice - object providing start() and get_seq_region_id()
#   $seq   - reference to the sequence string; it is taken to begin at
#            $slice->start and is modified directly
#
# Each cached edit is a whitespace-separated "start end replacement" string.
# Edits that only partly overlap the sequence window are trimmed to the
# overlapping part before being written. Returns nothing.
sub _rna_edit {
    my ($self, $slice, $seq) = @_;

    my $window_start = $slice->start;
    my $window_end   = $window_start + length($$seq) - 1;

    for my $edit (@{ $self->{_rna_edits_cache}->{ $slice->get_seq_region_id } }) {
        my ($edit_start, $edit_end, $replacement) = split(/\s+/, $edit);

        # Ignore edits that lie entirely before or after the window.
        next if ($edit_end < $window_start);
        next if ($window_end < $edit_start);

        my $span = length($replacement);

        if ($edit_start < $window_start || $edit_end > $window_end) {
            # Number of replacement bases falling off each side of the window.
            my $clip_left = $window_start - $edit_start;
            $clip_left = 0 if $clip_left < 0;

            my $clip_right = $edit_end - $window_end;
            $clip_right = 0 if $clip_right < 0;

            $span        = length($replacement) - $clip_left - $clip_right;
            $replacement = substr($replacement, $clip_left, $span);
        }

        # Offset of the edit within the window, clamped to the first base.
        my $offset = $edit_start - $window_start;
        $offset = 0 if $offset < 0;

        substr($$seq, $offset, $span, $replacement);
    }

    return;
}
Hide Show 6 lines of Pod
# Read an upper-cased substring of the stored sequence for one seq_region.
#
#   $id     - seq_region_id of the dna row
#   $start  - position handed to SQL SUBSTR
#   $length - number of characters handed to SQL SUBSTR
#
# Returns a reference to the fetched string. The helper is called with
# -NO_ERROR => 1.
# NOTE(review): presumably that makes a missing row yield undef instead of
# an exception -- confirm against the SqlHelper documentation.
sub _fetch_raw_seq {
    my ($self, $id, $start, $length) = @_;

    my $query = <<'SQL';
SELECT UPPER(SUBSTR(d.sequence, ?, ?))
FROM dna d
WHERE d.seq_region_id =?
SQL

    # Bind values in placeholder order: substring start, length, region id.
    my @bind = (
        [ $start,  SQL_INTEGER ],
        [ $length, SQL_INTEGER ],
        [ $id,     SQL_INTEGER ],
    );

    my $helper = $self->dbc()->sql_helper();
    my $seq    = $helper->execute_single_result(
        -SQL      => $query,
        -PARAMS   => \@bind,
        -NO_ERROR => 1,
    );

    return \$seq;
}
Hide Show 17 lines of Pod
# Insert a sequence into the dna table for the given seq_region.
#
#   $seq_region_id - id of the seq_region; must be a true value
#   $sequence      - sequence string; it is upper-cased before being stored
#
# Throws 'seq_region_id is required' when the id is false. Returns nothing.
sub store {
    my ($self, $seq_region_id, $sequence) = @_;

    throw('seq_region_id is required') unless $seq_region_id;

    my $upper_sequence = uc($sequence);

    my $sth = $self->prepare("INSERT INTO dna(seq_region_id, sequence) VALUES(?,?)");
    $sth->bind_param(1, $seq_region_id,  SQL_INTEGER);
    $sth->bind_param(2, $upper_sequence, SQL_LONGVARCHAR);
    $sth->execute();
    $sth->finish();

    return;
}
Hide Show 13 lines of Pod
# Delete the dna row(s) belonging to the given seq_region.
#
#   $seq_region_id - id of the seq_region; must be a true value
#
# Throws 'seq_region_id is required' when the id is false. Returns nothing.
sub remove {
    my ($self, $seq_region_id) = @_;

    throw('seq_region_id is required') unless $seq_region_id;

    my $sth = $self->prepare("DELETE FROM dna WHERE seq_region_id = ?");
    $sth->bind_param(1, $seq_region_id, SQL_INTEGER);
    $sth->execute();
    $sth->finish();

    return;
}
Hide Show 6 lines of Pod
# Load every '_rna_edit' seq_region attribute and cache the values on the
# adaptor, keyed by seq_region_id, under $self->{_rna_edits_cache}.
#
# On multispecies databases the query is additionally restricted to the
# current species_id. The cache key is only set when at least one edit was
# found. Returns nothing.
#
# Throws (from the row callback) when an edit's "start end" span differs in
# length from its replacement text; only substitutions are supported.
sub _populate_seq_region_edits {
    my ($self) = @_;

    my $sql;
    my @params = ('_rna_edit');

    if ($self->db()->is_multispecies()) {
        $sql = <<'SQL';
select sra.seq_region_id, sra.value
from seq_region_attrib sra
join attrib_type using (attrib_type_id)
join seq_region s using (seq_region_id)
join coord_system cs using (coord_system_id)
where code =?
and species_id =?
SQL
        push(@params, $self->db()->species_id());
    }
    else {
        $sql = <<'SQL';
select sra.seq_region_id, sra.value
from seq_region_attrib sra join attrib_type using (attrib_type_id)
where code = ?
SQL
    }

    # Row callback: validates one edit and collects it. When a list is passed
    # in as second argument the value is appended to it and nothing is
    # returned; otherwise a new one-element list is returned.
    # NOTE(review): presumably the second argument is the value already stored
    # for this seq_region_id -- confirm against execute_into_hash.
    my $mapper = sub {
        my ($row, $array) = @_;
        my ($seq_region_id, $value) = @{$row};

        my ($start, $end, $substring) = split(/\s+/, $value);
        my $edit_length      = ($end - $start) + 1;
        my $substring_length = length($substring);

        if ($edit_length != $substring_length) {
            throw(
                "seq_region_id $seq_region_id has an attrib of type '_rna_edit' (value '$value'). Edit length ${edit_length} is not the same as the replacement's length ${substring_length}. Please fix. We only support substitutions via this mechanism"
            );
        }

        if (defined $array) {
            push(@{$array}, $value);
            return;
        }
        return [$value];
    };

    # Pass the parameter list built above: the multispecies statement has two
    # placeholders and needs the species_id as well as the attribute code.
    my $edits = $self->dbc()->sql_helper->execute_into_hash(
        -SQL      => $sql,
        -PARAMS   => \@params,
        -CALLBACK => $mapper,
    );

    $self->{_rna_edits_cache} = $edits if %{$edits};

    return;
}
# A Perl module file must finish by evaluating to a true value.
1;