$Bio::EnsEMBL::Utils::PolyA::VERSION
=
'112.0_53'
;
$Bio::EnsEMBL::Utils::PolyA::VERSION
=
'112.053'
;
sub
new{
my
(
$class
) =
@_
;
if
(
ref
(
$class
)){
$class
=
ref
(
$class
);
}
my
$self
= {};
bless
(
$self
,
$class
);
return
$self
;
}
sub
clip{
my
(
$self
,
$bioseq
) =
@_
;
my
$seq
=
$bioseq
->seq;
$self
->_clip(1);
$self
->_mask(0);
$self
->_softmask(0);
my
$new_seq
=
$self
->_find_polyA(
$seq
);
my
$cdna
= Bio::Seq->new();
if
(
length
(
$new_seq
) > 0){
$cdna
->seq(
$new_seq
);
}
else
{
print
"While clipping the the polyA tail, sequence "
.
$bioseq
->display_id.
" totally disappeared.\n"
;
print
"Returning undef\n"
;
return
undef
;
}
$cdna
->display_id(
$bioseq
->display_id );
$cdna
->desc(
$bioseq
->desc );
return
$cdna
;
}
sub
mask{
my
(
$self
,
$bioseq
,
$flag
) =
@_
;
my
$seq
=
$bioseq
->seq;
$self
->_clip(0);
if
(
$flag
){
$self
->_mask(0);
$self
->_softmask(1);
}
else
{
$self
->_mask(1);
$self
->_softmask(0);
}
my
$new_seq
=
$self
->_find_polyA(
$seq
);
my
$cdna
= new Bio::Seq;
$cdna
->seq(
$new_seq
);
$cdna
->display_id(
$bioseq
->display_id );
$cdna
->desc(
$bioseq
->desc );
return
$cdna
;
}
sub
_find_polyA{
my
(
$self
,
$seq
) =
@_
;
my
$new_seq
;
my
$length
=
length
(
$seq
);
my
$check_polyT
=
substr
(
$seq
, 0, 6 );
my
$check_polyA
=
substr
(
$seq
, -6 );
my
$t_count
=
$check_polyT
=~
tr
/Tt//;
my
$a_count
=
$check_polyA
=~
tr
/Aa//;
if
(
$a_count
>= 5 &&
$a_count
>
$t_count
){
my
$length_to_mask
= 0;
my
(
$piece
,
$count
) = (3,0);
my
$n_count
= 0;
while
(
$length_to_mask
<
$length
){
my
$chunk
=
substr
(
$seq
, (
$length
- (
$length_to_mask
+ 3)),
$piece
);
$count
=
$chunk
=~
tr
/Aa//;
$n_count
=
$chunk
=~
tr
/Nn//;
if
( (
$count
+
$n_count
) >= 2*(
$piece
)/3 ){
$length_to_mask
+= 3;
}
else
{
last
;
}
}
if
(
$length_to_mask
> 0 ){
my
$last_base
=
substr
(
$seq
, (
$length
-
$length_to_mask
), 1);
my
$previous_to_last
=
substr
(
$seq
, (
$length
-
$length_to_mask
- 1), 1);
if
( !(
$last_base
eq
'A'
||
$last_base
eq
'a'
) ){
$length_to_mask
--;
}
elsif
(
$previous_to_last
eq
'A'
||
$previous_to_last
eq
'a'
){
$length_to_mask
++;
}
my
$clipped_seq
=
substr
(
$seq
, 0,
$length
-
$length_to_mask
);
my
$mask
;
if
(
$self
->_clip ){
$mask
=
""
;
}
elsif
(
$self
->_mask ){
$mask
=
"N"
x (
$length_to_mask
);
}
elsif
(
$self
->_softmask ){
$mask
=
lc
substr
(
$seq
, (
$length
-
$length_to_mask
) );
}
$new_seq
=
$clipped_seq
.
$mask
;
}
else
{
$new_seq
=
$seq
;
}
}
elsif
(
$t_count
>=5 &&
$t_count
>
$a_count
){
my
$length_to_mask
= -3;
my
(
$piece
,
$count
) = (3,3);
my
$n_count
= 0;
while
(
$length_to_mask
<
$length
){
my
$chunk
=
substr
(
$seq
,
$length_to_mask
+ 3,
$piece
);
$count
=
$chunk
=~
tr
/Tt//;
$n_count
=
$chunk
=~
tr
/Nn//;
if
( (
$count
+
$n_count
) >= 2*(
$piece
)/3 ){
$length_to_mask
+=3;
}
else
{
last
;
}
}
if
(
$length_to_mask
>= 0 ){
my
$last_base
=
substr
(
$seq
, (
$length_to_mask
+ 3 - 1 ), 1 );
my
$previous_to_last
=
substr
(
$seq
, (
$length_to_mask
+ 3 ), 1 );
if
( !(
$last_base
eq
'T'
||
$last_base
eq
't'
) ){
$length_to_mask
--;
}
elsif
(
$previous_to_last
eq
'T'
||
$previous_to_last
eq
't'
){
$length_to_mask
++;
}
my
$clipped_seq
=
substr
(
$seq
,
$length_to_mask
+ 3);
my
$mask
;
if
(
$self
->_clip ){
$mask
=
""
;
}
elsif
(
$self
->_mask ){
$mask
=
"N"
x (
$length_to_mask
+3);
}
elsif
(
$self
->_softmask){
$mask
=
lc
substr
(
$seq
, 0, (
$length_to_mask
+ 3) );
}
$new_seq
=
$mask
.
$clipped_seq
;
}
else
{
$new_seq
=
$seq
;
}
}
else
{
$new_seq
=
$seq
;
}
return
$new_seq
;
}
sub
_mask{
my
(
$self
,
$mask
) =
@_
;
if
(
defined
(
$mask
)){
$self
->{_mask} =
$mask
;
}
return
$self
->{_mask};
}
sub
_clip{
my
(
$self
,
$clip
) =
@_
;
if
(
defined
(
$clip
)){
$self
->{_clip} =
$clip
;
}
return
$self
->{_clip};
}
sub
_softmask{
my
(
$self
,
$softmask
) =
@_
;
if
(
defined
(
$softmask
)){
$self
->{_softmask} =
$softmask
;
}
return
$self
->{_softmask};
}
sub
has_polyA_track{
my
(
$self
,
$seq
) =
@_
;
my
$new_seq
;
my
$length
=
length
(
$seq
);
my
$check_polyT
=
substr
(
$seq
, 0, 10 );
my
$check_polyA
=
substr
(
$seq
, -10 );
print
STDERR
"polyA: $check_polyA\n"
;
my
$t_count
=
$check_polyT
=~
tr
/Tt//;
my
$a_count
=
$check_polyA
=~
tr
/Aa//;
if
(
$a_count
>=7 ||
$t_count
>=7 ){
return
1;
}
else
{
return
0;
}
}
1;