sub
new {
my
(
$class
,
@args
) =
@_
;
my
$self
=
bless
{},
$class
;
$self
->_initialize(
@args
);
return
$self
;
}
sub
_initialize {
my
(
$self
,
@args
) =
@_
;
return
unless
$self
->SUPER::_initialize(
@args
);
}
sub
next
{
my
(
$self
) =
@_
;
local
$/ =
'//'
;
return
unless
my
$entry
=
$self
->_readline;
return
if
$entry
=~ /^\s+$/;
$entry
=~ /\s
*ID
\s+\S+/ ||
$self
->throw(
"We do need an ID!"
);
my
(
$id
,
$offset
,
$alphabet
) =
$entry
=~ /\s
*ID
+([^:]+)..(\d+)[^\)]*.\[?([cg])?/
or
$self
->throw(
"Can't parse ID line"
);
my
$h
=Bio::Variation::SeqDiff->new(
-id
=>
$id
,
-offset
=>
$offset
,
);
if
(
$alphabet
) {
if
(
$alphabet
eq
'g'
) {
$alphabet
=
'dna'
;
}
elsif
(
$alphabet
eq
'c'
) {
$alphabet
=
'rna'
;
}
$h
->alphabet(
$alphabet
);
}
my
@dna
=
split
( / DNA;/,
$entry
);
shift
@dna
;
my
$prevdnaobj
;
foreach
my
$dna
(
@dna
) {
$dna
=~ s/Feature[ \t]+//g;
(
$dna
) =
split
"RNA; "
,
$dna
;
my
(
$mut_number
,
$proof
,
$location
,
$upflank
,
$change
,
$dnflank
) =
$dna
=~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+).+/upflank: ([ \n\w]+).+/change: ([^ /]+).+/dnflank: ([ \n\w]+)|s;
$change
=~ s/[ \n]//g;
my
(
$ori
,
$mut
) =
split
/[>\|]/,
$change
;
my
(
$variation_number
,
$change_number
) =
split
/\./,
$mut_number
;
my
$dnamut
;
if
(
$change_number
and
$change_number
> 1 ) {
my
$a3
= Bio::Variation::Allele->new;
$a3
->seq(
$mut
)
if
$mut
;
$prevdnaobj
->add_Allele(
$a3
);
}
else
{
$upflank
=~ s/[ \n]//g;
$dnflank
=~ s/[ \n]//g;
my
(
$region
,
$junk
,
$region_value
,
$junk2
,
$region_dist
) =
$dna
=~ m|.+/region: ([\w\']+)(; )?(\w+)?( ?\(\+?)?(-?\d+)?|s;
my
(
$start
,
$sep
,
$end
) =
$location
=~ /(-?\d+)(.)?\D?(-?\d+)?/;
$end
=
$start
if
not
defined
$end
;
my
(
$len
) =
$end
-
$start
+1;
$len
= 0,
$start
=
$end
if
defined
$sep
and
$sep
eq
'^'
;
my
$ismut
= 0;
$ismut
= 1
if
$change
=~ m/>/;
$dnamut
= Bio::Variation::DNAMutation->new
(
'-start'
=>
$start
,
'-end'
=>
$end
,
'-length'
=>
$len
,
'-upStreamSeq'
=>
$upflank
,
'-dnStreamSeq'
=>
$dnflank
,
'-proof'
=>
$proof
,
'-mut_number'
=>
$mut_number
);
$prevdnaobj
=
$dnamut
;
my
$a1
= Bio::Variation::Allele->new;
$a1
->seq(
$ori
)
if
$ori
;
$dnamut
->allele_ori(
$a1
);
my
$a2
= Bio::Variation::Allele->new;
$a2
->seq(
$mut
)
if
$mut
;
$dnamut
->add_Allele(
$a2
);
if
(
$ismut
) {
$dnamut
->isMutation(1);
$dnamut
->allele_mut(
$a2
);
}
$dnamut
->region(
$region
)
if
defined
$region
;
$dnamut
->region_value(
$region_value
)
if
defined
$region_value
;
$dnamut
->region_dist(
$region_dist
)
if
defined
$region_dist
;
$h
->add_Variant(
$dnamut
);
$dnamut
->SeqDiff(
$h
);
}
}
my
@rna
=
split
( / RNA;/,
$entry
);
shift
@rna
;
my
$prevrnaobj
;
foreach
my
$rna
(
@rna
) {
$rna
=
substr
(
$rna
, 0,
index
(
$rna
,
'Feature AA'
));
$rna
=~ s/Feature[ \t]+//g;
(
$rna
) =
split
"DNA; "
,
$rna
;
my
(
$mut_number
,
$proof
,
$location
,
$upflank
,
$change
,
$dnflank
) =
$rna
=~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+).+/upflank: (\w+).+/change: ([^/]+).+/dnflank: (\w+)|s ;
my
(
$region
,
$junk
,
$region_value
,
$junk2
,
$region_dist
) =
$rna
=~ m|.+/region: ([\w\']+)(; )?(\w+)?( ?\(\+?)?(-?\d+)?|s;
$change
=~ s/[ \n]//g;
my
(
$ori
,
$mut
) =
split
/[>\|]/,
$change
;
my
$rnamut
;
my
(
$variation_number
,
$change_number
) =
split
/\./,
$mut_number
;
if
(
$change_number
and
$change_number
> 1 ) {
my
$a3
= Bio::Variation::Allele->new;
$a3
->seq(
$mut
)
if
$mut
;
$prevrnaobj
->add_Allele(
$a3
);
}
else
{
my
(
$start
,
$sep
,
$end
) =
$location
=~ /(-?\d+)(.)?\D?(-?\d+)?/;
$end
=
$start
if
not
defined
$end
;
my
(
$len
) =
$end
-
$start
+ 1;
$len
= 0,
$start
=
$end
if
defined
$sep
and
$sep
eq
'^'
;
my
$ismut
;
$ismut
= 1
if
$change
=~ m/>/;
my
(
$codon_table
) =
$rna
=~ m|.+/codon_table: (\d+)|s;
my
(
$codon_pos
) =
$rna
=~ m|.+/codon:[^;]+; ([123])|s;
$rnamut
= Bio::Variation::RNAChange->new
(
'-start'
=>
$start
,
'-end'
=>
$end
,
'-length'
=>
$len
,
'-upStreamSeq'
=>
$upflank
,
'-dnStreamSeq'
=>
$dnflank
,
'-proof'
=>
$proof
,
'-mut_number'
=>
$mut_number
);
$prevrnaobj
=
$rnamut
;
my
$a1
= Bio::Variation::Allele->new;
$a1
->seq(
$ori
)
if
$ori
;
$rnamut
->allele_ori(
$a1
);
my
$a2
= Bio::Variation::Allele->new;
$a2
->seq(
$mut
)
if
$mut
;
$rnamut
->add_Allele(
$a2
);
if
(
$ismut
) {
$rnamut
->isMutation(1);
$rnamut
->allele_mut(
$a2
);
}
$rnamut
->region(
$region
)
if
defined
$region
;
$rnamut
->region_value(
$region_value
)
if
defined
$region_value
;
$rnamut
->region_dist(
$region_dist
)
if
defined
$region_dist
;
$rnamut
->codon_table(
$codon_table
)
if
$codon_table
;
$rnamut
->codon_pos(
$codon_pos
)
if
$codon_pos
;
$h
->add_Variant(
$rnamut
);
foreach
my
$mut
(
$h
->each_Variant) {
if
(
$mut
->isa(
'Bio::Variation::DNAMutation'
) ) {
if
(
$mut
->mut_number ==
$rnamut
->mut_number) {
$rnamut
->DNAMutation(
$mut
);
$mut
->RNAChange(
$rnamut
);
}
}
}
}
}
my
@aa
=
split
( / AA;/,
$entry
);
shift
@aa
;
my
$prevaaobj
;
foreach
my
$aa
(
@aa
) {
$aa
=
substr
(
$aa
, 0,
index
(
$aa
,
'Feature AA'
));
$aa
=~ s/Feature[ \t]+//g;
(
$aa
) =
split
"DNA; "
,
$aa
;
my
(
$mut_number
,
$proof
,
$location
,
$change
) =
$aa
=~ m|\W+([\d\.]+).+/proof: (\w+).+/location: ([^ \n]+)./change: ([^/;]+)|s;
$change
=~ s/[ \n]//g;
$change
=~ s/[ \n]//g;
$change
=~ s/DNA$//;
my
(
$ori
,
$mut
) =
split
/[>\|]/,
$change
;
my
(
$variation_number
,
$change_number
) =
split
/\./,
$mut_number
;
my
$aamut
;
if
(
$change_number
and
$change_number
> 1 ) {
my
$a3
= Bio::Variation::Allele->new;
$a3
->seq(
$mut
)
if
$mut
;
$prevaaobj
->add_Allele(
$a3
);
}
else
{
my
(
$start
,
$sep
,
$end
) =
$location
=~ /(-?\d+)(.)?\D?(-?\d+)?/;
$end
=
$start
if
not
defined
$end
;
my
(
$len
) =
$end
-
$start
+ 1;
$len
= 0,
$start
=
$end
if
defined
$sep
and
$sep
eq
'^'
;
my
$ismut
;
$ismut
= 1
if
$change
=~ m/>/;
my
(
$region
) =
$aa
=~ m|.+/region: (\w+)|s ;
$aamut
= Bio::Variation::AAChange->new
(
'-start'
=>
$start
,
'-end'
=>
$end
,
'-length'
=>
$len
,
'-proof'
=>
$proof
,
'-mut_number'
=>
$mut_number
);
$prevaaobj
=
$aamut
;
my
$a1
= Bio::Variation::Allele->new;
$a1
->seq(
$ori
)
if
$ori
;
$aamut
->allele_ori(
$a1
);
my
$a2
= Bio::Variation::Allele->new;
$a2
->seq(
$mut
)
if
$mut
;
$aamut
->add_Allele(
$a2
);
if
(
$ismut
) {
$aamut
->isMutation(1);
$aamut
->allele_mut(
$a2
);
}
$region
&&
$aamut
->region(
$region
);
$h
->add_Variant(
$aamut
);
foreach
my
$mut
(
$h
->each_Variant) {
if
(
$mut
->isa(
'Bio::Variation::RNAChange'
) ) {
if
(
$mut
->mut_number ==
$aamut
->mut_number) {
$aamut
->RNAChange(
$mut
);
$mut
->AAChange(
$aamut
);
}
}
}
}
}
return
$h
;
}
sub
write
{
my
(
$self
,
@h
) =
@_
;
my
%tag
=
(
'ID'
=>
'ID '
,
'Description'
=>
'Description '
,
'FeatureKey'
=>
'Feature '
,
'FeatureQual'
=>
"Feature "
,
'FeatureWrap'
=>
"Feature "
,
'ErrorComment'
=>
'Comment '
);
if
( !
defined
$h
[0] ) {
$self
->throw(
"Attempting to write with no information!"
);
}
foreach
my
$h
(
@h
) {
my
@entry
=();
my
(
$text
,
$tmp
,
$tmp2
,
$sep
);
my
(
$count
) = 0;
$text
=
$tag
{ID};
$text
.=
$h
->id;
$text
.=
":("
.
$h
->offset;
$text
.=
"+1"
if
$h
->sysname =~ /-/;
$text
.=
")"
.
$h
->sysname;
$text
.=
"; "
.
$h
->trivname
if
$h
->trivname;
push
(
@entry
,
$text
);
my
@allvariants
=
$h
->each_Variant;
my
%variants
= ();
foreach
my
$mut
(
$h
->each_Variant) {
push
@{
$variants
{
$mut
->mut_number} },
$mut
;
}
foreach
my
$var
(
sort
keys
%variants
) {
foreach
my
$mut
(@{
$variants
{
$var
}}) {
if
(
$mut
->isa(
'Bio::Variation::DNAMutation'
) ) {
$self
->throw(
"allele_ori needs to be defined in [$mut]"
)
if
not
$mut
->allele_ori;
if
(
$mut
->isMutation) {
$sep
=
'>'
;
}
else
{
$sep
=
'|'
;
}
my
@alleles
=
$mut
->each_Allele;
my
$count
= 0;
foreach
my
$allele
(
@alleles
) {
$count
++;
my
(
$variation_number
,
$change_number
) =
split
/\./,
$mut
->mut_number;
if
(
$change_number
and
$change_number
!=
$count
){
$mut
->mut_number(
"$change_number.$count"
);
}
$mut
->allele_mut(
$allele
);
push
(
@entry
,
$tag
{FeatureKey}.
'DNA'
.
"; "
.
$mut
->mut_number
);
$text
=
$tag
{FeatureQual}.
'/label: '
.
$mut
->label;
push
(
@entry
,
$text
);
if
(
$mut
->proof) {
$text
=
$tag
{FeatureQual}.
'/proof: '
.
$mut
->proof;
push
(
@entry
,
$text
) ;
}
$text
=
$tag
{FeatureQual}.
'/location: '
;
if
(
$mut
->
length
> 1 ) {
my
$l
=
$mut
->start +
$mut
->
length
-1;
$text
.=
$mut
->start.
'..'
.
$l
;
}
elsif
(
$mut
->
length
== 0) {
my
$tmp_start
=
$mut
->start - 1;
$tmp_start
--
if
$tmp_start
== 0;
$text
.=
$tmp_start
.
'^'
.
$mut
->end;
}
else
{
$text
.=
$mut
->start;
}
if
(
$h
->alphabet &&
$h
->alphabet eq
'dna'
) {
$tmp
=
$mut
->start +
$h
->offset;
$tmp
--
if
$tmp
<= 0;
$mut
->start < 1 &&
$tmp
++;
$tmp2
=
$mut
->end +
$h
->offset;
if
(
$mut
->
length
> 1 ) {
$mut
->end < 1 &&
$tmp2
++;
$text
.=
' ('
.
$h
->id.
'::'
.
$tmp
.
".."
.
$tmp2
;
}
elsif
(
$mut
->
length
== 0) {
$tmp
--;
$tmp
--
if
$tmp
== 0;
$text
.=
' ('
.
$h
->id.
'::'
.
$tmp
.
'^'
.
$tmp2
;
}
else
{
$text
.=
' ('
.
$h
->id.
'::'
.
$tmp
;
}
$text
.=
')'
;
}
push
(
@entry
,
$text
);
push
(
@entry
,
$tag
{FeatureQual}.
'/upflank: '
.
$mut
->upStreamSeq
);
$text
=
''
;
$text
=
$mut
->allele_ori->seq
if
$mut
->allele_ori->seq;
$text
.=
$sep
;
$text
.=
$mut
->allele_mut->seq
if
$mut
->allele_mut->seq;
push
(
@entry
,
wrap(
$tag
{FeatureQual}.
'/change: '
,
$tag
{FeatureWrap},
$text
)
);
push
(
@entry
,
$tag
{FeatureQual}.
'/dnflank: '
.
$mut
->dnStreamSeq
);
if
(
$mut
->restriction_changes ne
''
) {
$text
=
$mut
->restriction_changes;
$text
= wrap(
$tag
{FeatureQual}.
'/re_site: '
,
$tag
{FeatureWrap},
$text
);
push
(
@entry
,
$text
);
}
if
(
$mut
->region ) {
$text
=
$tag
{FeatureQual}.
'/region: '
.
$mut
->region;
$text
.=
';'
if
$mut
->region_value or
$mut
->region_dist;
$text
.=
' '
.
$mut
->region_value
if
$mut
->region_value;
if
(
$mut
->region_dist ) {
$tmp
=
''
;
$tmp
=
'+'
if
$mut
->region_dist > 1;
$text
.=
" ("
.
$tmp
.
$mut
->region_dist.
')'
;
}
push
(
@entry
,
$text
);
}
if
(
$mut
->CpG) {
push
(
@entry
,
$tag
{FeatureQual}.
"/CpG"
);
}
}
}
elsif
(
$mut
->isa(
'Bio::Variation::RNAChange'
) ) {
$self
->throw(
"allele_ori needs to be defined in [$mut]"
)
if
not
$mut
->allele_ori;
my
@alleles
=
$mut
->each_Allele;
if
(
$mut
->isMutation) {
$sep
=
'>'
;
}
else
{
$sep
=
'|'
;
}
my
$count
= 0;
foreach
my
$allele
(
@alleles
) {
$count
++;
my
(
$variation_number
,
$change_number
) =
split
/\./,
$mut
->mut_number;
if
(
$change_number
and
$change_number
!=
$count
){
$mut
->mut_number(
"$change_number.$count"
);
}
$mut
->allele_mut(
$allele
);
push
(
@entry
,
$tag
{FeatureKey}.
'RNA'
.
"; "
.
$mut
->mut_number
);
$text
=
$tag
{FeatureQual}.
'/label: '
.
$mut
->label;
push
(
@entry
,
$text
);
if
(
$mut
->proof) {
$text
=
$tag
{FeatureQual}.
'/proof: '
.
$mut
->proof;
push
(
@entry
,
$text
) ;
}
$text
=
$tag
{FeatureQual}.
'/location: '
;
if
(
$mut
->
length
> 1 ) {
$text
.=
$mut
->start.
'..'
.
$mut
->end;
$tmp2
=
$mut
->end +
$h
->offset;
}
elsif
(
$mut
->
length
== 0) {
my
$tmp_start
=
$mut
->start;
$tmp_start
--;
$tmp_start
--
if
$tmp_start
== 0;
$text
.=
$tmp_start
.
'^'
.
$mut
->end;
}
else
{
$text
.=
$mut
->start;
}
if
(
$h
->alphabet &&
$h
->alphabet eq
'rna'
) {
$tmp
=
$mut
->start +
$h
->offset;
$tmp
--
if
$tmp
<= 0;
$tmp2
=
$mut
->end +
$h
->offset;
if
(
$mut
->
length
> 1 ) {
$text
.=
' ('
.
$h
->id.
'::'
.
$tmp
.
".."
.
$tmp2
;
}
elsif
(
$mut
->
length
== 0) {
$tmp
--;
$text
.=
' ('
.
$h
->id.
'::'
.
$tmp
.
'^'
.
$tmp2
;
}
else
{
$text
.=
' ('
.
$h
->id.
'::'
.
$tmp
;
}
$text
.=
')'
;
}
push
(
@entry
,
$text
);
push
(
@entry
,
$tag
{FeatureQual}.
'/upflank: '
.
$mut
->upStreamSeq
);
$text
=
''
;
$text
=
$mut
->allele_ori->seq
if
$mut
->allele_ori->seq;
$text
.=
$sep
;
$text
.=
$mut
->allele_mut->seq
if
$mut
->allele_mut->seq;
push
(
@entry
,
wrap(
$tag
{FeatureQual}.
'/change: '
,
$tag
{FeatureWrap},
$text
)
);
push
(
@entry
,
$tag
{FeatureQual}.
'/dnflank: '
.
$mut
->dnStreamSeq
);
if
(
$mut
->restriction_changes ne
''
) {
$text
=
$mut
->restriction_changes;
$text
= wrap(
$tag
{FeatureQual}.
'/re_site: '
,
$tag
{FeatureWrap},
$text
);
push
(
@entry
,
$text
);
}
if
(
$mut
->region eq
'coding'
) {
$text
=
$tag
{FeatureQual}.
'/codon_table: '
;
$text
.=
$mut
->codon_table;
push
(
@entry
,
$text
);
$text
=
$tag
{FeatureQual}.
'/codon: '
.
$mut
->codon_ori.
$sep
;
if
(
$mut
->DNAMutation->label =~ /.
*point
/) {
$text
.=
$mut
->codon_mut;
}
else
{
$text
.=
'-'
;
}
$text
.=
"; "
.
$mut
->codon_pos;
push
(
@entry
,
$text
);
}
if
(
$mut
->region ) {
$text
=
$tag
{FeatureQual}.
'/region: '
.
$mut
->region;
$text
.=
';'
if
$mut
->region_value or
$mut
->region_dist;
$text
.=
' '
.
$mut
->region_value
if
$mut
->region_value;
if
(
$mut
->region_dist ) {
$tmp
=
''
;
$tmp
=
'+'
if
$mut
->region_dist > 1;
$text
.=
" ("
.
$tmp
.
$mut
->region_dist.
')'
;
}
push
(
@entry
,
$text
);
}
}
}
elsif
(
$mut
->isa(
'Bio::Variation::AAChange'
)) {
$self
->throw(
"allele_ori needs to be defined in [$mut]"
)
if
not
$mut
->allele_ori;
if
(
$mut
->isMutation) {
$sep
=
'>'
;
}
else
{
$sep
=
'|'
;
}
my
@alleles
=
$mut
->each_Allele;
my
$count
= 0;
foreach
my
$allele
(
@alleles
) {
$count
++;
my
(
$variation_number
,
$change_number
) =
split
/\./,
$mut
->mut_number;
if
(
$change_number
and
$change_number
!=
$count
){
$mut
->mut_number(
"$change_number.$count"
);
}
$mut
->allele_mut(
$allele
);
push
(
@entry
,
$tag
{FeatureKey}.
'AA'
.
"; "
.
$mut
->mut_number
);
$text
=
$tag
{FeatureQual}.
'/label: '
.
$mut
->label;
push
(
@entry
,
$text
) ;
if
(
$mut
->proof) {
$text
=
$tag
{FeatureQual}.
'/proof: '
.
$mut
->proof;
push
(
@entry
,
$text
) ;
}
$text
=
$tag
{FeatureQual}.
'/location: '
.
$mut
->start;
if
(
$mut
->
length
> 1 ) {
$tmp
=
$mut
->start +
$mut
->
length
-1;
$text
.=
'..'
.
$tmp
;
}
push
(
@entry
,
$text
);
$text
=
''
;
$text
=
$mut
->allele_ori->seq
if
$mut
->allele_ori->seq;
$text
.=
$sep
;
$text
.=
$mut
->allele_mut->seq
if
$mut
->allele_mut->seq;
push
(
@entry
,
wrap(
$tag
{FeatureQual}.
'/change: '
,
$tag
{FeatureWrap},
$text
)
);
if
(
$mut
->region ) {
$text
=
$tag
{FeatureQual}.
'/region: '
.
$mut
->region;
$text
.=
';'
if
$mut
->region_value or
$mut
->region_dist;
$text
.=
' '
.
$mut
->region_value
if
$mut
->region_value;
if
(
$mut
->region_dist ) {
$tmp
=
''
;
$tmp
=
'+'
if
$mut
->region_dist > 1;
$text
.=
" ("
.
$tmp
.
$mut
->region_dist.
')'
;
}
push
(
@entry
,
$text
);
}
}
}
}
}
push
(
@entry
,
"//"
);
my
$str
=
join
(
"\n"
,
@entry
).
"\n"
;
$str
=~ s/\t/ /g;
$self
->_print(
$str
);
}
return
1;
}
1;