our
$TM
=
'Bio::SeqFeature::Tools::TypeMapper'
;
our
$FNAMER
=
'Bio::SeqFeature::Tools::FeatureNamer'
;
our
$IDH
=
'Bio::SeqFeature::Tools::IDHandler'
;
sub
_initialize {
my
(
$self
,
@args
) =
@_
;
$self
->SUPER::_initialize(
@args
);
if
( !
defined
$self
->sequence_factory ) {
$self
->sequence_factory(new Bio::Seq::SeqFactory
(
-verbose
=>
$self
->verbose(),
-type
=>
'Bio::Seq::RichSeq'
));
}
my
$wclass
=
$self
->default_handler_class;
$self
->handler(
$wclass
);
if
(
$self
->_fh) {
$self
->handler->fh(
$self
->_fh);
}
$self
->{_end_of_data} = 0;
$self
->_type_by_id_h({});
my
$t
=
time
;
my
$ppt
=
localtime
$t
;
$self
->handler->S(
"chaos"
);
$self
->handler->ev(
chaos_metadata
=>[
[
chaos_version
=>1],
[
chaos_flavour
=>
'bioperl'
],
[
feature_unique_key
=>
'feature_id'
],
[
equiv_chado_release
=>
'chado_1_01'
],
[
export_unixtime
=>
$t
],
[
export_localtime
=>
$ppt
],
[
export_host
=>
$ENV
{HOST}],
[
export_user
=>
$ENV
{USER}],
[
export_perl5lib
=>
$ENV
{PERL5LIB}],
[
export_program
=>$0],
[
export_module
=>
'Bio::SeqIO::chaos'
],
[
export_module_cvs_id
=>
'$Id: chaos.pm,v 1.10.4.1 2006/10/02 23:10:28 sendu Exp $'
],
]);
return
;
}
sub
DESTROY {
my
$self
=
shift
;
$self
->end_of_data();
$self
->SUPER::DESTROY();
}
sub
end_of_data {
my
$self
=
shift
;
return
if
$self
->{_end_of_data};
$self
->{_end_of_data} = 1;
$self
->handler->E(
"chaos"
);
}
sub
default_handler_class {
return
Data::Stag->makehandler;
}
sub
context_namespace{
my
$self
=
shift
;
return
$self
->{
'context_namespace'
} =
shift
if
@_
;
return
$self
->{
'context_namespace'
};
}
sub
next_seq {
my
(
$self
,
@args
) =
@_
;
my
$seq
=
$self
->sequence_factory->create
(
);
return
$seq
;
}
sub
handler {
my
$self
=
shift
;
$self
->{_handler} =
shift
if
@_
;
return
$self
->{_handler};
}
sub
write_seq {
my
(
$self
,
$seq
) =
@_
;
if
( !
defined
$seq
) {
$self
->throw(
"Attempting to write with no seq!"
);
}
if
( !
ref
$seq
|| !
$seq
->isa(
'Bio::SeqI'
) ) {
$self
->
warn
(
" $seq is not a SeqI compliant module. Attempting to dump, but may fail!"
);
}
my
$w
=
$self
->handler;
my
$seq_chaos_feature_id
;
my
$version
=
$seq
->can(
'seq_version'
) ?
$seq
->seq_version :
$seq
->version;
my
$accversion
=
$seq
->accession_number;
if
(
$version
) {
$accversion
.=
".$version"
;
}
if
(
$accversion
) {
$seq_chaos_feature_id
=
$accversion
;
}
else
{
$seq_chaos_feature_id
=
$self
->get_chaos_feature_id(
$seq
);
$accversion
=
$seq_chaos_feature_id
;
}
if
(
$seq_chaos_feature_id
!~ /:/) {
$seq_chaos_feature_id
=
"GenericSeqDB:$seq_chaos_feature_id"
;
}
my
$haplotype
;
if
(
$seq
->desc =~ /haplotype(.*)/i) {
$haplotype
= $1;
$haplotype
=~ s/\s+/_/g;
$haplotype
=~ s/\W+//g;
}
my
$OS
;
if
(
my
$spec
=
$seq
->species) {
my
(
$species
,
$genus
,
@class
) =
$spec
->classification();
$OS
=
"$genus $species"
;
if
(
my
$ssp
=
$spec
->sub_species) {
$OS
.=
" $ssp"
;
}
$self
->genus_species(
$OS
);
if
(
$spec
->common_name ) {
my
$common
=
$spec
->common_name;
if
(
$common
=~ /\((.*)\)/) {
$common
= $1;
}
$OS
.=
" ("
.
$common
.
")"
;
}
}
if
(
$OS
) {
$self
->organismstr(
$OS
);
}
if
(
$haplotype
) {
$self
->genus_species(
$self
->genus_species .=
" $haplotype"
);
}
my
$uname
=
$self
->make_uniquename(
$self
->genus_species,
$accversion
);
my
$seqnode
=
Data::Stag->new(
feature
=>[
[
feature_id
=>
$seq_chaos_feature_id
],
[
dbxrefstr
=>
'SEQDB:'
.
$accversion
],
[
name
=>
$seq
->display_name],
[
uniquename
=>
$uname
],
[
residues
=>
$seq
->seq],
]);
my
%prop
= ();
$seqnode
->set_type(
'databank_entry'
);
map
{
$prop
{
$_
} =
$seq
->
$_
()
if
$seq
->can(
$_
);
}
qw(desc keywords division molecule is_circular)
;
$prop
{dates} =
join
(
"; "
,
$seq
->get_dates)
if
$seq
->can(
"get_dates"
);
local
($^W) = 0;
my
$count
= 1;
foreach
my
$ref
(
$seq
->annotation->get_Annotations(
'reference'
) ) {
}
$seqnode
->add_featureprop([[
type
=>
'haplotype'
],[
value
=>
$haplotype
]])
if
$haplotype
;
foreach
my
$comment
(
$seq
->annotation->get_Annotations(
'comment'
) ) {
$seqnode
->add_featureprop([[
type
=>
'comment'
],[
value
=>
$comment
->text]]);
}
if
(
$OS
) {
$seqnode
->set_organismstr(
$OS
);
}
my
@sfs
=
$seq
->get_SeqFeatures;
my
@sources
=
grep
{
$_
->primary_tag eq
'source'
}
@sfs
;
@sfs
=
grep
{
$_
->primary_tag ne
'source'
}
@sfs
;
$self
->throw(
">1 source types"
)
if
@sources
> 1;
my
$source
=
shift
@sources
;
if
(
$source
) {
my
$tempw
= Data::Stag->makehandler;
$self
->write_sf(
$source
,
$seq_chaos_feature_id
,
$tempw
);
my
$snode
=
$tempw
->stag;
$seqnode
->add(
$_
->name,
$_
->data)
foreach
(
$snode
->get_featureprop,
$snode
->get_feature_dbxref);
}
$w
->ev(
@$seqnode
);
$seqnode
=
undef
;
foreach
my
$sf
(
@sfs
) {
$FNAMER
->name_feature(
$sf
);
$FNAMER
->name_contained_features(
$sf
);
$self
->write_sf(
$sf
,
$seq_chaos_feature_id
);
}
return
1;
}
sub
organismstr{
my
$self
=
shift
;
return
$self
->{
'organismstr'
} =
shift
if
@_
;
return
$self
->{
'organismstr'
};
}
sub
genus_species{
my
$self
=
shift
;
return
$self
->{
'genus_species'
} =
shift
if
@_
;
return
$self
->{
'genus_species'
};
}
sub
_type_by_id_h {
my
$self
=
shift
;
$self
->{_type_by_id_h} =
shift
if
@_
;
return
$self
->{_type_by_id_h};
}
sub
write_sf {
my
$self
=
shift
;
my
$sf
=
shift
;
my
$seq_chaos_feature_id
=
shift
;
my
$w
=
shift
||
$self
->handler;
my
%props
=
map
{
lc
(
$_
)=>[
$sf
->each_tag_value(
$_
)]
}
$sf
->all_tags;
my
$loc
=
$sf
->location;
my
$name
=
$FNAMER
->generate_feature_name(
$sf
);
my
$type
=
$sf
->primary_tag;
$type
=~ s/CDS/polypeptide/;
my
@subsfs
=
$sf
->sub_SeqFeature;
my
@locnodes
= ();
my
$sid
=
$loc
->is_remote ?
$loc
->seq_id :
$seq_chaos_feature_id
;
my
$CREATE_SPLIT_SFS
= 0;
if
(
$CREATE_SPLIT_SFS
&&
$loc
->isa(
"Bio::Location::SplitLocationI"
) ) {
my
$n
= 1;
push
(
@subsfs
,
map
{
my
$ssf
=
Bio::SeqFeature::Generic->new(
-start
=>
$_
->start,
-end
=>
$_
->end,
-strand
=>
$_
->strand,
-primary
=>
$self
->subpartof(
$type
),
);
if
(
$_
->is_remote) {
$ssf
->location->is_remote(1);
$ssf
->location->seq_id(
$_
->seq_id);
}
$ssf
;
}
$loc
->each_Location);
}
elsif
(
$loc
->isa(
"Bio::Location::RemoteLocationI"
) ) {
my
$n
= 1;
push
(
@subsfs
,
map
{
Bio::SeqFeature::Generic->new(
-start
=>
$_
->start,
-end
=>
$_
->end,
-strand
=>
$_
->strand,
-primary
=>
$self
->subpartof(
$type
),
)
}
$loc
->each_Location);
}
else
{
my
(
$beg
,
$end
,
$strand
) =
$self
->bp2ib(
$loc
);
if
(!
$strand
) {
print
Dumper
$sf
,
$loc
;
$self
->throw(
"($beg, $end, $strand) - no strand\n"
);
}
@locnodes
= (
[
featureloc
=>[
[
nbeg
=>
$beg
],
[
nend
=>
$end
],
[
strand
=>
$strand
],
[
srcfeature_id
=>
$sid
],
[
locgroup
=>0],
[
rank
=>0],
]
]
);
}
my
$feature_id
=
$self
->get_chaos_feature_id(
$sf
);
delete
$props
{id}
if
$props
{id};
my
$pid
=
$props
{
'protein_id'
};
my
$tn
=
$props
{
'translation'
};
my
@xrefs
= @{
$props
{
'db_xref'
} || []};
if
(
$pid
) {
push
(
@xrefs
,
"protein:$pid->[0]"
);
}
my
$org
=
$props
{organism} ?
$props
{organism}->[0] :
undef
;
if
(!
$org
&&
$self
->organismstr) {
$org
=
$self
->organismstr;
}
my
$uname
=
$name
?
$name
.
'/'
.
$feature_id
:
$feature_id
;
if
(
$self
->genus_species &&
$name
) {
$uname
=
$self
->make_uniquename(
$self
->genus_species,
$name
);
}
if
(!
$uname
) {
$self
->throw(
"cannot make uniquename for $feature_id $name"
);
}
$self
->_type_by_id_h->{
$feature_id
} =
$type
;
my
$fnode
=
[
feature
=>[
[
feature_id
=>
$feature_id
],
$name
? ([
name
=>
$name
]) : (),
[
uniquename
=>
$uname
],
[
type
=>
$type
],
$tn
? ([
residues
=>
$tn
->[0]],
[
seqlen
=>
length
(
$tn
->[0])],
) :(),
$org
? ([
organismstr
=>
$org
]) : (),
@locnodes
,
(
map
{
[
feature_dbxref
=>[
[
dbxrefstr
=>
$_
]
]
]
}
@xrefs
),
(
map
{
my
$k
=
$_
;
my
$rank
=0;
map
{ [
featureprop
=>[[
type
=>
$k
],[
value
=>
$_
],[
rank
=>
$rank
++]]] } @{
$props
{
$k
}}
}
keys
%props
),
]];
$w
->ev(
@$fnode
);
my
$rank
= 0;
if
(
@subsfs
) {
my
$strand
=
$subsfs
[0];
my
@sfs_on_main_strand
=
grep
{
$_
->strand ==
$strand
}
@subsfs
;
my
@sfs_on_other_strand
=
grep
{
$_
->strand !=
$strand
}
@subsfs
;
sort_by_strand(
$strand
, \
@sfs_on_main_strand
);
sort_by_strand(0-
$strand
, \
@sfs_on_other_strand
);
@subsfs
= (
@sfs_on_main_strand
,
@sfs_on_other_strand
);
foreach
my
$ssf
(
@subsfs
) {
my
$ssfid
=
$self
->write_sf(
$ssf
,
$sid
);
my
$rtype
=
$TM
->get_relationship_type_by_parent_child(
$sf
,
$ssf
);
if
(
$ssf
->primary_tag eq
'CDS'
) {
$rtype
=
'derives_from'
;
}
$w
->ev(
feature_relationship
=>[
[
subject_id
=>
$ssfid
],
[
object_id
=>
$feature_id
],
[
type
=>
$rtype
],
[
rank
=>
$rank
++],
]
);
}
}
else
{
my
@parent_ids
= @{
$props
{parent} || []};
foreach
my
$parent_id
(
@parent_ids
) {
my
$ptype
=
$self
->_type_by_id_h->{
$parent_id
} ||
'unknown'
;
my
$rtype
=
$TM
->get_relationship_type_by_parent_child(
$ptype
,
$type
);
$w
->ev(
feature_relationship
=>[
[
subject_id
=>
$feature_id
],
[
object_id
=>
$parent_id
],
[
type
=>
$rtype
],
[
rank
=>
$rank
++],
]
);
}
}
return
$feature_id
;
}
sub
sort_by_strand {
my
$strand
=
shift
|| 1;
my
$sfs
=
shift
;
@$sfs
=
sort
{ (
$a
->start <=>
$b
->start) *
$strand
}
@$sfs
;
return
;
}
sub
make_uniquename {
my
$self
=
shift
;
my
$org
=
shift
;
my
$name
=
shift
;
my
$os
=
$org
;
$os
=~ s/\s+/_/g;
$os
=~ s/\(/_/g;
$os
=~ s/\)/_/g;
$os
=~ s/_+/_/g;
$os
=~ s/^_+//g;
$os
=~ s/_+$//g;
return
"$os:$name"
;
}
sub
get_chaos_feature_id {
my
$self
=
shift
;
my
$ob
=
shift
;
my
$id
;
if
(
$ob
->isa(
"Bio::SeqI"
)) {
$id
=
$ob
->accession_number .
'.'
. (
$ob
->can(
'seq_version'
) ?
$ob
->seq_version :
$ob
->version);
}
else
{
$ob
->isa(
"Bio::SeqFeatureI"
) ||
$self
->throw(
"$ob must be either SeqI or SeqFeatureI"
);
if
(
$ob
->primary_id) {
$id
=
$ob
->primary_id;
}
else
{
eval
{
$id
=
$IDH
->generate_unique_persistent_id(
$ob
);
};
if
($@) {
$self
->
warn
($@);
$id
=
"$ob"
;
}
}
}
if
(!
$id
) {
if
(
$ob
->isa(
"Bio::SeqFeatureI"
)) {
$id
=
$IDH
->generate_unique_persistent_id(
$ob
);
}
else
{
$self
->throw(
"Cannot generate a unique persistent ID for a Seq without either primary_id or accession"
);
}
}
if
(
$id
) {
$id
=
$self
->context_namespace ?
$self
->context_namespace .
":"
.
$id
:
$id
;
}
return
$id
;
}
sub
bp2ib {
my
$self
=
shift
;
my
$loc
=
shift
;
my
(
$s
,
$e
,
$str
) =
ref
(
$loc
) eq
"ARRAY"
? (
@$loc
) : (
$loc
->start,
$loc
->end,
$loc
->strand);
$s
--;
if
(
$str
< 0) {
(
$s
,
$e
) = (
$e
,
$s
);
}
return
(
$s
,
$e
,
$str
|| 1);
}
sub
subpartof {
my
$self
=
shift
;
my
$type
=
'partof_'
.
shift
;
$type
=~ s/partof_CDS/CDS_exon/;
$type
=~ s/partof_protein/CDS_exon/;
$type
=~ s/partof_polypeptide/CDS_exon/;
$type
=~ s/partof_\w
*RNA
/exon/;
return
$type
;
}
1;