use
vars
qw(%species $g $c $fac)
;
# Build the shared sequence factory once, at compile time.  Every node
# created by the handlers in this file is produced through $fac.
BEGIN {
    my %factory_args = ( -type => 'Bio::Seq::RichSeq' );
    $fac = Bio::Seq::SeqFactory->new(%factory_args);
}
# Constructor.
#
# Blesses an empty hash into $class and hands all remaining arguments to
# _initialize().  Returns the new object.
sub new {
    my $class = shift;
    my @args  = @_;

    my $self = bless( {}, $class );
    $self->_initialize(@args);

    return $self;
}
# Object set-up, called from new().
#
# Forwards the constructor arguments to the parent class's
# _initialize_io().  When that call yields a false value the sub returns
# early with an empty return; otherwise it simply falls off the end.
# The caller visible in this file (new) ignores the return value.
sub _initialize {
    my $self = shift;
    my @args = @_;

    $self->SUPER::_initialize_io(@args) or return;
}
# Parse the file behind this object and return the resulting graph.
#
# A fresh Bio::Graph::ProteinGraph is stored in the package variable $g so
# that the twig handlers (_proteinInteractor and _addEdge) can populate it
# while XML::Twig walks the document named by $self->file.
#
# Returns: the graph held in $g after parsing.
sub next_network {
    my ($self) = @_;

    # The handlers read and write this package-level graph.
    $g = Bio::Graph::ProteinGraph->new();

    my %handlers = (
        proteinInteractor => \&_proteinInteractor,
        interaction       => \&_addEdge,
    );
    my $parser = XML::Twig->new( TwigHandlers => \%handlers );
    $parser->parsefile( $self->file );

    return $g;
}
# Twig handler for <proteinInteractor> elements.
#
# Builds a sequence node through the package-level factory $fac from the
# element's 'organism', 'xref' and 'names' children, then registers that
# node in the current graph's '_id_map' under every identifier found.
#
# Args:
#   $twig - the XML::Twig object driving the parse
#   $pi   - the twig element for the current proteinInteractor
#
# Returns: whatever $twig->purge() returns.
#
# Side effects: caches one Bio::Species object per taxon id in %species and
# writes entries into $g->{'_id_map'}.
sub _proteinInteractor {
    my ( $twig, $pi ) = @_;

    my ( $acc, $desc, $taxid, $prim_id );

    # --- species: built once per taxon id and reused afterwards ---------
    my $org = $pi->first_child('organism');
    $taxid = $org->att('ncbiTaxId');
    if ( !exists( $species{$taxid} ) ) {

        # The construction runs inside eval so that the error report below
        # describes a failure of *this* code.  Without the eval, $@ could
        # only ever hold a stale message left by some unrelated eval.
        eval {
            my $common =
              $org->first_child('names')->first_child('shortLabel')->text;
            my $full =
              $org->first_child('names')->first_child('fullName')->text;

            # First word is taken as the genus, the remainder as the species.
            my ( $gen, $sp ) = $full =~ /(\S+)\s+(.+)/;

            my $sp_obj = Bio::Species->new(
                -ncbi_taxid     => $taxid,
                -classification => [ $sp, $gen ],
                -common_name    => $common
            );
            $sp_obj->name( 'scientific', $full );

            # Cached only after everything above succeeded.
            $species{$taxid} = $sp_obj;
        };

        # Reported with warn, like the description fallback further down,
        # so diagnostics do not end up on STDOUT.
        warn "species parse error $@" if $@;
    }

    # --- identifiers: database name => id, from the xref children -------
    my @ids = $pi->first_child('xref')->children();
    my %ids = map { $_->att('db'), $_->att('id') } @ids;
    $ids{'psixml'} = $pi->att('id');

    $prim_id = defined( $ids{'GI'} ) ? $ids{'GI'} : '';

    # First non-empty id in this order of preference becomes the accession.
    $acc =
         $ids{'RefSeq'}
      || $ids{'SWP'}
      || $ids{'Swiss-Prot'}
      || $ids{'Ref-Seq'}
      || $ids{'GI'}
      || $ids{'PIR'}
      || $ids{'intact'}
      || $ids{'psi-mi'};

    # --- description: fullName, falling back to shortLabel --------------
    eval {
        $desc = $pi->first_child('names')->first_child('fullName')->text;
    };
    if ($@) {
        warn("No fullName, use shortLabel for description instead");
        $desc = $pi->first_child('names')->first_child('shortLabel')->text;
    }

    # --- annotations: every id that is neither accession nor primary id -
    my $ac = Bio::Annotation::Collection->new();

    # Compare against '' when no accession was found; this keeps the same
    # skip decisions as a bare 'eq' on undef without the warning.
    my $acc_cmp = defined $acc ? $acc : '';
    for my $db ( keys %ids ) {
        my $id     = $ids{$db};
        my $id_cmp = defined $id ? $id : '';
        next if $id_cmp eq $acc_cmp;
        next if $id_cmp eq $prim_id;

        my $an = Bio::Annotation::DBLink->new(
            -database   => $db,
            -primary_id => $id,
        );
        $ac->add_Annotation( 'dblink', $an );
    }

    # --- node creation ---------------------------------------------------
    my $node = $fac->create(
        -accession_number => $acc,
        -desc             => $desc,
        -display_id       => $acc,
        -primary_id       => $prim_id,
        -species          => $species{$taxid},
        -annotation       => $ac
    );

    # --- id map: make the node reachable through each of its ids --------
    $g->{'_id_map'}{ $ids{'psixml'} } = $node;

    # NOTE(review): $prim_id is '' when there is no GI xref; if the node
    # reports that value back, '' becomes a key here - confirm intended.
    if ( defined( $node->primary_id ) ) {
        $g->{'_id_map'}{ $node->primary_id } = $node;
    }
    if ( defined( $node->accession_number ) ) {
        $g->{'_id_map'}{ $node->accession_number } = $node;
    }

    $ac = $node->annotation();
    for my $an ( $ac->get_Annotations('dblink') ) {
        $g->{'_id_map'}{ $an->primary_id } = $node;
    }

    # Release the parsed part of the document.
    return $twig->purge();
}
# Twig handler for <interaction> elements.
#
# Reads the interactor references of every participant, looks the first
# two up in the current graph's '_id_map', and adds an edge between them
# whose id is the 'id' attribute of the interaction's primaryRef.
#
# Args:
#   $twig - the XML::Twig object driving the parse
#   $i    - the twig element for the current interaction
#
# Returns: whatever $twig->purge() returns.
sub _addEdge {
    my ( $twig, $i ) = @_;

    # Collect the 'ref' attribute of each participant's interactor reference.
    my $participant_list = $i->first_child('participantList');
    my @participants     = $participant_list->children;
    my @node;
    for my $participant (@participants) {
        push @node,
          $participant->first_child('proteinInteractorRef')->att('ref');
    }

    my $primary_ref = $i->first_child('xref')->first_child('primaryRef');
    my $edge_id     = $primary_ref->att('id');

    # Only the first two references are used as the edge's end points.
    $g->add_edge(
        Bio::Graph::Edge->new(
            -nodes => [
                $g->{'_id_map'}{ $node[0] },
                $g->{'_id_map'}{ $node[1] },
            ],
            -id => $edge_id
        )
    );

    return $twig->purge();
}
1;