Hide Show 47 lines of Pod
# File-scoped state shared by every handler object created from this package.
my $record_count;       # bumped by _increment_record_count()
my $processed_count;    # bumped by _increment_processed_count()
my $is_a_rel;           # "IS_A" relationship type, assigned in new()
my $contains_rel;       # "CONTAINS" relationship type, assigned in new()
my $found_in_rel;       # "FOUND_IN" relationship type, assigned in new()
Hide Show 12 lines of Pod
# Constructor.
#
# Named arguments (resolved through _rearrange):
#   -engine         passed to ontology_engine() when true
#   -ontology       stored via ontology(); when undefined, a fresh
#                   Bio::Ontology::Ontology is created instead
#   -ontology_name  name given to that fresh ontology ("InterPro" when false)
#   -term_factory   passed to term_factory() when true
#
# Also looks up the file-scoped IS_A / CONTAINS / FOUND_IN relationship types
# and points each of them at this handler's ontology, resets the cite-skip
# flag to 0 and installs an empty secondary-accessions map.
#
# Returns the new handler object.
sub new {
    my ( $class, @args ) = @_;

    my $self = $class->SUPER::new(@args);

    my ( $eng, $ont, $name, $fact ) = $self->_rearrange(
        [qw(ENGINE ONTOLOGY ONTOLOGY_NAME TERM_FACTORY)], @args );

    if ( !defined $ont ) {
        # No ontology supplied: build one, defaulting its name.
        $name ||= "InterPro";
        $self->ontology( Bio::Ontology::Ontology->new( -name => $name ) );
    }
    else {
        $self->ontology($ont);
    }

    if ($eng) {
        $self->ontology_engine($eng);
    }
    if ($fact) {
        $self->term_factory($fact);
    }

    $is_a_rel     = Bio::Ontology::RelationshipType->get_instance("IS_A");
    $contains_rel = Bio::Ontology::RelationshipType->get_instance("CONTAINS");
    $found_in_rel = Bio::Ontology::RelationshipType->get_instance("FOUND_IN");

    foreach my $rel_type ( $is_a_rel, $contains_rel, $found_in_rel ) {
        $rel_type->ontology( $self->ontology() );
    }

    $self->_cite_skip(0);
    $self->secondary_accessions_map( {} );

    return $self;
}
Hide Show 12 lines of Pod
# Get or set the ontology engine registered on this handler.
#
# With a defined argument:
#   * throws if an engine has already been stored;
#   * throws if the argument is not a Bio::Ontology::OntologyEngineI;
#   * otherwise stores it, hands it to the current ontology when that
#     ontology can("engine") and has no engine yet, and logs a debug message.
#
# Always returns the stored engine (undef when none has been set).
sub ontology_engine {
    my ( $self, $value ) = @_;

    if ( defined $value ) {
        if ( !defined $self->{'ontology_engine'} ) {
            unless ( $value->isa("Bio::Ontology::OntologyEngineI") ) {
                $self->throw( ref($value)
                      . " does not implement Bio::Ontology::OntologyEngineI. Bummer."
                );
            }

            $self->{'ontology_engine'} = $value;

            # Share the engine with the ontology if it does not have one yet.
            my $ont = $self->ontology();
            if ( $ont && $ont->can("engine") && !$ont->engine() ) {
                $ont->engine($value);
            }

            my $handler_class = ref($self);
            my $engine_class  = ref($value);
            $self->debug( $handler_class
                  . "::ontology_engine: registering ontology engine ("
                  . $engine_class . "):\n"
                  . $value->to_string
                  . "\n" );
        }
        else {
            $self->throw("ontology_engine already defined");
        }
    }

    return $self->{'ontology_engine'};
}
Hide Show 15 lines of Pod
# Get or set the ontology this handler populates.
#
# A defined argument must be a Bio::Ontology::OntologyI (otherwise throw() is
# called) and replaces the stored ontology. Returns the stored ontology.
sub ontology {
    my ( $self, $ont ) = @_;

    return $self->{'_ontology'} unless defined $ont;

    unless ( $ont->isa("Bio::Ontology::OntologyI") ) {
        $self->throw(
            ref($ont) . " does not implement Bio::Ontology::OntologyI. Bummer." );
    }
    $self->{'_ontology'} = $ont;

    return $self->{'_ontology'};
}
Hide Show 13 lines of Pod
# Get or set the term factory.
#
# Any argument at all (even undef) is stored as the new factory; the stored
# value is returned in both cases.
sub term_factory {
    my ( $self, @new_value ) = @_;

    if (@new_value) {
        $self->{'term_factory'} = $new_value[0];
    }

    return $self->{'term_factory'};
}
Hide Show 12 lines of Pod
# Get or set the cite-skip flag. A defined argument replaces the stored
# value; an undefined one leaves it alone. Returns the stored value.
sub _cite_skip {
    my $self = shift;
    my $flag = shift;

    $self->{'_cite_skip'} = $flag if defined $flag;

    return $self->{'_cite_skip'};
}
Hide Show 12 lines of Pod
# Get or set the value kept under the '_hash' slot. A defined argument
# replaces the stored value; returns the stored value.
sub _hash {
    my $self      = shift;
    my $new_value = shift;

    $self->{'_hash'} = $new_value if defined $new_value;

    return $self->{'_hash'};
}
Hide Show 12 lines of Pod
# Get or set the element stack (the callers in this file store an array
# reference of attribute hashes here). A defined argument replaces the
# stored value; returns the stored value.
sub _stack {
    my $self      = shift;
    my $new_stack = shift;

    $self->{'_stack'} = $new_stack if defined $new_stack;

    return $self->{'_stack'};
}
Hide Show 12 lines of Pod
# Return the last (top) element of the array referenced by the second
# argument, without modifying it.
#
# Returns undef when the reference is undefined or the array is empty.
# The element is read through the reference instead of copying the whole
# array first, so the cost no longer grows with the stack depth.
sub _top {
    my ( $self, $stack_ref ) = @_;

    # Explicit undef (not a bare return) keeps the original one-element
    # result when a caller evaluates this in list context.
    return undef unless defined($stack_ref) && @{$stack_ref};

    return $stack_ref->[-1];
}
Hide Show 12 lines of Pod
# Get or set the term currently being built. A defined argument replaces
# the stored term; returns the stored term.
sub _term {
    my $self     = shift;
    my $new_term = shift;

    $self->{'_term'} = $new_term if defined $new_term;

    return $self->{'_term'};
}
Hide Show 12 lines of Pod
# Forget the current term by deleting the '_term' slot.
# Returns whatever delete() returns, i.e. the removed value.
sub _clear_term {
    my $self = shift;

    return delete $self->{'_term'};
}
Hide Show 12 lines of Pod
# Get or set the stack of element names (the callers in this file store an
# array reference here). A defined argument replaces the stored value;
# returns the stored value.
sub _names {
    my $self      = shift;
    my $new_names = shift;

    $self->{'_names'} = $new_names if defined $new_names;

    return $self->{'_names'};
}
Hide Show 12 lines of Pod
# Add a relationship between the current term and the term identified by
# $ref_id, using $rel_type_term as the predicate.
#
# If the ontology's engine does not know $ref_id yet, a placeholder term
# (id and name both set to $ref_id) is added and marked uninstantiated.
#
# Direction depends on the innermost open element name: inside
# 'parent_list' or 'found_in' the referenced term is the object and the
# current term the subject; otherwise the roles are swapped.
sub _create_relationship {
    my ( $self, $ref_id, $rel_type_term ) = @_;

    my $ont    = $self->ontology();
    my $fact   = $self->term_factory();
    my $engine = $ont->engine;

    my $referenced = ( $engine->get_term_by_identifier($ref_id) )[0];
    my $rel =
      Bio::Ontology::Relationship->new( -predicate_term => $rel_type_term );

    unless ( defined $referenced ) {
        $referenced = $ont->engine->add_term(
            $fact->create_object(
                -InterPro_id => $ref_id,
                -name        => $ref_id,
                -ontology    => $ont
            )
        );
        $ont->engine->mark_uninstantiated($referenced);
    }

    my $rel_type_name = $self->_top( $self->_names );
    my $referenced_is_object =
      ( $rel_type_name eq 'parent_list' || $rel_type_name eq 'found_in' );

    my ( $object, $subject ) =
      $referenced_is_object
      ? ( $referenced,  $self->_term )
      : ( $self->_term, $referenced );

    $rel->object_term($object);
    $rel->subject_term($subject);
    $rel->ontology($ont);

    return $ont->add_relationship($rel);
}
Hide Show 15 lines of Pod
# Callback for the opening of an element (presumably invoked by a SAX-style
# XML parser -- the caller is not visible here). $element is a hash
# reference read through its 'Name' and 'Attributes' keys.
#
#   interprodb  registers the six built-in entry-type terms in the ontology.
#   interpro    fetches or creates the term for the "id" attribute, makes it
#               the current term, copies short_name / protein_count, resets
#               the element and name stacks, and adds an IS_A relationship
#               to the term named by the "type" attribute.
#   anything else (only once the stack exists)
#               pushes a copy of the attributes on the stack; 'rel_ref'
#               additionally creates a relationship chosen by the enclosing
#               element name, 'abstract' switches cite-skipping on; finally
#               the element name is pushed on the name stack.
#
# There is no explicit return value.
sub start_element {
    my ( $self, $element ) = @_;

    my $ont  = $self->ontology();
    my $fact = $self->term_factory();
    my $tag  = $element->{Name};

    if ( $tag eq 'interprodb' ) {
        my @entry_types = (
            [ "Active_site",  "Active Site" ],
            [ "Binding_site", "Binding Site" ],
            [ "Family",       "Family" ],
            [ "Domain",       "Domain" ],
            [ "Repeat",       "Repeat" ],
            [ "PTM",          "post-translational modification" ],
        );
        foreach my $entry_type (@entry_types) {
            my ( $identifier, $label ) = @{$entry_type};
            $ont->add_term(
                $fact->create_object(
                    -identifier => $identifier,
                    -name       => $label
                )
            );
        }
    }
    elsif ( $tag eq 'interpro' ) {
        my %record_args = %{ $element->{Attributes} };
        my $id          = $record_args{"id"};

        # Reuse a term the engine already knows, otherwise create one.
        my $known = ( $ont->engine->get_term_by_identifier($id) )[0];
        if ( defined $known ) {
            $self->_term($known);
        }
        else {
            $self->_term(
                $ont->add_term(
                    $fact->create_object(
                        -InterPro_id => $id,
                        -name        => $id
                    )
                )
            );
        }

        my $term = $self->_term;
        $term->ontology($ont);
        $term->short_name( $record_args{"short_name"} );
        $term->protein_count( $record_args{"protein_count"} );

        $self->_increment_record_count();

        # Start fresh stacks for this record.
        $self->_stack( [ { interpro => undef } ] );
        $self->_names( ["interpro"] );

        my $rel =
          Bio::Ontology::Relationship->new( -predicate_term => $is_a_rel );
        $rel->object_term(
            ( $ont->engine->get_term_by_identifier( $record_args{"type"} ) )[0]
        );
        $rel->subject_term($term);
        $rel->ontology($ont);
        $ont->add_relationship($rel);
    }
    elsif ( defined $self->_stack ) {
        # Shallow copy of the attributes, one fresh hash per element.
        my %attributes =
          map { ( $_ => $element->{Attributes}->{$_} ) }
          keys %{ $element->{Attributes} };
        push @{ $self->_stack }, \%attributes;

        if ( $tag eq 'rel_ref' ) {
            my $ref_id = $element->{Attributes}->{"ipr_ref"};
            my $parent = $self->_top( $self->_names );

            # Enclosing list name -> relationship type to create.
            my %rel_type_for = (
                'parent_list' => $is_a_rel,
                'child_list'  => $is_a_rel,
                'contains'    => $contains_rel,
                'found_in'    => $found_in_rel,
            );
            if ( exists $rel_type_for{$parent} ) {
                $self->_create_relationship( $ref_id, $rel_type_for{$parent} );
            }
        }
        elsif ( $tag eq 'abstract' ) {
            $self->_cite_skip(1);
        }

        push @{ $self->_names }, $element->{Name};
    }
}
Hide Show 12 lines of Pod
# Get or set the accumulated character buffer. A defined argument (the
# empty string included) replaces the buffer; returns the buffer.
sub _char_storage {
    my $self     = shift;
    my $new_text = shift;

    $self->{'_char_storage'} = $new_text if defined $new_text;

    return $self->{'_char_storage'};
}
Hide Show 12 lines of Pod
# Callback for character data. Takes the 'Data' entry of $characters,
# removes one trailing newline and any leading whitespace, and appends the
# rest to the '_char_storage' buffer. Returns the updated buffer.
sub characters {
    my ( $self, $characters ) = @_;

    my $chunk = $characters->{Data};
    chomp $chunk;
    $chunk =~ s/^\s+//;

    return $self->{_char_storage} .= $chunk;
}
Hide Show 12 lines of Pod
# Callback for the closing of an element. $element is a hash reference read
# through its 'Name' key.
#
#   interprodb  logs the record/processed counters via debug().
#   interpro    clears the current term and bumps the processed counter.
#   cite        is ignored completely (buffer and stacks stay untouched).
#   anything else
#               strips <p> / </p> from the text buffer, pops the element's
#               attribute hash and name, files the element under its name in
#               the parent hash (attributes plus text as 'comment', text
#               alone as 'accumulated_text_12345', or attributes alone),
#               and then, depending on the element name, copies the
#               collected data onto the current term. The text buffer is
#               emptied afterwards unless cite-skipping is switched on.
#
# Changes from the previous version: the unused @refs local in the
# 'deleted_entries' branch is gone, and that branch falls back to the
# ontology's own engine when no engine was registered on the handler
# (start_element and _create_relationship already look terms up that way).
sub end_element {
    my ( $self, $element ) = @_;
    my $tag = $element->{Name};

    if ( $tag eq 'interprodb' ) {
        $self->debug(
"Interpro DB Parser Finished: $record_count read, $processed_count processed\n"
        );
    }
    elsif ( $tag eq 'interpro' ) {
        $self->_clear_term;
        $self->_increment_processed_count();
    }
    elsif ( $tag ne 'cite' ) {
        $self->{_char_storage} =~ s/<\/?p>//g;

        if ( defined $self->_stack ) {
            my $current_hash     = pop @{ $self->_stack };
            my $parent_hash      = $self->_top( $self->_stack );
            my $current_hash_key = pop @{ $self->_names };

            my $text      = $self->_char_storage;
            my $has_attrs = scalar( keys %{$current_hash} ) > 0;
            my $has_text  = $text ne "";

            # File this element under its name in the parent's hash.
            if ($has_text) {
                if ($has_attrs) {
                    $current_hash->{comment} = $text;
                    push @{ $parent_hash->{$current_hash_key} }, $current_hash;
                }
                else {
                    push @{ $parent_hash->{$current_hash_key} },
                      { 'accumulated_text_12345' => $text };
                }
            }
            elsif ($has_attrs) {
                push @{ $parent_hash->{$current_hash_key} }, $current_hash;
            }

            if ( $tag eq 'pub_list' ) {
                my @refs = ();
                foreach my $pub_record ( @{ $current_hash->{publication} } ) {
                    my $ref = Bio::Annotation::Reference->new;
                    my $loc = $pub_record->{location}->[0];

                    # "<journal>, <first>-<last>, <volume>, <year>"
                    $ref->location(
                            $pub_record->{journal}->[0]->{accumulated_text_12345}
                          . ", "
                          . $loc->{firstpage} . "-"
                          . $loc->{lastpage} . ", "
                          . $loc->{volume} . ", "
                          . $pub_record->{year}->[0]->{accumulated_text_12345}
                    );
                    $ref->title(
                        $pub_record->{title}->[0]->{accumulated_text_12345} );

                    my $authors = $pub_record->{author_list}->[0];
                    $ref->authors( $authors->{accumulated_text_12345} );

                    # NOTE(review): db/dbkey are read from the author_list
                    # record -- confirm that is where the parsed input
                    # actually carries them.
                    $ref->medline( scalar( $authors->{dbkey} ) )
                      if exists( $authors->{db} )
                      && $authors->{db} eq "MEDLINE";

                    push @refs, $ref;
                }
                $self->_term->add_reference(@refs);
            }
            elsif ( $tag eq 'name' ) {
                $self->_term->name( $self->_char_storage );
            }
            elsif ( $tag eq 'abstract' ) {
                $self->_term->definition( $self->_char_storage );
                $self->_cite_skip(0);
            }
            elsif ( $tag eq 'member_list' ) {
                my @refs = ();
                foreach my $db_xref ( @{ $current_hash->{db_xref} } ) {
                    push @refs,
                      Bio::Annotation::DBLink->new(
                        -database   => $db_xref->{db},
                        -primary_id => $db_xref->{dbkey}
                      );
                }
                $self->_term->add_member(@refs);
            }
            elsif ( $tag eq 'sec_list' ) {
                my @refs = ();
                foreach my $sec_ac ( @{ $current_hash->{sec_ac} } ) {
                    push @refs, $sec_ac->{sec_ac};
                }
                $self->_term->add_secondary_id(@refs);
                $self->secondary_accessions_map->{ $self->_term->identifier } =
                  \@refs;
            }
            elsif ( $tag eq 'example_list' ) {
                my @refs = ();
                foreach my $example ( @{ $current_hash->{example} } ) {
                    push @refs,
                      Bio::Annotation::DBLink->new(
                        -database   => $example->{db_xref}->[0]->{db},
                        -primary_id => $example->{db_xref}->[0]->{dbkey},
                        -comment    => $example->{comment}
                      );
                }
                $self->_term->add_example(@refs);
            }
            elsif ( $tag eq 'external_doc_list' ) {
                my @refs = ();
                foreach my $db_xref ( @{ $current_hash->{db_xref} } ) {
                    push @refs,
                      Bio::Annotation::DBLink->new(
                        -database   => $db_xref->{db},
                        -primary_id => $db_xref->{dbkey}
                      );
                }
                $self->_term->add_external_document(@refs);
            }
            elsif ( $tag eq 'class_list' ) {
                my @refs = ();
                foreach
                  my $classification ( @{ $current_hash->{classification} } )
                {
                    push @refs,
                      Bio::Annotation::DBLink->new(
                        -database   => $classification->{class_type},
                        -primary_id => $classification->{id}
                      );
                }
                $self->_term->class_list( \@refs );
            }
            elsif ( $tag eq 'deleted_entries' ) {
                my $engine;
                foreach my $del_ref ( @{ $current_hash->{del_ref} } ) {
                    # Resolved lazily so nothing is dereferenced when there
                    # are no deleted entries to process.
                    $engine ||=
                      $self->ontology_engine || $self->ontology->engine;

                    my $term =
                      ( $engine->get_term_by_identifier( $del_ref->{id} ) )[0];
                    $term->is_obsolete(1) if defined $term;
                }
            }
        }

        $self->_char_storage('') if !$self->_cite_skip;
    }
}
Hide Show 14 lines of Pod
# Get or set the secondary-accessions map (end_element() stores array refs
# in it keyed by term identifier). A defined argument replaces the stored
# map; returns the stored map.
sub secondary_accessions_map {
    my $self    = shift;
    my $new_map = shift;

    $self->{'secondary_accessions_map'} = $new_map if defined $new_map;

    return $self->{'secondary_accessions_map'};
}
Hide Show 12 lines of Pod
# Bump the file-scoped record counter by one.
# Returns the counter's value from before the increment.
sub _increment_record_count {
    return $record_count++;
}
Hide Show 12 lines of Pod
# Bump the file-scoped processed counter by one and, whenever it reaches a
# multiple of 100, print the counter followed by a newline to STDERR.
sub _increment_processed_count {
    ++$processed_count;
    print STDERR "$processed_count\n" if ( $processed_count % 100 ) == 0;
}
1;