Hide Show 58 lines of Pod
# File-scoped state shared by every handler object created from this file.
#
# $record_count and $processed_count are bumped by _increment_record_count()
# and _increment_processed_count() and are interpolated into the summary that
# end_element() logs when </interprodb> closes.  They start at 0 so that the
# summary never interpolates an undefined value when no <interpro> record was
# seen.
my ( $record_count, $processed_count ) = ( 0, 0 );

# Relationship-type objects; new() assigns them and start_element() passes
# them on as relationship predicates.
my ( $is_a_rel, $contains_rel, $found_in_rel );
Hide Show 12 lines of Pod
# Constructor.
#
# All arguments are forwarded to the parent constructor and then resolved
# through _rearrange() under the keys ENGINE, ONTOLOGY, ONTOLOGY_NAME and
# TERM_FACTORY.
#
#   * ONTOLOGY, when defined, becomes the handler's ontology; otherwise a new
#     FAST::Bio::Ontology::Ontology is created, named ONTOLOGY_NAME or
#     "InterPro" when no (true) name was given.
#   * ENGINE and TERM_FACTORY are registered only when they are true values.
#
# The three file-scoped relationship types (IS_A, CONTAINS, FOUND_IN) are
# fetched and attached to this handler's ontology, the cite-skip flag is
# cleared and the secondary accession map starts out empty.
#
# Returns the new handler object.
sub new {
    my $class     = shift;
    my @ctor_args = @_;

    my $self = $class->SUPER::new(@ctor_args);

    my ( $eng, $ont, $name, $fact ) = $self->_rearrange(
        [ qw[ ENGINE ONTOLOGY ONTOLOGY_NAME TERM_FACTORY ] ],
        @ctor_args
    );

    if ( defined($ont) ) {
        $self->ontology($ont);
    }
    else {
        $name ||= "InterPro";
        $self->ontology( FAST::Bio::Ontology::Ontology->new( -name => $name ) );
    }

    if ($eng) {
        $self->ontology_engine($eng);
    }
    if ($fact) {
        $self->term_factory($fact);
    }

    # Fetch all three relationship types first, then bind each of them to
    # this handler's ontology (same order of calls as before).
    my %slot_for = (
        IS_A     => \$is_a_rel,
        CONTAINS => \$contains_rel,
        FOUND_IN => \$found_in_rel,
    );
    my @type_names = qw( IS_A CONTAINS FOUND_IN );

    for my $type_name (@type_names) {
        ${ $slot_for{$type_name} } =
            FAST::Bio::Ontology::RelationshipType->get_instance($type_name);
    }
    for my $type_name (@type_names) {
        ${ $slot_for{$type_name} }->ontology( $self->ontology() );
    }

    $self->_cite_skip(0);
    $self->secondary_accessions_map( {} );

    return $self;
}
Hide Show 12 lines of Pod
# Get or (once only) set the ontology engine used by this handler.
#
# With no defined argument this is a plain getter.  With a defined argument:
#   * $self->throw() is called if an engine has already been registered;
#   * $self->throw() is called if the value does not ->isa
#     FAST::Bio::Ontology::OntologyEngineI;
#   * otherwise the engine is stored, handed to the handler's ontology when
#     that ontology can("engine") and has none yet, and the registration is
#     reported through $self->debug().
#
# Returns the currently stored engine (possibly undef).
sub ontology_engine {
    my $self  = shift;
    my $value = shift;

    my $slot = 'ontology_engine';

    return $self->{$slot} unless defined $value;

    if ( defined $self->{$slot} ) {
        $self->throw("ontology_engine already defined");
        return $self->{$slot};
    }

    unless ( $value->isa("FAST::Bio::Ontology::OntologyEngineI") ) {
        $self->throw( ref($value)
              . " does not implement "
              . "FAST::Bio::Ontology::OntologyEngineI. Bummer." );
    }

    $self->{$slot} = $value;

    # Share the engine with the ontology unless it already has one.
    my $ont = $self->ontology();
    if ( $ont && $ont->can("engine") && ( !$ont->engine() ) ) {
        $ont->engine($value);
    }

    my $message =
          ref($self)
        . "::ontology_engine: registering ontology engine ("
        . ref($value) . "):\n"
        . $value->to_string . "\n";
    $self->debug($message);

    return $self->{$slot};
}
Hide Show 15 lines of Pod
# Get or set the ontology that receives the parsed terms and relationships.
#
# A defined argument must ->isa FAST::Bio::Ontology::OntologyI; if it does
# not, $self->throw() is called before the value is stored.  An undefined or
# missing argument leaves the stored ontology untouched.
#
# Returns the stored ontology (possibly undef).
sub ontology {
    my $self = shift;
    my ($ont) = @_;

    return $self->{'_ontology'} unless defined($ont);

    if ( !$ont->isa("FAST::Bio::Ontology::OntologyI") ) {
        $self->throw( ref($ont)
              . " does not implement FAST::Bio::Ontology::OntologyI"
              . ". Bummer." );
    }
    $self->{'_ontology'} = $ont;

    return $self->{'_ontology'};
}
Hide Show 13 lines of Pod
# Get or set the factory object used to create term objects.
#
# Any argument at all - including an explicit undef - replaces the stored
# factory.  Returns the stored factory.
sub term_factory {
    my ( $self, @replacement ) = @_;

    if (@replacement) {
        $self->{'term_factory'} = $replacement[0];
    }

    return $self->{'term_factory'};
}
Hide Show 12 lines of Pod
# Get or set the cite-skip flag.
#
# start_element() sets it to 1 when <abstract> opens and end_element() sets
# it back to 0 when <abstract> closes; while it is true end_element() does
# not reset the accumulated character data.
#
# Only a defined argument is stored.  Returns the stored flag.
sub _cite_skip {
    my $self = shift;
    my $flag = shift;

    $self->{'_cite_skip'} = $flag if defined $flag;

    return $self->{'_cite_skip'};
}
Hide Show 12 lines of Pod
# Get or set the '_hash' slot of the handler.
#
# Only a defined argument is stored.  Returns the stored value.
sub _hash {
    my ( $self, $value ) = @_;

    return $self->{'_hash'} unless defined $value;

    return $self->{'_hash'} = $value;
}
Hide Show 12 lines of Pod
# Get or set the element stack: an array reference with one attribute hash
# per currently open element (start_element() pushes, end_element() pops).
#
# Only a defined argument is stored.  The stored reference itself is
# returned, so callers may push onto / pop from it directly.
sub _stack {
    my $self = shift;
    my ($new_stack) = @_;

    if ( defined $new_stack ) {
        $self->{'_stack'} = $new_stack;
    }

    return $self->{'_stack'};
}
Hide Show 12 lines of Pod
# Return the last (top) element of the array referenced by $_stack without
# modifying it.
#
# Arguments:
#   $self   - the handler (not used by this routine)
#   $_stack - array reference, e.g. the value of _stack() or _names()
#
# Returns the top element, or undef when the array is empty or when $_stack
# itself is undefined.
#
# The element is read through the reference; the array is no longer copied
# on every call.
sub _top {
    my ( $self, $_stack ) = @_;

    # An explicit undef (rather than a bare return) keeps the result a single
    # value in list context, exactly as for an empty stack before.
    return undef unless defined $_stack && @{$_stack};

    return $_stack->[-1];
}
Hide Show 12 lines of Pod
# Get or set the term of the <interpro> record currently being parsed.
#
# Only a defined argument is stored; use _clear_term() to forget the term.
# Returns the stored term (possibly undef).
sub _term {
    my ( $self, $term ) = @_;

    defined $term and $self->{'_term'} = $term;

    return $self->{'_term'};
}
Hide Show 12 lines of Pod
# Forget the current term by deleting the '_term' slot.
#
# Returns whatever was stored in the slot (the value of delete).
sub _clear_term {
    my $self = shift;

    return delete $self->{'_term'};
}
Hide Show 12 lines of Pod
# Get or set the name stack: an array reference holding the names of the
# currently open elements, kept in step with _stack() by start_element()
# and end_element().
#
# Only a defined argument is stored.  The stored reference itself is
# returned.
sub _names {
    my ( $self, @maybe_names ) = @_;

    my $names = $maybe_names[0];
    if ( defined $names ) {
        $self->{'_names'} = $names;
    }

    return $self->{'_names'};
}
Hide Show 11 lines of Pod
{
    # Pairs of term identifiers for which a relationship has already been
    # created.  Private to the two subs in this scope.
    my %relationship_cache;

    # Empty the relationship cache (end_element() calls this when
    # </interprodb> closes).
    sub _clear_cache {
        %relationship_cache = ();
    }

    # Create a relationship between the current term and the term identified
    # by $ref_id, using $rel_type_term as the predicate.
    #
    # If the ontology's engine does not know $ref_id yet, a placeholder term
    # is created through the term factory, added to the engine and marked
    # uninstantiated.
    #
    # The two identifiers are sorted and joined with ':' to form the cache
    # key, so a pair is handled only once no matter which of the two records
    # mentions it; later calls for the same pair return early without a
    # value.
    # NOTE(review): the key does not include the relationship type, so a
    # second relationship of another type between the same two terms is
    # skipped as well - confirm that this is intended.
    #
    # Direction: when the enclosing list (top of the name stack) is
    # 'parent_list' or 'found_in', the referenced term is the object and the
    # current term the subject; for any other list it is the other way round.
    sub _create_relationship {
        my ( $self, $ref_id, $rel_type_term ) = @_;

        my $ont  = $self->ontology();
        my $fact = $self->term_factory();

        my ($term_temp) = $ont->engine->get_term_by_identifier($ref_id);
        unless ( defined $term_temp ) {
            my $placeholder = $fact->create_object(
                -InterPro_id => $ref_id,
                -name        => $ref_id,
                -ontology    => $ont
            );
            $term_temp = $ont->engine->add_term($placeholder);
            $ont->engine->mark_uninstantiated($term_temp);
        }

        my @endpoints  = ( $self->_term->identifier, $ref_id );
        my $marshalled = join( ':', sort @endpoints );
        return if $relationship_cache{$marshalled}++;

        my $rel_type_name = $self->_top( $self->_names );

        my $rel = FAST::Bio::Ontology::Relationship->new(
            -predicate_term => $rel_type_term );

        my $current_is_subject =
            (      $rel_type_name eq 'parent_list'
                || $rel_type_name eq 'found_in' );

        my ( $object, $subject ) =
            $current_is_subject
            ? ( $term_temp,    $self->_term )
            : ( $self->_term, $term_temp );

        $rel->object_term($object);
        $rel->subject_term($subject);
        $rel->ontology($ont);

        return $ont->add_relationship($rel);
    }
}
Hide Show 15 lines of Pod
# SAX-style callback invoked when an element opens.
#
# $element is a hash reference; this routine reads its {Name} and
# {Attributes} entries.
#
#   interprodb     registers the fixed set of entry-type terms with the
#                  ontology.
#   interpro       looks up (or creates) the term for the record id, makes it
#                  the current term, restarts the element and name stacks and
#                  links the term to its entry-type term through an IS_A
#                  relationship.  $self->throw() is called when no term
#                  matches the record's "type" attribute.
#   anything else  (only once a record has initialised the element stack)
#                  pushes a copy of the attributes onto the element stack and
#                  the element name onto the name stack.  In addition,
#                  rel_ref creates a relationship whose predicate depends on
#                  the enclosing list, and abstract switches the cite-skip
#                  flag on.
sub start_element {
    my ( $self, $element ) = @_;

    my $ont  = $self->ontology();
    my $fact = $self->term_factory();
    my $tag  = $element->{Name};

    if ( $tag eq 'interprodb' ) {

        # identifier / display name of every InterPro entry type
        my $register_type = sub {
            my ( $identifier, $label ) = @_;
            return $ont->add_term(
                $fact->create_object(
                    -identifier => $identifier,
                    -name       => $label
                )
            );
        };

        $register_type->( "Active_site",    "Active Site" );
        $register_type->( "Conserved_site", "Conserved Site" );
        $register_type->( "Binding_site",   "Binding Site" );
        $register_type->( "Family",         "Family" );
        $register_type->( "Domain",         "Domain" );
        $register_type->( "Repeat",         "Repeat" );
        $register_type->( "PTM",            "post-translational modification" );
        $register_type->( "Region",         "Region" );
    }
    elsif ( $tag eq 'interpro' ) {
        my %record_args = %{ $element->{Attributes} };
        my $id          = $record_args{"id"};

        # Reuse a term that an earlier rel_ref may have created as a
        # placeholder; otherwise build a fresh one.
        my ($term) = $ont->engine->get_term_by_identifier($id);
        $term ||= $fact->create_object(
            -InterPro_id => $id,
            -name        => $id
        );

        $self->_term($term);
        $term->ontology($ont);
        $term->short_name( $record_args{"short_name"} );
        $term->protein_count( $record_args{"protein_count"} );

        $self->_increment_record_count();

        # Both stacks restart for every record.
        $self->_stack( [ { interpro => undef } ] );
        $self->_names( ["interpro"] );

        my $rel = FAST::Bio::Ontology::Relationship->new(
            -predicate_term => $is_a_rel );

        my @type_terms =
            $ont->find_terms( -identifier => $record_args{"type"} );
        unless (@type_terms) {
            $self->throw(
                "when processing interpro ID '$id', no term found for interpro type '$record_args{type}'"
            );
        }
        my $object_term = $type_terms[0];

        $rel->object_term($object_term);
        $rel->subject_term( $self->_term );
        $rel->ontology($ont);
        $ont->add_relationship($rel);
        $ont->add_term($term);
    }
    elsif ( defined $self->_stack ) {

        # Private copy of the attributes for the element stack.
        my %attributes = ();
        for my $attr_name ( keys %{ $element->{Attributes} } ) {
            $attributes{$attr_name} = $element->{Attributes}->{$attr_name};
        }
        push @{ $self->_stack }, \%attributes;

        if ( $tag eq 'rel_ref' ) {
            my $ref_id = $element->{Attributes}->{"ipr_ref"};
            my $parent = $self->_top( $self->_names );

            # enclosing list name => relationship predicate
            my %predicate_for = (
                parent_list => $is_a_rel,
                child_list  => $is_a_rel,
                contains    => $contains_rel,
                found_in    => $found_in_rel,
            );
            if ( exists $predicate_for{$parent} ) {
                $self->_create_relationship( $ref_id,
                    $predicate_for{$parent} );
            }
        }
        elsif ( $tag eq 'abstract' ) {
            $self->_cite_skip(1);
        }

        push @{ $self->_names }, $tag;
    }
}
Hide Show 12 lines of Pod
# Get or set the character data accumulated for the element being parsed
# (characters() appends to the same slot).
#
# Only a defined argument is stored; passing '' therefore resets the buffer.
# Returns the stored text.
sub _char_storage {
    my $self = shift;
    my $text = shift;

    $self->{'_char_storage'} = $text if defined $text;

    return $self->{'_char_storage'};
}
Hide Show 12 lines of Pod
# SAX-style callback for character data.
#
# Takes the text from $characters->{Data}, removes one trailing newline
# (chomp) and all leading whitespace, and appends the rest to the
# '_char_storage' buffer.  Returns the buffer after the append.
sub characters {
    my $self  = shift;
    my $event = shift;

    my $chunk = $event->{Data};
    chomp $chunk;
    $chunk =~ s/^(\s+)//;

    return $self->{_char_storage} .= $chunk;
}
Hide Show 12 lines of Pod
# SAX-style callback invoked when an element closes.
#
# $element is a hash reference; only its {Name} entry is read.
#
#   interprodb     logs the record counters through $self->debug() and empties
#                  the relationship cache.
#   interpro       forgets the current term and bumps the processed counter.
#   cite           is ignored completely.
#   anything else  strips <p> and </p> from the accumulated text and, once a
#                  record has initialised the element stack, pops the
#                  element's attribute hash and name and files the result
#                  under the parent hash:
#                    - attributes and text: the hash gains a 'comment' entry;
#                    - text only: a hash { accumulated_text_12345 => text };
#                    - attributes only: the hash as it is.
#                  Closing list elements (pub_list, member_list, sec_list,
#                  example_list, external_doc_list, class_list) and name /
#                  abstract then copy the collected data onto the current
#                  term; deleted_entries marks the listed terms obsolete.
#                  Finally the text buffer is reset unless the cite-skip flag
#                  is set.
#
# Differences from the previous implementation:
#   * deleted_entries falls back to the ontology's engine when no engine was
#     registered on the handler itself (it used to die on undef);
#   * an undefined text buffer is treated as '' instead of being substituted
#     and compared while undefined;
#   * an unused local in the deleted_entries branch is gone.
sub end_element {
    my ( $self, $element ) = @_;

    my $tag = $element->{Name};

    if ( $tag eq 'interprodb' ) {
        $self->debug(
            "Interpro DB Parser Finished: $record_count read, $processed_count processed\n"
        );
        $self->_clear_cache();
    }
    elsif ( $tag eq 'interpro' ) {
        $self->_clear_term;
        $self->_increment_processed_count();
    }
    elsif ( $tag ne 'cite' ) {

        # No character data may have arrived yet.
        $self->{_char_storage} = ''
            unless defined $self->{_char_storage};
        $self->{_char_storage} =~ s/<\/?p>//g;

        if ( defined $self->_stack ) {
            my $current_hash     = pop @{ $self->_stack };
            my $parent_hash      = $self->_top( $self->_stack );
            my $current_hash_key = pop @{ $self->_names };

            my $text           = $self->_char_storage;
            my $has_attributes = scalar( keys %{$current_hash} ) > 0;

            # File this element's data under its name in the parent hash.
            if ( $has_attributes && $text ne "" ) {
                $current_hash->{comment} = $text;
                push @{ $parent_hash->{$current_hash_key} }, $current_hash;
            }
            elsif ( $text ne "" ) {
                push @{ $parent_hash->{$current_hash_key} },
                    { 'accumulated_text_12345' => $text };
            }
            elsif ($has_attributes) {
                push @{ $parent_hash->{$current_hash_key} }, $current_hash;
            }

            if ( $tag eq 'pub_list' ) {
                my @refs = ();
                foreach my $pub_record (
                    @{ $current_hash->{publication} || [] } )
                {
                    my $ref = FAST::Bio::Annotation::Reference->new;
                    my $loc = $pub_record->{location}->[0];

                    # "journal, first-last, volume, year"
                    $ref->location(
                        sprintf(
                            "%s, %s-%s, %s, %s",
                            $pub_record->{journal}->[0]
                                ->{accumulated_text_12345} || '',
                            $loc->{firstpage} || '',
                            $loc->{lastpage}  || '',
                            $loc->{volume}    || '',
                            $pub_record->{year}->[0]
                                ->{accumulated_text_12345} || ''
                        )
                    );
                    $ref->title(
                        $pub_record->{title}->[0]->{accumulated_text_12345} );

                    my $ttt = $pub_record->{author_list}->[0];
                    $ref->authors( $ttt->{accumulated_text_12345} );

                    # NOTE(review): db/dbkey are read from the author_list
                    # record here - confirm this is where they are expected.
                    $ref->medline( scalar( $ttt->{dbkey} ) )
                        if exists( $ttt->{db} ) && $ttt->{db} eq "MEDLINE";

                    push @refs, $ref;
                }
                $self->_term->add_reference(@refs);
            }
            elsif ( $tag eq 'name' ) {
                $self->_term->name( $self->_char_storage );
            }
            elsif ( $tag eq 'abstract' ) {
                $self->_term->definition( $self->_char_storage );
                $self->_cite_skip(0);
            }
            elsif ( $tag eq 'member_list' ) {
                my @refs = ();
                foreach my $db_xref ( @{ $current_hash->{db_xref} || [] } ) {
                    push @refs,
                        FAST::Bio::Annotation::DBLink->new(
                            -database   => $db_xref->{db},
                            -primary_id => $db_xref->{dbkey}
                        );
                }
                $self->_term->add_dbxref(
                    -dbxrefs => \@refs,
                    -context => 'member_list'
                );
            }
            elsif ( $tag eq 'sec_list' ) {
                my @refs = ();
                foreach my $sec_ac ( @{ $current_hash->{sec_ac} || [] } ) {
                    push @refs, $sec_ac->{sec_ac};
                }
                $self->_term->add_secondary_id(@refs);
                $self->secondary_accessions_map
                    ->{ $self->_term->identifier } = \@refs;
            }
            elsif ( $tag eq 'example_list' ) {
                my @refs = ();
                foreach my $example ( @{ $current_hash->{examples} || [] } ) {
                    push @refs,
                        FAST::Bio::Annotation::DBLink->new(
                            -database   => $example->{db_xref}->[0]->{db},
                            -primary_id => $example->{db_xref}->[0]->{dbkey},
                            -comment    => $example->{comment}
                        );
                }
                $self->_term->add_dbxref(
                    -dbxrefs => \@refs,
                    -context => 'example_list'
                );
            }
            elsif ( $tag eq 'external_doc_list' ) {
                my @refs = ();
                foreach my $db_xref ( @{ $current_hash->{db_xref} || [] } ) {
                    push @refs,
                        FAST::Bio::Annotation::DBLink->new(
                            -database   => $db_xref->{db},
                            -primary_id => $db_xref->{dbkey}
                        );
                }
                $self->_term->add_dbxref(
                    -dbxrefs => \@refs,
                    -context => 'external_doc_list'
                );
            }
            elsif ( $tag eq 'class_list' ) {
                my @refs = ();
                foreach my $classification (
                    @{ $current_hash->{classification} || [] } )
                {
                    push @refs,
                        FAST::Bio::Annotation::DBLink->new(
                            -database   => $classification->{class_type},
                            -primary_id => $classification->{id}
                        );
                }
                $self->_term->add_dbxref(
                    -dbxrefs => \@refs,
                    -context => 'class_list'
                );
            }
            elsif ( $tag eq 'deleted_entries' ) {

                # Prefer the engine registered on the handler; fall back to
                # the ontology's own engine, which the rest of this handler
                # uses for term lookups.
                my $engine = $self->ontology_engine;
                $engine = $self->ontology->engine unless defined $engine;

                foreach my $del_ref ( @{ $current_hash->{del_ref} || [] } ) {
                    my ($term) =
                        $engine->get_term_by_identifier( $del_ref->{id} );
                    $term->is_obsolete(1) if defined $term;
                }
            }
        }

        # Inside <abstract> the text keeps accumulating across child
        # elements, so the buffer is only reset when the flag is off.
        $self->_char_storage('') if !$self->_cite_skip;
    }
}
Hide Show 14 lines of Pod
# Get or set the hash reference that maps a term identifier to the array of
# its secondary accessions (end_element() fills it when <sec_list> closes).
#
# Only a defined argument is stored.  The stored reference itself is
# returned.
sub secondary_accessions_map {
    my ( $self, $map ) = @_;

    my $slot = 'secondary_accessions_map';
    if ( defined $map ) {
        $self->{$slot} = $map;
    }

    return $self->{$slot};
}
Hide Show 12 lines of Pod
# Count one more <interpro> record read (file-scoped $record_count).
# The value of the post-increment, i.e. the count before this call, is
# returned.
sub _increment_record_count {
    return $record_count++;
}
Hide Show 12 lines of Pod
# Count one more completely processed <interpro> record (file-scoped
# $processed_count) and report the running total through $self->debug()
# every time it reaches a multiple of 100.
sub _increment_processed_count {
    my ($self) = @_;

    ++$processed_count;

    my $at_checkpoint = ( $processed_count % 100 == 0 );
    $self->debug("$processed_count\n") if $at_checkpoint;
}
1;