package
FAST::Bio::OntologyIO::obo;
sub
_initialize {
my
(
$self
,
%arg
) =
@_
;
my
(
$file
,
$name
,
$eng
) =
$self
->_rearrange(
[
qw( FILE
ONTOLOGY_NAME
ENGINE)
],
%arg
);
$self
->SUPER::_initialize(
%arg
);
delete
$self
->{
'_ontologies'
};
$eng
= FAST::Bio::Ontology::OBOEngine->new()
unless
$eng
;
if
(
$eng
->isa(
"FAST::Bio::Ontology::OntologyI"
) ) {
$self
->ontology_name(
$eng
->name() );
$eng
=
$eng
->engine()
if
$eng
->can(
'engine'
);
}
$self
->_ont_engine(
$eng
);
$self
->ontology_name(
$name
)
if
$name
;
}
sub
ontology_name {
my
$self
=
shift
;
return
$self
->{
'ontology_name'
} =
shift
if
@_
;
return
$self
->{
'ontology_name'
};
}
sub
parse {
my
$self
=
shift
;
$self
->term_factory(
FAST::Bio::Ontology::TermFactory->new(
-type
=>
"FAST::Bio::Ontology::OBOterm"
) )
unless
$self
->term_factory();
my
$annotations_collection
=
$self
->_header();
my
$ont
= FAST::Bio::Ontology::Ontology->new(
-name
=>
$self
->ontology_name(),
-engine
=>
$self
->_ont_engine()
);
$ont
->annotation(
$annotations_collection
);
foreach
(
$self
->_part_of_relationship(),
$self
->_is_a_relationship(),
$self
->_related_to_relationship(),
$self
->_regulates_relationship(),
$self
->_positively_regulates_relationship(),
$self
->_negatively_regulates_relationship(),
)
{
$_
->ontology(
$ont
);
}
$self
->_add_ontology(
$ont
);
while
(
my
$term
=
$self
->_next_term() ) {
if
( !
$term
->identifier() || !
$term
->name() ) {
$self
->throw(
"OBO File Format Error on line "
.
$self
->{
'_current_line_no'
}
.
" \nThe term does not have a id/name tag. This term will be ignored.\n"
);
next
;
}
my
$new_ontology_flag
= 1;
my
$ontologies_array_ref
=
$self
->{
'_ontologies'
};
foreach
my
$ontology
(
@$ontologies_array_ref
) {
my
(
$oname
,
$t_ns
) = (
$ontology
->name(),
$term
->namespace() );
next
unless
(
defined
(
$oname
) &&
defined
(
$t_ns
));
if
(
$oname
eq
$t_ns
) {
$new_ontology_flag
= 0;
$ont
=
$ontology
;
}
}
if
(
$new_ontology_flag
&&
$term
->namespace() ) {
my
$new_ont
= FAST::Bio::Ontology::Ontology->new(
-name
=>
$term
->namespace(),
-engine
=>
$self
->_ont_engine()
);
$new_ont
->annotation(
$annotations_collection
);
$self
->_add_ontology(
$new_ont
);
$ont
=
$new_ont
;
}
$self
->_add_term(
$term
,
$ont
);
my
$isa_parents_array_ref
=
$self
->{
'_isa_parents'
};
foreach
my
$parent_term
(
@$isa_parents_array_ref
) {
if
( !(
$self
->_has_term(
$parent_term
) ) ) {
$self
->_add_term(
$parent_term
,
$ont
);
}
$self
->_add_relationship(
$parent_term
,
$term
,
$self
->_is_a_relationship(),
$ont
);
}
my
$relationship_hash_ref
=
$self
->{
'_relationships'
};
foreach
my
$relationship
(
keys
%$relationship_hash_ref
) {
my
$reltype
;
if
(
$self
->_ont_engine->get_relationship_type(
$relationship
) ) {
$reltype
=
$self
->_ont_engine->get_relationship_type(
$relationship
);
}
else
{
$self
->_ont_engine->add_relationship_type(
$relationship
,
$ont
);
$reltype
=
$self
->_ont_engine->get_relationship_type(
$relationship
);
}
my
$id_array_ref
=
$$relationship_hash_ref
{
$relationship
};
foreach
my
$id
(
@$id_array_ref
) {
my
$parent_term
=
$self
->_create_term_object();
$parent_term
->identifier(
$id
);
$parent_term
->ontology(
$ont
);
if
( !(
$self
->_has_term(
$parent_term
) ) ) {
$self
->_add_term(
$parent_term
,
$ont
);
}
$self
->_add_relationship(
$parent_term
,
$term
,
$reltype
,
$ont
);
}
}
}
return
$self
->_ont_engine();
}
sub
next_ontology {
my
$self
=
shift
;
$self
->parse()
unless
exists
(
$self
->{
'_ontologies'
} );
if
(
exists
(
$self
->{
'_ontologies'
} ) ) {
my
$ont
=
shift
( @{
$self
->{
'_ontologies'
} } );
if
(
$ont
) {
my
$store
= FAST::Bio::Ontology::OntologyStore->new();
$store
->register_ontology(
$ont
);
return
$ont
;
}
}
return
;
}
sub
close
{
my
$self
=
shift
;
$self
->SUPER::
close
();
}
sub
_add_ontology {
my
$self
=
shift
;
$self
->{
'_ontologies'
} = []
unless
exists
(
$self
->{
'_ontologies'
} );
foreach
my
$ont
(
@_
) {
$self
->throw(
ref
(
$ont
) .
" does not implement FAST::Bio::Ontology::OntologyI"
)
unless
ref
(
$ont
) &&
$ont
->isa(
"FAST::Bio::Ontology::OntologyI"
);
$ont
->name(
$self
->ontology_name )
unless
$ont
->name();
push
( @{
$self
->{
'_ontologies'
} },
$ont
);
}
}
sub
_add_term {
my
(
$self
,
$term
,
$ont
) =
@_
;
$term
->ontology(
$ont
)
if
$ont
&& ( !
$term
->ontology );
$self
->_ont_engine()->add_term(
$term
);
}
sub
_part_of_relationship {
my
$self
=
shift
;
return
$self
->_ont_engine()->part_of_relationship(
@_
);
}
sub
_is_a_relationship {
my
$self
=
shift
;
return
$self
->_ont_engine()->is_a_relationship(
@_
);
}
sub
_related_to_relationship {
my
$self
=
shift
;
return
$self
->_ont_engine()->related_to_relationship(
@_
);
}
sub
_regulates_relationship {
my
$self
=
shift
;
return
$self
->_ont_engine()->regulates_relationship(
@_
);
}
sub
_positively_regulates_relationship {
my
$self
=
shift
;
return
$self
->_ont_engine()->positively_regulates_relationship(
@_
);
}
sub
_negatively_regulates_relationship {
my
$self
=
shift
;
return
$self
->_ont_engine()->negatively_regulates_relationship(
@_
);
}
sub
_add_relationship {
my
(
$self
,
$parent
,
$child
,
$type
,
$ont
) =
@_
;
$self
->_ont_engine()->add_relationship(
$child
,
$type
,
$parent
,
$ont
);
}
sub
_has_term {
my
$self
=
shift
;
return
$self
->_ont_engine()->has_term(
@_
);
}
sub
_ont_engine {
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
) {
$self
->{
"_ont_engine"
} =
$value
;
}
return
$self
->{
"_ont_engine"
};
}
sub
_filter_line {
my
(
$self
,
$line
) =
@_
;
chomp
(
$line
);
$line
=~
tr
[\200-\377]
[\000-\177];
$line
=~
tr
/\0-\10//d;
$line
=~
tr
/\13\14//d;
$line
=~
tr
/\16-\37//d;
$line
=~
tr
/\177//d;
$line
=~ s/^\!.*//;
$line
=~ s/[^\\]\!.*//;
$line
=~ s/[^\\]\
$line
=~ s/^\s+//;
$line
=~ s/\s+$//;
return
$line
;
}
sub
_header {
my
$self
=
shift
;
my
$annotation_collection
= FAST::Bio::Annotation::Collection->new();
my
(
$tag
,
$value
);
my
$line_counter
= 0;
$self
->{
'_current_line_no'
} = 0;
my
$format_version_header_flag
= 0;
my
$default_namespace_header_flag
= 0;
while
(
my
$line
=
$self
->_readline() ) {
++
$line_counter
;
my
$line
=
$self
->_filter_line(
$line
);
if
( !
$line
) {
if
( !
$format_version_header_flag
|| !
$default_namespace_header_flag
) {
$self
->throw(
"OBO File Format Error - \nCannot find tag format-version and/ default-namespace . These are required header.\n"
);
}
$self
->{
'_current_line_no'
} =
$line_counter
;
return
$annotation_collection
;
}
if
(
$line
=~ /\[\w*\]/) {
$self
->throw(
"OBO File Format Error - \nCannot find tag format-version. Thi ia a required header.\n"
);
}
$self
->_check_colon(
$line
,
$line_counter
);
if
(
$line
=~
/^(\[|
format
-version:|typeref:|version:|date:|saved-by:|auto-generated-by:|
default
-namespace:|remark:|subsetdef:)/
)
{
if
(
$line
=~ /^([\w\-]+)\:\s*(.*)/ ) {
(
$tag
,
$value
) = ( $1, $2 );
}
if
(
$tag
=~ /
format
-version/) {
$format_version_header_flag
= 1;
}
elsif
(
$tag
=~ /
default
-namespace/ ) {
$default_namespace_header_flag
= 1;
}
my
$header
= FAST::Bio::Annotation::SimpleValue->new(
-value
=>
$value
);
$annotation_collection
->add_Annotation(
$tag
,
$header
);
if
(
$tag
=~ /
default
-namespace/i ) {
$self
->ontology_name(
$value
);
}
}
}
}
sub
_next_term {
my
$self
=
shift
;
my
$term
;
my
$skip_stanza_flag
= 1;
my
$line_counter
=
$self
->{
'_current_line_no'
};
while
(
my
$line
=
$self
->_readline() ) {
++
$line_counter
;
my
$line
=
$self
->_filter_line(
$line
);
if
( !
$line
&&
$term
) {
$self
->{
'_current_line_no'
} =
$line_counter
;
return
$term
;
}
if
( (
$line
=~ /^\[(\w+)\]\s*(.*)/ ) ) {
if
(
uc
($1) eq
"TERM"
) {
$term
=
$self
->_create_term_object;
$skip_stanza_flag
= 0;
$self
->{
'_relationships'
} = {};
$self
->{
'_isa_parents'
} =
undef
;
}
elsif
(
uc
($1) eq
"TYPEDEF"
) {
$skip_stanza_flag
= 1;
}
else
{
$skip_stanza_flag
= 1;
$self
->
warn
(
"OBO File Format Warning on line $line_counter $line \nUnrecognized stanza type found. Skipping this stanza.\n"
);
}
next
;
}
$self
->_check_colon(
$line
,
$line_counter
);
next
if
(
(
$line
!~
/^(\[|id:|name:|is_a:|relationship:|namespace:|is_obsolete:|alt_id:|def:|xref_analog:|exact_synonym:|broad_synonym:|related_synonym:|synonym:|comment:|xref:)/
)
||
$skip_stanza_flag
);
if
(
$line
=~ /^([\w\-]+)\:\s*(.*)/ ) {
my
(
$tag
,
$val
) = ( $1, $2 );
if
( !
$val
) {
$self
->
warn
(
"OBO File Format Warning on line $line_counter $line \nTag has no value\n"
);
}
my
$qh
;
(
$val
,
$qh
) =
$self
->_extract_quals(
$val
);
my
$val2
=
$val
;
$val2
=~ s/\\,/,/g;
$tag
=
uc
(
$tag
);
if
(
$tag
eq
"ID"
) {
$term
->identifier(
$val
);
if
(
$self
->_has_term(
$term
) ) {
$term
=
$self
->_ont_engine()->get_terms(
$val
);
}
}
elsif
(
$tag
eq
"NAME"
) {
$term
->name(
$val
);
}
elsif
(
$tag
eq
"XREF_ANALOG"
) {
if
( !
$term
->has_dbxref(
$val
) ) {
$term
->add_dbxref(
-dbxrefs
=>
$self
->_to_annotation([
$val
]));
}
}
elsif
(
$tag
eq
"XREF_UNKNOWN"
) {
$term
->add_dbxref(
-dbxrefs
=>
$self
->_to_annotation([
$val
]));
}
elsif
(
$tag
eq
"NAMESPACE"
) {
$term
->namespace(
$val
);
}
elsif
(
$tag
eq
"DEF"
) {
my
(
$defstr
,
$parts
) =
$self
->_extract_qstr(
$val
);
$term
->definition(
$defstr
);
my
$ann
=
$self
->_to_annotation(
$parts
);
$term
->add_dbxref(
-dbxrefs
=>
$ann
);
}
elsif
(
$tag
=~ /(\w*)synonym/i ) {
$term
->add_synonym(
$val
);
}
elsif
(
$tag
eq
"ALT_ID"
) {
$term
->add_secondary_id(
$val
);
}
elsif
(
$tag
=~ /XREF/i ) {
$term
->add_secondary_id(
$val
);
}
elsif
(
$tag
eq
"IS_OBSOLETE"
) {
if
(
$val
eq
'true'
) {
$val
= 1;
}
if
(
$val
eq
'false'
) {
$val
= 0;
}
$term
->is_obsolete(
$val
);
}
elsif
(
$tag
eq
"COMMENT"
) {
$term
->comment(
$val
);
}
elsif
(
$tag
eq
"RELATIONSHIP"
) {
$self
->_handle_relationship_tag(
$val
);
}
elsif
(
$tag
eq
"IS_A"
) {
$val
=~ s/ //g;
my
$parent_term
=
$self
->_create_term_object();
$parent_term
->identifier(
$val
);
if
(
$self
->{
'_isa_parents'
} ) {
my
$isa_parents_array_ref
=
$self
->{
'_isa_parents'
};
push
(
@$isa_parents_array_ref
,
$parent_term
);
}
else
{
my
@terms_array
;
push
(
@terms_array
,
$parent_term
);
$self
->{
'_isa_parents'
} = \
@terms_array
;
}
}
}
}
return
$term
;
}
sub
_create_term_object {
my
(
$self
) =
@_
;
my
$term
=
$self
->term_factory->create_object();
return
$term
;
}
sub
_extract_quals {
my
(
$self
,
$str
) =
@_
;
my
%q
= ();
if
(
$str
=~ /(.*)\s+(\{.*\})\s*$/ ) {
my
$return_str
= $1;
my
$extr
= $2;
if
(
$extr
) {
my
@qparts
=
$self
->_split_on_comma(
$extr
);
foreach
(
@qparts
) {
if
(/(\w+)=\"(.*)\"/) {
$q
{$1} = $2;
}
elsif
(/(\w+)=\'(.*)\'/) {
$q
{$1} = $2;
}
else
{
warn
(
"$_ in $str"
);
}
}
}
return
(
$return_str
, \
%q
);
}
else
{
return
(
$str
, {} );
}
}
sub
_extract_qstr {
my
(
$self
,
$str
) =
@_
;
my
(
$extr
,
$rem
,
$prefix
) = extract_quotelike(
$str
);
my
$txt
=
$extr
;
$txt
=~ s/^\"//;
$txt
=~ s/\"$//;
if
(
$prefix
) {
warn
(
"illegal prefix: $prefix in: $str"
);
}
my
@extra
= ();
if
(
$rem
=~ /(\w+)\s+(\[.*)/ ) {
$rem
= $2;
push
(
@extra
,
split
(
' '
, $1 ) );
}
my
@parts
= ();
while
( (
$extr
,
$rem
,
$prefix
) = extract_bracketed(
$rem
,
'[]'
) ) {
last
unless
$extr
;
$extr
=~ s/^\[//;
$extr
=~ s/\]$//;
push
(
@parts
,
$extr
)
if
$extr
;
}
@parts
=
map
{
$self
->_split_on_comma(
$_
) }
@parts
;
$txt
=~ s/\\//g;
return
(
$txt
, \
@parts
, \
@extra
);
}
sub
_split_on_comma {
my
(
$self
,
$str
) =
@_
;
my
@parts
= ();
while
(
$str
=~ /(.*[^\\],\s*)(.*)/ ) {
$str
= $1;
my
$part
= $2;
unshift
(
@parts
,
$part
);
$str
=~ s/,\s*$//;
}
unshift
(
@parts
,
$str
);
return
map
{ s/\\//g;
$_
}
@parts
;
}
sub
_check_colon {
my
(
$self
,
$line
,
$line_no
) =
@_
;
if
(
$line
&& !(
$line
=~ /:/ ) ) {
$self
->throw(
"OBO File Format Error on line $line_no $line - \nCannot find key-terminating colon\n"
);
}
}
sub
_handle_relationship_tag {
my
(
$self
,
$val
) =
@_
;
my
@parts
=
split
( / /,
$val
);
my
$relationship
=
uc
(
$parts
[0]);
my
$id
=
$parts
[1] =~ /\^(w+)\s+\!/ ? $1 :
$parts
[1];
my
$parent_term
=
$self
->_create_term_object();
$parent_term
->identifier(
$id
);
if
(
my
$realtionships_hash
=
$self
->{
'_relationships'
} ) {
my
$id_array_ref
=
$$realtionships_hash
{
$relationship
};
if
( !
$id_array_ref
) {
my
@ids
;
push
(
@ids
,
$id
);
$$realtionships_hash
{
$relationship
} = \
@ids
;
}
else
{
push
(
@$id_array_ref
,
$id
);
}
}
}
sub
_to_annotation {
my
(
$self
,
$links
) =
@_
;
return
unless
$links
;
my
@dbxrefs
;
for
my
$string
(@{
$links
}) {
my
(
$db
,
$id
) =
split
(
':'
,
$string
);
push
@dbxrefs
, FAST::Bio::Annotation::DBLink->new(
-database
=>
$db
,
-primary_id
=>
$id
);
}
return
\
@dbxrefs
;
}
1;