package
FAST::Bio::OntologyIO::dagflat;
sub
_initialize {
my
(
$self
,
%arg
) =
@_
;
my
(
$defs_file_name
,
$files
,
$defs_url
,
$url
,
$name
,
$eng
) =
$self
->_rearrange([
qw( DEFS_FILE
FILES
DEFS_URL
URL
ONTOLOGY_NAME
ENGINE)
],
%arg
);
delete
(
$arg
{-url});
$self
->SUPER::_initialize(
%arg
);
$self
->_done( FALSE );
$self
->_not_first_record( FALSE );
$self
->_term(
""
);
delete
$self
->{
'_ontologies'
};
$eng
= FAST::Bio::Ontology::OBOEngine->new()
unless
$eng
;
if
(
$eng
->isa(
"FAST::Bio::Ontology::OntologyI"
)) {
$self
->ontology_name(
$eng
->name());
$eng
=
$eng
->engine()
if
$eng
->can(
'engine'
);
}
$self
->_ont_engine(
$eng
);
if
(
defined
(
$defs_file_name
) &&
defined
(
$defs_url
)){
$self
->throw(
'cannot provide both -defs_file and -defs_url'
);
}
else
{
defined
(
$defs_file_name
) &&
$self
->defs_file(
$defs_file_name
);
defined
(
$defs_url
) &&
$self
->defs_url(
$defs_url
);
}
if
(
defined
(
$files
) &&
defined
(
$url
)){
}
elsif
(
defined
(
$files
)){
$self
->{_flat_files} =
$files
?
ref
(
$files
) ?
$files
: [
$files
] : [];
}
elsif
(
defined
(
$url
)){
$self
->url(
$url
);
}
$self
->ontology_name(
$name
)
if
$name
;
}
sub
ontology_name{
my
$self
=
shift
;
return
$self
->{
'ontology_name'
} =
shift
if
@_
;
return
$self
->{
'ontology_name'
};
}
sub
parse {
my
$self
=
shift
;
$self
->term_factory(FAST::Bio::Ontology::TermFactory->new(
-type
=>
"FAST::Bio::Ontology::Term"
))
unless
$self
->term_factory();
my
$ont
= FAST::Bio::Ontology::Ontology->new(
-name
=>
$self
->ontology_name(),
-engine
=>
$self
->_ont_engine());
while
(
my
$term
=
$self
->_next_term() ) {
$self
->_add_term(
$term
,
$ont
);
}
foreach
(
$self
->_part_of_relationship(),
$self
->_is_a_relationship(),
$self
->_related_to_relationship()) {
$_
->ontology(
$ont
);
}
if
(!
$self
->_fh) {
if
(
$self
->url){
if
(
ref
(
$self
->url) eq
'ARRAY'
){
foreach
my
$url
(@{
$self
->url }){
$self
->_initialize_io(
-url
=>
$url
);
$self
->_parse_flat_file(
$ont
);
}
$self
->
close
();
}
else
{
$self
->_initialize_io(
-url
=>
$self
->url);
}
}
elsif
(
$self
->_flat_files){
$self
->_initialize_io(
-file
=>
shift
(@{
$self
->_flat_files()}));
}
}
while
(
$self
->_fh) {
$self
->_parse_flat_file(
$ont
);
if
(@{
$self
->_flat_files()}) {
$self
->
close
();
$self
->_initialize_io(
-file
=>
shift
(@{
$self
->_flat_files()}));
}
else
{
last
;
}
}
$self
->_add_ontology(
$ont
);
return
$self
->_ont_engine();
}
sub
next_ontology {
my
$self
=
shift
;
$self
->parse()
unless
exists
(
$self
->{
'_ontologies'
});
if
(
exists
(
$self
->{
'_ontologies'
})){
my
$ont
=
shift
(@{
$self
->{
'_ontologies'
}});
if
(
$ont
){
my
$store
= FAST::Bio::Ontology::OntologyStore->new();
$store
->register_ontology(
$ont
);
return
$ont
;
}
}
return
;
}
sub
defs_file {
my
$self
=
shift
;
if
(
@_
) {
my
$f
=
shift
;
$self
->{
"_defs_file_name"
} =
$f
;
$self
->_defs_io->
close
()
if
$self
->_defs_io();
if
(
defined
(
$f
)) {
$self
->_defs_io( FAST::Bio::Root::IO->new(
-input
=>
$f
) );
}
}
return
$self
->{
"_defs_file_name"
};
}
sub
defs_url {
my
$self
=
shift
;
my
$val
=
shift
;
if
(
defined
(
$val
)){
$self
->{
'_defs_url'
} =
$val
;
$self
->_defs_io->
close
()
if
$self
->_defs_io();
$self
->_defs_io( FAST::Bio::Root::IO->new(
-url
=>
$val
) );
}
return
$self
->{
'_defs_url'
};
}
sub
url {
my
$self
=
shift
;
my
$val
=
shift
;
if
(
defined
(
$val
)){
$self
->{
'_url'
} =
$val
;
}
return
$self
->{
'_url'
};
}
sub
close
{
my
$self
=
shift
;
$self
->SUPER::
close
();
$self
->_defs_io->
close
()
if
$self
->_defs_io();
}
sub
_flat_files {
my
$self
=
shift
;
$self
->{_flat_files} = []
unless
exists
(
$self
->{_flat_files});
return
$self
->{_flat_files};
}
sub
_defs_io{
my
$self
=
shift
;
return
$self
->{
'_defs_io'
} =
shift
if
@_
;
return
$self
->{
'_defs_io'
};
}
sub
_add_ontology {
my
$self
=
shift
;
$self
->{
'_ontologies'
} = []
unless
exists
(
$self
->{
'_ontologies'
});
foreach
my
$ont
(
@_
) {
$self
->throw(
ref
(
$ont
).
" does not implement FAST::Bio::Ontology::OntologyI"
)
unless
ref
(
$ont
) &&
$ont
->isa(
"FAST::Bio::Ontology::OntologyI"
);
$ont
->name(
$self
->ontology_name)
unless
$ont
->name();
push
(@{
$self
->{
'_ontologies'
}},
$ont
);
}
}
sub
_add_term {
my
(
$self
,
$term
,
$ont
) =
@_
;
$term
->ontology(
$ont
)
if
$ont
&& (!
$term
->ontology);
$self
->_ont_engine()->add_term(
$term
);
}
sub
_part_of_relationship {
my
$self
=
shift
;
return
$self
->_ont_engine()->part_of_relationship(
@_
);
}
sub
_is_a_relationship {
my
$self
=
shift
;
return
$self
->_ont_engine()->is_a_relationship(
@_
);
}
sub
_related_to_relationship {
my
$self
=
shift
;
return
$self
->_ont_engine()->related_to_relationship(
@_
);
}
sub
_add_relationship {
my
(
$self
,
$parent
,
$child
,
$type
,
$ont
) =
@_
;
$self
->_ont_engine()->add_relationship(
$child
,
$type
,
$parent
,
$ont
);
}
sub
_has_term {
my
$self
=
shift
;
return
$self
->_ont_engine()->has_term(
@_
);
}
sub
_parse_flat_file {
my
$self
=
shift
;
my
$ont
=
shift
;
my
@stack
= ();
my
$prev_spaces
= -1;
my
$prev_term
=
""
;
while
(
my
$line
=
$self
->_readline() ) {
if
(
$line
=~ /^!/ ) {
next
;
}
my
@termspecs
=
split
(/ (?=[%<])/,
$line
);
shift
(
@termspecs
)
if
$termspecs
[0] =~ /^\s*$/;
my
$current_term
=
$self
->_get_first_termid(
$termspecs
[0] );
my
@syns
=
$self
->_get_synonyms(
$termspecs
[0] );
my
@sec_go_ids
=
$self
->_get_secondary_termids(
$termspecs
[0] );
my
@cross
=
$self
->_get_db_cross_refs(
$termspecs
[0] );
my
@cross_refs
;
foreach
my
$cross_ref
(
@cross
) {
$cross_ref
eq
$current_term
&&
next
;
push
(
@cross_refs
,
$cross_ref
);
}
shift
(
@termspecs
);
my
@isa_parents
= ();
my
@partof_parents
= ();
foreach
my
$parent
(
@termspecs
) {
if
(
index
(
$parent
,
"%"
) == 0) {
push
(
@isa_parents
,
$self
->_get_first_termid(
$parent
));
}
elsif
(
index
(
$parent
,
"<"
) == 0) {
push
(
@partof_parents
,
$self
->_get_first_termid(
$parent
));
}
else
{
$self
->
warn
(
"unhandled relationship type in '"
.
$parent
.
"'"
);
}
}
if
( !
$self
->_has_term(
$current_term
) ) {
my
$term
=
$self
->_create_ont_entry(
$self
->_get_name(
$line
,
$current_term
),
$current_term
);
$self
->_add_term(
$term
,
$ont
);
}
my
$current_term_object
=
$self
->_ont_engine()->get_terms(
$current_term
);
my
$anno
=
$self
->_to_annotation(\
@cross_refs
);
$current_term_object
->add_dbxref(
-dbxrefs
=>
$anno
);
$current_term_object
->add_secondary_id(
@sec_go_ids
);
$current_term_object
->add_synonym(
@syns
);
unless
(
$line
=~ /^\$/ ) {
$current_term_object
->ontology(
$ont
);
}
foreach
my
$parent
(
@isa_parents
) {
if
( !
$self
->_has_term(
$parent
) ) {
my
$term
=
$self
->_create_ont_entry(
$self
->_get_name(
$line
,
$parent
),
$parent
);
$self
->_add_term(
$term
,
$ont
);
}
$self
->_add_relationship(
$parent
,
$current_term
,
$self
->_is_a_relationship(),
$ont
);
}
foreach
my
$parent
(
@partof_parents
) {
if
( !
$self
->_has_term(
$parent
) ) {
my
$term
=
$self
->_create_ont_entry(
$self
->_get_name(
$line
,
$parent
),
$parent
);
$self
->_add_term(
$term
,
$ont
);
}
$self
->_add_relationship(
$parent
,
$current_term
,
$self
->_part_of_relationship(),
$ont
);
}
my
$current_spaces
=
$self
->_count_spaces(
$line
);
if
(
$current_spaces
!=
$prev_spaces
) {
if
(
$current_spaces
==
$prev_spaces
+ 1 ) {
push
(
@stack
,
$prev_term
);
}
elsif
(
$current_spaces
<
$prev_spaces
) {
my
$n
=
$prev_spaces
-
$current_spaces
;
for
(
my
$i
= 0;
$i
<
$n
; ++
$i
) {
pop
(
@stack
);
}
}
else
{
$self
->throw(
"format error (file "
.
$self
->file.
")"
);
}
}
my
$parent
=
$stack
[
@stack
- 1 ];
if
(
index
(
$line
,
'$'
) != 0 ) {
if
(
$line
!~ /^\s*([<%~]|\@\w+?\@)/ ) {
$self
->throw(
"format error (file "
.
$self
->file.
") offending line:\n$line"
);
}
my
(
$relstring
) =
$line
=~ /^\s*([<%~]|\@[^\@]+?\@)/;
my
$reltype
;
if
(
$relstring
eq
'<'
) {
$reltype
=
$self
->_part_of_relationship;
}
elsif
(
$relstring
eq
'%'
) {
$reltype
=
$self
->_is_a_relationship;
}
elsif
(
$relstring
eq
'~'
) {
$reltype
=
$self
->_related_to_relationship;
}
else
{
$relstring
=~ s/\@//g;
if
(
$self
->_ont_engine->get_relationship_type(
$relstring
)) {
$reltype
=
$self
->_ont_engine->get_relationship_type(
$relstring
);
}
else
{
$self
->_ont_engine->add_relationship_type(
$relstring
,
$ont
);
$reltype
=
$self
->_ont_engine->get_relationship_type(
$relstring
);
}
}
$self
->_add_relationship(
$parent
,
$current_term
,
$reltype
,
$ont
);
}
$prev_spaces
=
$current_spaces
;
$prev_term
=
$current_term
;
}
return
$ont
;
}
sub
_get_first_termid {
my
(
$self
,
$line
) =
@_
;
if
(
$line
=~ /;\s*([A-Z_]{1,8}:\d{1,})/ ) {
return
$1;
}
else
{
$self
->throw(
"format error: no term id in line \"$line\""
);
}
}
sub
_get_name {
my
(
$self
,
$line
,
$termid
) =
@_
;
if
(
$line
=~ /([^;<%~]+);\s
*$termid
/ ) {
my
$name
= $1;
$name
=~ s/\s+$//;
$name
=~ s/^\s+//;
$name
=~ s/\@.+?\@//;
if
(
index
(
$name
,
'$'
) == 0) {
$name
=
substr
(
$name
,1);
$self
->ontology_name(
join
(
" "
,
split
(/_/,
$name
)))
unless
$self
->ontology_name();
}
return
$name
;
}
else
{
return
;
}
}
sub
_get_synonyms {
my
(
$self
,
$line
) =
@_
;
my
@synonyms
= ();
while
(
$line
=~ /synonym\s*:\s*([^;<%~]+)/g ) {
my
$syn
= $1;
$syn
=~ s/\s+$//;
$syn
=~ s/^\s+//;
push
(
@synonyms
,
$syn
);
}
return
@synonyms
;
}
sub
_get_db_cross_refs {
my
(
$self
,
$line
) =
@_
;
my
@refs
= ();
while
(
$line
=~ /;([^;<%~:]+:[^;<%~:]+)/g ) {
my
$ref
= $1;
if
(
$ref
=~ /synonym/ ||
$ref
=~ /[A-Z]{1,8}:\d{3,}/ ) {
next
;
}
$ref
=~ s/\s+$//;
$ref
=~ s/^\s+//;
$ref
=
$self
->unescape(
$ref
);
push
(
@refs
,
$ref
)
if
defined
$ref
;
}
return
@refs
;
}
sub
_get_secondary_termids {
my
(
$self
,
$line
) =
@_
;
my
@secs
= ();
while
(
$line
=~ /,\s*(\w+:\w+)/g ) {
my
$sec
= $1;
push
(
@secs
,
$sec
);
}
return
@secs
;
}
sub
_count_spaces {
my
(
$self
,
$line
) =
@_
;
if
(
$line
=~ /^(\s+)/ ) {
return
length
( $1 );
}
else
{
return
0;
}
}
sub
_next_term {
my
(
$self
) =
@_
;
if
( (
$self
->_done() == TRUE) || (!
$self
->_defs_io())) {
return
;
}
my
$line
=
""
;
my
$termid
=
""
;
my
$next_term
=
$self
->_term();
my
$def
=
""
;
my
$comment
=
""
;
my
@def_refs
= ();
my
$isobsolete
;
while
(
$line
= (
$self
->_defs_io->_readline() ) ) {
if
(
$line
!~ /\S/
||
$line
=~ /^\s*!/ ) {
next
;
}
elsif
(
$line
=~ /^\s
*term
:\s*(.+)/ ) {
$self
->_term( $1 );
last
if
$self
->_not_first_record();
$next_term
= $1;
$self
->_not_first_record( TRUE );
}
elsif
(
$line
=~ /^\s*[a-z]{0,8}id:\s*(.+)/ ) {
$termid
= $1;
}
elsif
(
$line
=~ /^\s
*definition
:\s*(.+)/ ) {
$def
=
$self
->unescape($1);
$isobsolete
= 1
if
index
(
$def
,
"OBSOLETE"
) == 0;
}
elsif
(
$line
=~ /^\s
*definition_reference
:\s*(.+)/ ) {
push
(
@def_refs
,
$self
->unescape($1) );
}
elsif
(
$line
=~ /^\s
*comment
:\s*(.+)/ ) {
$comment
=
$self
->unescape($1);
}
}
$self
->_done( TRUE )
unless
$line
;
return
$self
->_create_ont_entry(
$next_term
,
$termid
,
$def
,
$comment
, \
@def_refs
,
$isobsolete
);
}
sub
_ont_engine {
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
) {
$self
->{
"_ont_engine"
} =
$value
;
}
return
$self
->{
"_ont_engine"
};
}
sub
_create_ont_entry {
my
(
$self
,
$name
,
$termid
,
$def
,
$cmt
,
$dbxrefs
,
$obsolete
) =
@_
;
if
((!
defined
(
$obsolete
)) && (
index
(
lc
(
$name
),
"obsolete"
) == 0)) {
$obsolete
= 1;
}
my
$anno
=
$self
->_to_annotation(
$dbxrefs
);
my
$term
=
$self
->term_factory->create_object(
-name
=>
$name
,
-identifier
=>
$termid
,
-definition
=>
$def
,
-comment
=>
$cmt
,
-dbxrefs
=>
$anno
,
-is_obsolete
=>
$obsolete
);
return
$term
;
}
sub
_not_first_record {
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
) {
$self
->{
"_not_first_record"
} =
$value
;
}
return
$self
->{
"_not_first_record"
};
}
sub
_done {
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
) {
$self
->{
"_done"
} =
$value
;
}
return
$self
->{
"_done"
};
}
sub
_term {
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
) {
$self
->{
"_term"
} =
$value
;
}
return
$self
->{
"_term"
};
}
sub
_to_annotation {
my
(
$self
,
$links
) =
@_
;
return
unless
$links
;
my
@dbxrefs
;
for
my
$string
(@{
$links
}) {
my
(
$db
,
$id
) =
split
(
':'
,
$string
);
push
@dbxrefs
, FAST::Bio::Annotation::DBLink->new(
-database
=>
$db
,
-primary_id
=>
$id
);
}
return
\
@dbxrefs
;
}
1;