use
vars
qw($EntrezLocation $UrlParamSeparatorValue %EntrezParams
$EntrezGet $EntrezSummary $EntrezFetch %SequenceParams
$XMLTWIG $DATA_CACHE $RELATIONS)
;
eval
{
$XMLTWIG
= 1;
};
if
( $@ ) {
$XMLTWIG
= 0;
}
$EntrezGet
=
'esearch.fcgi'
;
$EntrezFetch
=
'efetch.fcgi'
;
$EntrezSummary
=
'esummary.fcgi'
;
$DATA_CACHE
= {};
$RELATIONS
= {};
%EntrezParams
= (
'db'
=>
'taxonomy'
,
'report'
=>
'xml'
,
'retmode'
=>
'xml'
,
'tool'
=>
'Bioperl'
);
%SequenceParams
= (
'db'
=>
'nucleotide'
,
'retmode'
=>
'xml'
,
'tool'
=>
'Bioperl'
);
$UrlParamSeparatorValue
=
'&'
;
sub
new {
my
(
$class
,
@args
) =
@_
;
my
(
$self
) =
$class
->SUPER::new(
@args
);
$self
->_initialize(
@args
);
return
$self
;
}
sub
_initialize {
my
(
$self
) =
shift
;
$self
->SUPER::_initialize(
@_
);
my
(
$location
,
$params
) =
$self
->_rearrange([
qw(LOCATION PARAMS)
],
@_
);
if
(
$params
) {
if
(
ref
(
$params
) !~ /HASH/i ) {
$self
->
warn
(
"Must have provided a valid HASHref for -params"
);
$params
= \
%EntrezParams
;
}
}
else
{
$params
= \
%EntrezParams
;
}
$self
->entrez_params(
$params
);
$self
->entrez_url(
$location
||
$EntrezLocation
);
}
sub
get_taxon {
my
$self
=
shift
;
if
(!
$XMLTWIG
) {
$self
->throw(
"Could not load XML::Twig for get_taxon(): $@"
)
if
$@;
}
my
%p
=
$self
->entrez_params;
my
(
@taxonids
,
$taxonid
,
$want_full
);
if
(
@_
> 1) {
my
%params
=
@_
;
if
(
$params
{
'-taxonid'
}) {
$taxonid
=
$params
{
'-taxonid'
};
}
elsif
(
$params
{
'-gi'
}) {
my
$db
=
$params
{
'-db'
};
my
%p
=
%SequenceParams
;
my
%items
;
if
(
ref
(
$params
{
'-gi'
}) =~ /ARRAY/i ) {
$p
{
'id'
} =
join
(
','
, @{
$params
{
'-gi'
}});
}
else
{
$p
{
'id'
} =
$params
{
'-gi'
};
}
$p
{
'db'
} =
$db
if
defined
$db
;
my
$params
=
join
(
$UrlParamSeparatorValue
,
map
{
"$_="
.
$p
{
$_
} }
keys
%p
);
my
$url
=
sprintf
(
"%s%s?%s"
,
$self
->entrez_url,
$EntrezSummary
,
$params
);
$self
->debug(
"url is $url\n"
);
my
@ids
;
if
(
exists
$DATA_CACHE
->{gi_to_ids}->{
$url
}) {
@ids
= @{
$DATA_CACHE
->{gi_to_ids}->{
$url
}};
}
else
{
my
$response
=
$self
->get(
$url
);
if
(
$response
->is_success) {
$response
=
$response
->content;
}
else
{
$self
->throw(
"Can't query website: "
.
$response
->status_line);
}
$self
->debug(
"resp is $response\n"
);
my
$twig
= XML::Twig->new;
$twig
->parse(
$response
);
my
$root
=
$twig
->root;
for
my
$topnode
(
$root
->children(
'DocSum'
) ) {
for
my
$child
(
$topnode
->children(
'Item'
) ) {
if
(
uc
(
$child
->{att}->{
'Name'
}) eq
'TAXID'
) {
push
@ids
,
$child
->text;
}
}
}
$DATA_CACHE
->{gi_to_ids}->{
$url
} = \
@ids
;
}
return
$self
->get_taxon(
-taxonid
=> \
@ids
);
}
elsif
(
$params
{
'-name'
}) {
@taxonids
=
$self
->get_taxonid(
$params
{
'-name'
});
}
else
{
$self
->
warn
(
"Need to have provided either a -taxonid or -name value to get_taxon"
);
}
if
(
$params
{
'-full'
}) {
$want_full
= 1;
}
}
else
{
$taxonid
=
shift
;
}
if
(
ref
(
$taxonid
) =~ /ARRAY/i ) {
@taxonids
= @{
$taxonid
};
}
else
{
push
(
@taxonids
,
$taxonid
)
if
$taxonid
;
}
my
@results
;
my
@uncached
;
foreach
my
$taxonid
(
@taxonids
) {
$taxonid
||
$self
->throw(
"In taxonids list one was undef! '@taxonids'\n"
);
if
(
defined
$DATA_CACHE
->{full_info}->{
$taxonid
}) {
push
(
@results
,
$self
->_make_taxon(
$DATA_CACHE
->{full_info}->{
$taxonid
}));
}
elsif
(!
$want_full
&&
defined
$DATA_CACHE
->{minimal_info}->{
$taxonid
}) {
push
(
@results
,
$self
->_make_taxon(
$DATA_CACHE
->{minimal_info}->{
$taxonid
}));
}
else
{
push
(
@uncached
,
$taxonid
);
}
}
if
(
@uncached
> 0) {
$taxonid
=
join
(
','
,
@uncached
);
$p
{
'id'
} =
$taxonid
;
$self
->debug(
"id is $taxonid\n"
);
my
$params
=
join
(
$UrlParamSeparatorValue
,
map
{
"$_="
.
$p
{
$_
} }
keys
%p
);
my
$url
=
sprintf
(
"%s%s?%s"
,
$self
->entrez_url,
$EntrezFetch
,
$params
);
$self
->debug(
"url is $url\n"
);
my
$response
=
$self
->get(
$url
);
if
(
$response
->is_success) {
$response
=
$response
->content;
}
else
{
$self
->throw(
"Can't query website: "
.
$response
->status_line);
}
$self
->debug(
"resp is $response\n"
);
my
$twig
= XML::Twig->new;
$twig
->parse(
$response
);
my
$root
=
$twig
->root;
for
my
$taxon
(
$root
->children(
'Taxon'
) ) {
my
$taxid
=
$taxon
->first_child_text(
'TaxId'
);
$self
->throw(
"Got a result with no TaxId!"
)
unless
$taxid
;
my
$data
= {};
if
(
exists
$DATA_CACHE
->{minimal_info}->{
$taxid
}) {
$data
=
$DATA_CACHE
->{minimal_info}->{
$taxid
};
}
$data
->{id} =
$taxid
;
$data
->{rank} =
$taxon
->first_child_text(
'Rank'
);
my
$other_names
=
$taxon
->first_child(
'OtherNames'
);
my
@other_names
=
$other_names
->children_text()
if
$other_names
;
my
$sci_name
=
$taxon
->first_child_text(
'ScientificName'
);
my
$orig_sci_name
=
$sci_name
;
$sci_name
=~ s/ \(class\)$//;
push
(
@other_names
,
$orig_sci_name
)
if
$orig_sci_name
ne
$sci_name
;
$data
->{scientific_name} =
$sci_name
;
$data
->{common_names} = \
@other_names
;
$data
->{division} =
$taxon
->first_child_text(
'Division'
);
$data
->{genetic_code} =
$taxon
->first_child(
'GeneticCode'
)->first_child_text(
'GCId'
);
$data
->{mitochondrial_genetic_code} =
$taxon
->first_child(
'MitoGeneticCode'
)->first_child_text(
'MGCId'
);
$data
->{create_date} =
$taxon
->first_child_text(
'CreateDate'
);
$data
->{update_date} =
$taxon
->first_child_text(
'UpdateDate'
);
$data
->{pub_date} =
$taxon
->first_child_text(
'PubDate'
);
my
$lineage_ex
=
$taxon
->first_child(
'LineageEx'
);
my
(
$ancestor
,
$lineage_data
,
@taxa
);
foreach
my
$lineage_taxon
(
$lineage_ex
->children) {
my
$lineage_taxid
=
$lineage_taxon
->first_child_text(
'TaxId'
);
if
(
exists
$DATA_CACHE
->{minimal_info}->{
$lineage_taxid
} ||
exists
$DATA_CACHE
->{full_info}->{
$lineage_taxid
}) {
$lineage_data
=
$DATA_CACHE
->{minimal_info}->{
$lineage_taxid
} ||
$DATA_CACHE
->{full_info}->{
$lineage_taxid
};
next
;
}
else
{
$lineage_data
= {};
}
$lineage_data
->{id} =
$lineage_taxid
;
$lineage_data
->{scientific_name} =
$lineage_taxon
->first_child_text(
'ScientificName'
);
$lineage_data
->{rank} =
$lineage_taxon
->first_child_text(
'Rank'
);
$RELATIONS
->{ancestors}->{
$lineage_taxid
} =
$ancestor
->{id}
if
$ancestor
;
$DATA_CACHE
->{minimal_info}->{
$lineage_taxid
} =
$lineage_data
;
}
continue
{
$ancestor
=
$lineage_data
;
unshift
(
@taxa
,
$lineage_data
); }
$RELATIONS
->{ancestors}->{
$taxid
} =
$ancestor
->{id}
if
$ancestor
;
my
$child
=
$data
;
foreach
my
$lineage_data
(
@taxa
) {
$RELATIONS
->{children}->{
$lineage_data
->{id}}->{
$child
->{id}} = 1;
}
continue
{
$child
=
$lineage_data
; }
delete
$DATA_CACHE
->{minimal_info}->{
$taxid
};
$DATA_CACHE
->{full_info}->{
$taxid
} =
$data
;
push
(
@results
,
$self
->_make_taxon(
$data
));
}
}
wantarray
() ?
@results
:
shift
@results
;
}
*get_Taxonomy_Node
= \
&get_taxon
;
sub
get_taxonids {
my
(
$self
,
$query
) =
@_
;
my
%p
=
$self
->entrez_params;
if
(
$query
=~ /<.+>/) {
$query
=~ s/ <(.+?)>//;
my
$desired_parent_name
=
lc
($1);
ID:
foreach
my
$start_id
(
$self
->get_taxonids(
$query
)) {
my
$node
=
$self
->get_taxon(
$start_id
) ||
next
ID;
while
(1) {
my
$parent_node
=
$self
->ancestor(
$node
) ||
next
ID;
my
$parent_sci_name
=
$parent_node
->scientific_name ||
next
ID;
my
@parent_common_names
=
$parent_node
->common_names;
unless
(
@parent_common_names
) {
$parent_node
=
$self
->get_taxon(
-taxonid
=>
$parent_node
->id,
-full
=> 1);
@parent_common_names
=
$parent_node
->common_names;
}
foreach
my
$name
(
$parent_sci_name
,
@parent_common_names
) {
if
(
lc
(
$name
) eq
$desired_parent_name
) {
return
wantarray
() ? (
$start_id
) :
$start_id
;
}
}
my
$parent_rank
=
$parent_node
->rank ||
'no rank'
;
if
(
$parent_rank
ne
'no rank'
) {
last
;
}
else
{
$node
=
$parent_node
;
}
}
}
return
;
}
$query
=~ s/[\"\(\)]//g;
$query
=~ s/\s/+/g;
my
@data
;
if
(
defined
$DATA_CACHE
->{name_to_id}->{
$query
}) {
@data
= @{
$DATA_CACHE
->{name_to_id}->{
$query
}};
}
else
{
$p
{
'term'
} =
$query
;
my
$params
=
join
(
$UrlParamSeparatorValue
,
map
{
"$_="
.
$p
{
$_
} }
keys
%p
);
my
$url
=
sprintf
(
"%s%s?%s"
,
$self
->entrez_url,
$EntrezGet
,
$params
);
my
$response
=
$self
->get(
$url
);
if
(
$response
->is_success) {
$response
=
$response
->content;
}
else
{
$self
->throw(
"Can't query website: "
.
$response
->status_line);
}
$self
->debug(
"response is $response\n"
);
my
$twig
= XML::Twig->new;
$twig
->parse(
$response
);
my
$root
=
$twig
->root;
my
$list
=
$root
->first_child(
'IdList'
);
@data
=
map
{
$_
->text }
$list
->children(
'Id'
);
$DATA_CACHE
->{name_to_id}->{
$query
} = [
@data
];
}
wantarray
() ?
@data
:
shift
@data
;
}
*get_taxonid
= \
&get_taxonids
;
sub
ancestor {
my
(
$self
,
$taxon
) =
@_
;
$self
->throw(
"Must supply a FAST::Bio::Taxon"
)
unless
ref
(
$taxon
) &&
$taxon
->isa(
'FAST::Bio::Taxon'
);
$self
->throw(
"The supplied Taxon must belong to this database"
)
unless
$taxon
->db_handle &&
$taxon
->db_handle eq
$self
;
my
$id
=
$taxon
->id ||
$self
->throw(
"The supplied Taxon is missing its id!"
);
my
$ancestor_id
=
$RELATIONS
->{ancestors}->{
$id
} ||
return
;
return
$self
->_make_taxon(
$DATA_CACHE
->{full_info}->{
$ancestor_id
} ||
$DATA_CACHE
->{minimal_info}->{
$ancestor_id
});
}
sub
each_Descendent {
my
(
$self
,
$taxon
) =
@_
;
$self
->throw(
"Must supply a FAST::Bio::Taxon"
)
unless
ref
(
$taxon
) &&
$taxon
->isa(
'FAST::Bio::Taxon'
);
$self
->throw(
"The supplied Taxon must belong to this database"
)
unless
$taxon
->db_handle &&
$taxon
->db_handle eq
$self
;
my
$id
=
$taxon
->id ||
$self
->throw(
"The supplied Taxon is missing its id!"
);
my
@children_ids
=
keys
%{
$RELATIONS
->{children}->{
$id
} || {}};
my
@children
;
foreach
my
$child_id
(
@children_ids
) {
push
(
@children
,
$self
->_make_taxon(
$DATA_CACHE
->{full_info}->{
$child_id
} ||
$DATA_CACHE
->{minimal_info}->{
$child_id
}));
}
return
@children
;
}
sub
entrez_url{
my
$self
=
shift
;
return
$self
->{
'_entrez_url'
} =
shift
if
@_
;
return
$self
->{
'_entrez_url'
};
}
sub
entrez_params{
my
$self
=
shift
;
my
$f
;
if
(
@_
) {
$f
=
$self
->{
'_entrez_params'
} =
shift
;
}
else
{
$f
=
$self
->{
'_entrez_params'
};
}
return
%$f
;
}
sub
_make_taxon {
my
(
$self
,
$data
) =
@_
;
my
$taxon
= FAST::Bio::Taxon->new();
my
$taxid
;
while
(
my
(
$method
,
$value
) =
each
%{
$data
}) {
if
(
$method
eq
'id'
) {
$method
=
'ncbi_taxid'
;
$taxid
=
$value
;
}
$taxon
->
$method
(
ref
(
$value
) eq
'ARRAY'
? @{
$value
} :
$value
);
}
$taxon
->{
'db_handle'
} =
$self
;
$self
->_handle_internal_id(
$taxon
);
return
$taxon
;
}
1;