use
vars
qw(@Citations $Callback $Convert @ObjectStack @PCDataStack)
;
use
vars
qw(%PCDATA_NAMES %SIMPLE_TREATMENT %POP_DATA_AND_PEEK_OBJ %POP_OBJ_AND_PEEK_OBJ)
;
use
vars
qw(%POP_AND_ADD_ELEMENT %POP_AND_ADD_DATA_ELEMENT)
;
sub
_initialize {
my
(
$self
,
@args
) =
@_
;
my
%param
=
@args
;
@param
{
map
{
lc
$_
}
keys
%param
} =
values
%param
;
my
$new_key
;
foreach
my
$key
(
keys
%param
) {
(
$new_key
=
$key
) =~ s/^-/_/;
$self
->{
lc
$new_key
} =
$param
{
$key
};
}
my
$result
=
$self
->{
'_result'
} ||
'medline2ref'
;
$result
=
"\L$result"
;
unless
(
$result
eq
'raw'
) {
if
(
defined
&Bio::Biblio::IO::_load_format_module
(
$result
)) {
$Convert
=
"Bio::Biblio::IO::$result"
->new (
@args
);
}
}
$self
->{
'_xml_parser'
} = new XML::Parser (
Handlers
=> {
Init
=> \
&handle_doc_start
,
Start
=> \
&handle_start
,
End
=> \
&handle_end
,
Char
=> \
&handle_char
,
Final
=> \
&handle_doc_end
})
unless
$self
->{
'_xml_parser'
};
if
(
$Callback
=
$self
->{
'_callback'
}) {
$self
->_parse;
}
}
sub
_parse {
my
(
$self
) =
shift
;
if
(
defined
$self
->{
'_file'
}) {
$self
->{
'_xml_parser'
}->parsefile (
$self
->{
'_file'
});
}
elsif
(
defined
$self
->{
'_fh'
}) {
my
$fh
=
$self
->{
'_fh'
};
if
(
ref
(
$fh
) and UNIVERSAL::isa (
$fh
,
'IO::Handler'
)) {
$self
->{
'_xml_parser'
}->parse (
$fh
);
}
else
{
my
$data
;
$data
.=
$_
while
<
$fh
>;
$self
->{
'_xml_parser'
}->parse (
$data
);
}
}
elsif
(
$self
->{
'_data'
}) {
$self
->{
'_xml_parser'
}->parse (
$self
->{
'_data'
});
}
else
{
$self
->throw (
"XML source to be parsed is unknown. Should be given in the new()."
);
}
if
(
@Citations
) {
$self
->{
'_citations'
} = [];
foreach
my
$cit
(
@Citations
) {
push
(@{
$self
->{
'_citations'
} },
$cit
);
undef
$cit
;
}
undef
@Citations
;
}
}
@Citations
= ();
$Callback
=
undef
;
$Convert
=
undef
;
@ObjectStack
= ();
@PCDataStack
= ();
sub
next_bibref {
my
(
$self
) =
@_
;
$self
->throw (
"Method 'next_bibref' should not be called when a '-callback' argument given."
)
if
$self
->{
'_callback'
};
$self
->_parse
unless
$self
->{
'_citations'
};
shift
(@{
$self
->{
'_citations'
} });
}
%PCDATA_NAMES
= (
'AbstractText'
=> 1,
'AccessionNumber'
=> 1,
'Acronym'
=> 1,
'Affiliation'
=> 1,
'Agency'
=> 1,
'ArticleTitle'
=> 1,
'CASRegistryNumber'
=> 1,
'CitationSubset'
=> 1,
'Coden'
=> 1,
'CollectionTitle'
=> 1,
'CollectiveName'
=> 1,
'CopyrightInformation'
=> 1,
'Country'
=> 1,
'DataBankName'
=> 1,
'DateOfElectronicPublication'
=> 1,
'Day'
=> 1,
'Descriptor'
=> 1,
'DescriptorName'
=> 1,
'EndPage'
=> 1,
'FirstName'
=> 1,
'ForeName'
=> 1,
'GeneralNote'
=> 1,
'GeneSymbol'
=> 1,
'GrantID'
=> 1,
'Hour'
=> 1,
'ISOAbbreviation'
=> 1,
'ISSN'
=> 1,
'Initials'
=> 1,
'Issue'
=> 1,
'Keyword'
=> 1,
'Language'
=> 1,
'LastName'
=> 1,
'MedlineCode'
=> 1,
'MedlineDate'
=> 1,
'MedlineID'
=> 1,
'MedlinePgn'
=> 1,
'MedlineTA'
=> 1,
'MiddleName'
=> 1,
'Minute'
=> 1,
'Month'
=> 1,
'NameOfSubstance'
=> 1,
'NlmUniqueID'
=> 1,
'Note'
=> 1,
'NumberOfReferences'
=> 1,
'OtherID'
=> 1,
'PMID'
=> 1,
'PublicationType'
=> 1,
'Publisher'
=> 1,
'QualifierName'
=> 1,
'RefSource'
=> 1,
'RegistryNumber'
=> 1,
'Season'
=> 1,
'Second'
=> 1,
'SpaceFlightMission'
=> 1,
'StartPage'
=> 1,
'SubHeading'
=> 1,
'Suffix'
=> 1,
'Title'
=> 1,
'VernacularTitle'
=> 1,
'Volume'
=> 1,
'Year'
=> 1,
);
%SIMPLE_TREATMENT
= (
'MeshHeading'
=> 1,
'Author'
=> 1,
'Article'
=> 1,
'Book'
=> 1,
'Investigator'
=> 1,
'Chemical'
=> 1,
'Pagination'
=> 1,
'MedlineJournalInfo'
=> 1,
'JournalIssue'
=> 1,
'Journal'
=> 1,
'DateCreated'
=> 1,
'DateCompleted'
=> 1,
'DateRevised'
=> 1,
'PubDate'
=> 1,
'Abstract'
=> 1,
'Grant'
=> 1,
'CommentsCorrections'
=> 1,
'CommentOn'
=> 1,
'CommentIn'
=> 1,
'ErratumFor'
=> 1,
'ErratumIn'
=> 1,
'OriginalReportIn'
=> 1,
'RepublishedFrom'
=> 1,
'RepublishedIn'
=> 1,
'RetractionOf'
=> 1,
'RetractionIn'
=> 1,
'SummaryForPatientsIn'
=> 1,
'UpdateIn'
=> 1,
'UpdateOf'
=> 1,
'DataBank'
=> 1,
'KeywordList'
=> 1,
'DeleteCitation'
=> 1,
);
%POP_DATA_AND_PEEK_OBJ
= (
'Descriptor'
=> 1,
'DescriptorName'
=> 1,
'Year'
=> 1,
'Month'
=> 1,
'Day'
=> 1,
'LastName'
=> 1,
'Initials'
=> 1,
'FirstName'
=> 1,
'ForeName'
=> 1,
'NameOfSubstance'
=> 1,
'RegistryNumber'
=> 1,
'CASRegistryNumber'
=> 1,
'MiddleName'
=> 1,
'NlmUniqueID'
=> 1,
'MedlineTA'
=> 1,
'MedlinePgn'
=> 1,
'MedlineCode'
=> 1,
'Country'
=> 1,
'ISSN'
=> 1,
'ArticleTitle'
=> 1,
'Issue'
=> 1,
'AbstractText'
=> 1,
'VernacularTitle'
=> 1,
'GrantID'
=> 1,
'Agency'
=> 1,
'Acronym'
=> 1,
'MedlineDate'
=> 1,
'NumberOfReferences'
=> 1,
'RefSource'
=> 1,
'DataBankName'
=> 1,
'CopyrightInformation'
=> 1,
'Suffix'
=> 1,
'Note'
=> 1,
'CollectiveName'
=> 1,
'Hour'
=> 1,
'Minute'
=> 1,
'Second'
=> 1,
'Season'
=> 1,
'Coden'
=> 1,
'ISOAbbreviation'
=> 1,
'Publisher'
=> 1,
'CollectionTitle'
=> 1,
'DateOfElectronicPublication'
=> 1,
'StartPage'
=> 1,
'EndPage'
=> 1,
'Volume'
=> 1,
'Title'
=> 1,
);
%POP_OBJ_AND_PEEK_OBJ
= (
'Pagination'
=> 1,
'JournalIssue'
=> 1,
'Journal'
=> 1,
'DateCreated'
=> 1,
'Article'
=> 1,
'DateCompleted'
=> 1,
'DateRevised'
=> 1,
'CommentsCorrections'
=> 1,
'Book'
=> 1,
'PubDate'
=> 1,
'Abstract'
=> 1,
);
%POP_AND_ADD_DATA_ELEMENT
= (
'Keyword'
=>
'keywords'
,
'PublicationType'
=>
'publicationTypes'
,
'CitationSubset'
=>
'citationSubsets'
,
'Language'
=>
'languages'
,
'AccessionNumber'
=>
'accessionNumbers'
,
'GeneSymbol'
=>
'geneSymbols'
,
'SpaceFlightMission'
=>
'spaceFlightMissions'
,
);
%POP_AND_ADD_ELEMENT
= (
'OtherAbstract'
=>
'otherAbstracts'
,
'Chemical'
=>
'chemicals'
,
'KeywordList'
=>
'keywordLists'
,
'Grant'
=>
'grants'
,
'UpdateIn'
=>
'updateIns'
,
'CommentOn'
=>
'commentOns'
,
'CommentIn'
=>
'commentIns'
,
'DataBank'
=>
'dataBanks'
,
'PersonalNameSubject'
=>
'personalNameSubjects'
,
'ErratumFor'
=>
'erratumFors'
,
'ErratumIn'
=>
'erratumIns'
,
'RepublishedFrom'
=>
'republishedFroms'
,
'RepublishedIn'
=>
'republishedIns'
,
'RetractionOf'
=>
'retractionOfs'
,
'RetractionIn'
=>
'retractionIns'
,
'UpdateOf'
=>
'updateOfs'
,
'OriginalReportIn'
=>
'originalReportIns'
,
'SummaryForPatientsIn'
=>
'summaryForPatientsIns'
,
'MeshHeading'
=>
'meshHeadings'
,
);
sub
handle_doc_start {
@Citations
= ();
@ObjectStack
= ();
@PCDataStack
= ();
}
sub
handle_doc_end {
undef
@ObjectStack
;
undef
@PCDataStack
;
}
sub
handle_char {
my
(
$expat
,
$str
) =
@_
;
return
if
$#PCDataStack
< 0;
$PCDataStack
[
$#PCDataStack
] .=
$str
;
}
sub
handle_start {
my
(
$expat
,
$e
,
%attrs
) =
@_
;
if
(
$e
eq
'QualifierName'
or
$e
eq
'SubHeading'
) {
my
%p
= ();
$p
{
'majorTopic'
} =
"Y"
if
$attrs
{
'MajorTopicYN'
};
push
(
@ObjectStack
, \
%p
);
}
if
(
$e
eq
'GeneralNote'
) {
my
%p
= ();
$p
{
'owner'
} =
$attrs
{
'Owner'
}
if
$attrs
{
'Owner'
};
push
(
@ObjectStack
, \
%p
);
}
if
(
$e
eq
'OtherID'
) {
my
%p
= ();
$p
{
'source'
} =
$attrs
{
'Source'
};
push
(
@ObjectStack
, \
%p
);
}
if
(
$e
eq
'LastName'
or
$e
eq
'FirstName'
or
$e
eq
'MidleName'
or
$e
eq
'Initials'
or
$e
eq
'ForeName'
or
$e
eq
'Suffix'
) {
my
$peek
=
$ObjectStack
[
$#ObjectStack
];
push
(
@ObjectStack
, {
'type'
=>
'PersonalName'
})
unless
(
ref
$peek
and
&_eq_hash_elem
(
$peek
,
'type'
,
'PersonalName'
));
}
if
(
exists
$PCDATA_NAMES
{
$e
}) {
push
(
@PCDataStack
,
''
);
}
elsif
(
exists
$SIMPLE_TREATMENT
{
$e
}) {
push
(
@ObjectStack
, {});
}
elsif
(
$e
eq
'PersonalNameSubject'
) {
push
(
@ObjectStack
, {
'type'
=>
'PersonalName'
});
}
elsif
(
$e
eq
'DescriptorName'
or
$e
eq
'Descriptor'
) {
if
(
&_eq_hash_elem
(\
%attrs
,
'MajorTopicYN'
,
"Y"
)) {
my
$peek
=
$ObjectStack
[
$#ObjectStack
];
$$peek
{
'descriptorMajorTopic'
} =
"Y"
;
}
}
elsif
(
$e
eq
'MedlineCitation'
||
$e
eq
'NCBIArticle'
) {
my
%p
= (
'type'
=>
'MedlineCitation'
);
$p
{
'owner'
} =
$attrs
{
'Owner'
}
if
$attrs
{
'Owner'
};
$p
{
'status'
} =
$attrs
{
'Status'
}
if
$attrs
{
'Status'
};
push
(
@ObjectStack
, \
%p
);
}
elsif
(
$e
eq
'GrantList'
) {
if
(
&_eq_hash_elem
(\
%attrs
,
'CompleteYN'
,
"N"
)) {
my
$peek
=
$ObjectStack
[
$#ObjectStack
];
$$peek
{
'grantListComplete'
} =
"N"
;
}
}
elsif
(
$e
eq
'DataBankList'
) {
if
(
&_eq_hash_elem
(\
%attrs
,
'CompleteYN'
,
"N"
)) {
my
$peek
=
$ObjectStack
[
$#ObjectStack
];
$$peek
{
'dataBankListComplete'
} =
"N"
;
}
}
elsif
(
$e
eq
'AuthorList'
) {
if
(
&_eq_hash_elem
(\
%attrs
,
'CompleteYN'
,
"N"
)) {
my
$peek
=
$ObjectStack
[
$#ObjectStack
];
$$peek
{
'authorListComplete'
} =
"N"
;
}
}
elsif
(
$e
eq
'OtherAbstract'
) {
my
%p
= ();
$p
{
'type'
} =
$attrs
{
'Type'
}
if
$attrs
{
'Type'
};
push
(
@ObjectStack
, \
%p
);
}
}
sub
handle_end {
my
(
$expat
,
$e
) =
@_
;
if
(
$e
eq
'QualifierName'
or
$e
eq
'SubHeading'
) {
my
$p
=
pop
@ObjectStack
;
$$p
{
'subHeading'
} =
pop
@PCDataStack
;
&_add_element
(
'subHeadings'
,
$p
);
return
;
}
elsif
(
$e
eq
'GeneralNote'
) {
my
$p
=
pop
@ObjectStack
;
$$p
{
'generalNote'
} =
pop
@PCDataStack
;
&_add_element
(
'generalNotes'
,
$p
);
return
;
}
elsif
(
$e
eq
'OtherID'
) {
my
$p
=
pop
@ObjectStack
;
$$p
{
'otherID'
} =
pop
@PCDataStack
;
&_add_element
(
'otherIDs'
,
$p
);
return
;
}
if
(
exists
$POP_DATA_AND_PEEK_OBJ
{
$e
}) {
&_data2obj
(
"\l$e"
);
}
elsif
(
exists
$POP_OBJ_AND_PEEK_OBJ
{
$e
}) {
&_obj2obj
(
"\l$e"
);
}
elsif
(
exists
$POP_AND_ADD_ELEMENT
{
$e
}) {
&_add_element
(
$POP_AND_ADD_ELEMENT
{
$e
},
pop
@ObjectStack
);
}
elsif
(
exists
$POP_AND_ADD_DATA_ELEMENT
{
$e
}) {
&_add_element
(
$POP_AND_ADD_DATA_ELEMENT
{
$e
});
}
elsif
(
$e
eq
'Author'
or
$e
eq
'Investigator'
) {
my
$pAuthor
;
my
$p
=
pop
@ObjectStack
;
if
(
&_eq_hash_elem
(
$p
,
'type'
,
'PersonalName'
)) {
$pAuthor
=
pop
@ObjectStack
;
$$pAuthor
{
'personalName'
} =
$p
;
}
else
{
$pAuthor
=
$p
;
}
my
$peek
=
$ObjectStack
[
$#ObjectStack
]; # pMedlineCitation, pArticle or pBook
if
(
&_eq_hash_elem
(
$peek
,
'type'
,
'MedlineCitation'
)) {
&_add_element
(
'investigators'
,
$pAuthor
);
}
else
{
&_add_element
(
'authors'
,
$pAuthor
);
}
}
elsif
(
$e
eq
'MedlineJournalInfo'
) {
&_obj2obj
(
'journalInfo'
);
}
elsif
(
$e
eq
'PMID'
) {
my
$peek
=
$ObjectStack
[
$#ObjectStack
]; # pMedlineCitation, pReference or pDeleteCitation
if
(
&_eq_hash_elem
(
$peek
,
'type'
,
'DeleteCitation'
)) {
&_add_element
(
'PMIDs'
);
}
else
{
$$peek
{
'PMID'
} =
pop
@PCDataStack
;
}
}
elsif
(
$e
eq
'MedlineID'
) {
my
$peek
=
$ObjectStack
[
$#ObjectStack
]; # pMedlineCitation, pReference or pDeleteCitation
if
(
&_eq_hash_elem
(
$peek
,
'type'
,
'DeleteCitation'
)) {
&_add_element
(
'MedlineIDs'
);
}
else
{
$$peek
{
'medlineID'
} =
pop
@PCDataStack
;
}
}
elsif
(
$e
eq
'Affiliation'
) {
my
$peek
=
$ObjectStack
[
$#ObjectStack
];
if
(
&_eq_hash_elem
(
$peek
,
'type'
,
'PersonalName'
)) {
my
$peek2
=
$ObjectStack
[
$#ObjectStack
- 1];
$$peek2
{
'affiliation'
} =
pop
@PCDataStack
;
}
else
{
$$peek
{
'affiliation'
} =
pop
@PCDataStack
;
}
}
elsif
(
$e
eq
'DeleteCitation'
) {
pop
@ObjectStack
;
}
elsif
(
$e
eq
'MedlineCitation'
) {
&_process_citation
(
pop
@ObjectStack
);
}
elsif
(
exists
$PCDATA_NAMES
{
$e
}) {
pop
@PCDataStack
;
warn
(
"An unexpected element found: $e"
);
}
}
sub
_process_citation {
my
(
$citation
) =
@_
;
$citation
=
$Convert
->convert (
$citation
)
if
defined
$Convert
;
if
(
$Callback
) {
&$Callback
(
$citation
);
}
else
{
push
(
@Citations
,
$citation
);
}
}
sub
_add_element {
my
(
$key
,
$element
) =
@_
;
my
$peek
=
$ObjectStack
[
$#ObjectStack
];
$$peek
{
$key
} = []
unless
$$peek
{
$key
};
push
(@{
$$peek
{
$key
} }, (
defined
$element
?
$element
:
pop
@PCDataStack
));
}
sub
_data2obj {
my
(
$key
) =
@_
;
my
$peek
=
$ObjectStack
[
$#ObjectStack
];
$$peek
{
$key
} =
pop
@PCDataStack
;
}
sub
_obj2obj {
my
(
$key
) =
@_
;
my
$p
=
pop
@ObjectStack
;
my
$peek
=
$ObjectStack
[
$#ObjectStack
];
$$peek
{
$key
} =
$p
;
}
sub
_eq_hash_elem {
my
(
$rh
,
$key
,
$value
) =
@_
;
return
(
defined
$$rh
{
$key
} and
$$rh
{
$key
} eq
$value
);
}
use
vars
qw(%DEBUGSTACK)
;
%DEBUGSTACK
= ();
sub
_debug_object_stack {
my
(
$action
,
$element
) =
@_
;
if
(
$action
=~ /^START/o) {
$DEBUGSTACK
{
$element
} = (
@ObjectStack
+0);
}
else
{
return
if
$element
eq
'LastName'
;
print
"Element $element starts on "
.
$DEBUGSTACK
{
$element
} .
'and ends on '
. (
@ObjectStack
+0) .
"\n"
if
$DEBUGSTACK
{
$element
} != (
@ObjectStack
+0);
}
}
1;