# MetaMap::DataStructures::Citation # (Last Updated $Id: Citation.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $) # # Perl module that provides a perl interface to the # Unified Medical Language System (UMLS) # # Copyright (c) 2016 # # Sam Henry, Virginia Commonwealth University # henryst at vcu.edu # # Bridget T. McInnes, Virginia Commonwealth University # btmcinnes at vcu.edu # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to # # The Free Software Foundation, Inc., # 59 Temple Place - Suite 330, # Boston, MA 02111-1307, USA. package MetaMap::DataStructures::Citation; use strict; use warnings; use MetaMap::DataStructures::Utterance; #---------------------------------------- # constructors #---------------------------------------- # constructor method to create a new Citation object # input : - # output: $self <- a instance of a Citation object sub new { #create and bless self my $class = shift; my $self = {}; bless $self, $class; #grab input and initialize $self->{id} = shift; $self->{utterances} = {}; return $self; } #----------------------------------------------------------------- # methods #----------------------------------------------------------------- # method summarizes this utterance as a string # input : - # output: $string <- a string describing $self sub toString { my $self = shift; #initiliaze the string my $string = "citation:\n"; $string .= " $self->{id}\n"; #add each utterance to the string my %utterances = %{$self->{utterances}}; foreach my $key(keys %utterances) { $string .= " ".$utterances{$key}->toString()."\n"; } return $string; } # method to compare this citation to another and returns 1 if the two # contain identical information # input : $other <- the citation object to compare against # output: boolean <- 1 if $self and $other are equivalent (contain equivalent # ID's and utterances), else 0 sub equals { #grab input my $self = shift; my $other = shift; #compare id's if ($self->{id} ne $other->{id}) { return 0; } #compare Utterances foreach my $keyA(sort _by_utterance keys %{$self->{utterances}}){ my $utteranceA = $self->{utterances}{$keyA}; #check each utterance in B my $match = 0; foreach my $keyB(sort _by_utterance keys %{$other->{utterances}}) { my $utteranceB = $self->{utterances}{$keyB}; if ($utteranceA->equals($utteranceB)) { $match = 1; last; } } #citationA has no equivalent citation in $other # so citations are not identical if ($match < 1) { return 0; } } #all tests passed, return true return 1; } # method to determine if this citation contains the CUI provided as input # returns 1 if this citation contains the CUI, else 0 # input : $cui <- a string CUI code # output: boolean <- 1 if any of $self's utterances contain $cui sub contains { #grab input my $self = shift; my $cui = shift; #check each phrase to see if it contains the CUI my $containsCUI = 0; foreach my $key(keys %{$self->{utterances}}) { if ($self->{utterances}{$key}->contains($cui)) { $containsCUI = 1; last; } } #return the result return $containsCUI; } # method to add a new utterance to the citation # input : $newUtterance <- the utterance to add to $self # output: - sub addUtterance { my $self = shift; my $newUtterance = shift; if($newUtterance->{id} =~ /((ti|ab|tx)\.[\d]+)/) { $self->{utterances}{$1} = $newUtterance; } else { print STDERR "error adding utterance to citation: $newUtterance->{id}\n"; } } # method to sort the utterances by order they appear (title followed # by abstract, number ascending) # (e.g. ti.000.1, ti.000.2, ab.000.1, ab.000.2, ab.000.3) # input : $a, $b <- implicit sort variables, the keys in a hash of utterances # which are the utterance IDs (e.g. ti.0000000.1) # output: integer <- -1 if a is before b, 0 if a and b are same order, # 1 if a is after b sub _by_utterance { #get the utterance type my $a_ab = ($a =~ /ab/); my $b_ab = ($b =~ /ab/); #check if both are abstracts or titles if ($a_ab == $b_ab) { $a =~ /(ti|ab)\.([\d]+)/; my $aNum = ($2); $b =~ /(ti|ab)\.([\d]+)/; return $aNum <=> $2; } #check if one is abstract, the other is title if ($a_ab && !$b_ab) { return 1; } if (!$a_ab && $b_ab) { return -1; } } #------------------------------ Get Components ------------------------------ # method to returns an ordered list of Utterances contained by the Citation. # Utterances are ordered by title, abstract, then number in ascending order # (e.g. ti.000.1, ti.000.2, ab.000.1, ab.000.2, ab.000.3) # input : - # output: \@utterances <- $self's utterances ordered as they appear in the # original text of $self sub getOrderedUtterances { #initialize my $self = shift; my @utterances = (); #add concepts in sorted order foreach my $key(sort _by_utterance keys %{$self->{utterances}}) { push @utterances, $self->{utterances}{$key}; } return \@utterances; } # method to get an array of concepts that appear in the citation # (not necassarily ordered). Use this method if order doesn't matter for # increased performance. # input : - # output: \@concepts <- a list of concept objects sub getConcepts { #initialize my $self = shift; my @concepts = (); #add concepts in sorted order foreach my $key(keys %{$self->{utterances}}) { push @concepts, @{ $self->{utterances}{$key}->getConcepts() }; } return \@concepts; } # method to get the unique concepts and return a hash of # concepts, CUIs are the keys # input : - # output: \%concepts <- $self's unique concepts with the key as the concept's # CUI. CUIs are considered unique by their CUI code only (e.g. C0000000 # and C0000000 are considered the same even if there are two different # Concept.pm objects associated with them) sub getUniqueConcepts { my $self = shift; my %concepts = (); #update concepts foreach my $key(keys %{$self->{utterances}}) { my $utteranceConceptsRef = $self->{utterances}{$key}->getConcepts(); foreach my $concept(@{ $utteranceConceptsRef }) { my $cui = $concept->{cui}; if (!exists $concepts{$cui}) { $concepts{$cui} = $concept; } } } return \%concepts; } # method to get the an array of concepts that appear in the citation # concepts are ordered as they appear in the utterance # however where there are multiple mappings for a single # token those two concepts will appear adjacent to one another # input : - # output: \@conceptList <- an array of arrays, where each sub-array contains a # list of 1 or more concept objects. Where more than # one concept object occurrs it means the token to # concept mapping was ambiguous. Arrays are ordered as # the tokens occurr in the utterance. sub getOrderedConcepts { #initialize my $self = shift; my @conceptsList = (); #add concepts in sorted order foreach my $key(sort _by_utterance keys %{$self->{utterances}}) { push @conceptsList, @{ $self->{utterances}{$key}->getOrderedConcepts() }; } return \@conceptsList; } # method to get a list of ordered mappings. There may be multiple # mappings for a single utterance, but they will appear in correct # utterance order # input : - # output: \@mappings <- a list of mapping objects ordered by their occurence in # $self. sub getOrderedMappings { #initialize my $self = shift; my @mappings = (); #add mappings in sorted order foreach my $key(sort _by_utterance keys %{$self->{utterances}}) { push @mappings, @{ $self->{utterances}{$key}->getMappings() }; } return \@mappings; } # method to get all the mappings of the citation (not necassarily ordered) # input : - # output: \@mappings <- a list of mapping objects sub getMappings { #initialize my $self = shift; my @mappings = (); #add mappings in sorted order foreach my $key(keys %{$self->{utterances}}) { push @mappings, @{ $self->{utterances}{$key}->getMappings() }; } return \@mappings; } # method to get an array of ordered tokens as they appear in the citation # input : - # output: \@tokens <- a list of token objects ordered by their appearance in # $self sub getOrderedTokens { #initialize my $self = shift; my @tokens = (); #add words in sorted order foreach my $key(sort _by_utterance keys %{$self->{utterances}}) { push @tokens, @{ $self->{utterances}{$key}->getTokens() }; } return \@tokens; } # method to get an array of tokens. Tokens are not necassarily in order # input : - # output: \@tokens <- a list of token objects sub getTokens { #initialize my $self = shift; my @tokens = (); #add words in sorted order foreach my $key(keys %{$self->{utterances}}) { push @tokens, @{ $self->{utterances}{$key}->getTokens() }; } return \@tokens; } #---------------------- Has Parts (title or abstract) ------------------------- # method to determine if the citation contains any title utterances # input : - # output: boolean <- 1 if $self contains a title utterance, else 0 sub hasTitle { my $self = shift; return $self->_hasPart('ti'); } # method to determine if the citation contains any abstract utterances # input : - # output: boolean <- 1 if $self contains an abstract utterance, else 0 sub hasAbstract { my $self = shift; return $self->_hasPart('ab'); } # method to determine if the citation contains any utterances of the # tag ('ti' or 'ab') # input : $tag <- the utterance tag to check for, should be 'ti' or 'ab' # output: boolean <- 1 if $self contains an utterance with the $tag, else 0 sub _hasPart { my $self = shift; my $tag = shift; #get the utterances that match the tag foreach my $key(keys %{$self->{utterances}}) { if ($key =~ /(ti|ab)/) { if ($1 eq $tag) { #tag found, returning true return 1; } } } #no matching tags found, returning false return 0; } #---------------------------------------------------------------------------- #------------------ Get Parts (Title or Abstract) --------------------- # method to create a new citation containing just the title of this citation # input : - # output: $part <- a citation object containing all utterances of $self's title sub getTitle { my $self = shift; return $self->_getPart('ti'); } # method to create a new citation containing just the abstract of this citation # input : - # output: $part <- a citation object containing all utterances of $self's # abstract sub getAbstract { my $self = shift; return $self->_getPart('ab'); } # method to get a part of this citation (title or abstract) # input is a match string, either 'ti' or 'ab' # input : $tag <- the utterance tag to extract, should be 'ti' or 'ab' # output: $part <- a citation object containing all utterance of $self # containing the $tag in their ID sub _getPart { my $self = shift; my $tag = shift; #get the utterances that match the tag my $part = MetaMap::DataStructures::Citation->new($self->{id}); foreach my $key(keys %{$self->{utterances}}) { if($key =~ /(ti|ab)/) { if ($1 eq $tag) { $part->addUtterance($self->{utterances}{$key}); } } } #return the title citation return $part; } #----------------------------------------------------------------------- 1; __END__ =head1 NAME MetaMap::DataStructure::Citation - provides a container for the citation information extracted from machine readable MetaMap mapped text. =head1 DESCRIPTION This package provides a container for the citation information extracted from machine readable MetaMap mapped text. For more information please see the MetaMap::DataStructure.pm documentation. =head1 SYNOPSIS Add synopsis =head1 INSTALL To install the module, run the following magic commands: perl Makefile.PL make make test make install This will install the module in the standard location. You will, most probably, require root privileges to install in standard system directories. To install in a non-standard directory, specify a prefix during the 'perl Makefile.PL' stage as: perl Makefile.PL PREFIX=/home/sam It is possible to modify other parameters during installation. The details of these can be found in the ExtUtils::MakeMaker documentation. However, it is highly recommended not messing around with other parameters, unless you know what you're doing. =head1 AUTHOR Sam Henry <henryst@vcu.edu> Bridget T McInnes <bmcinnes@vcu.edu> =head1 COPYRIGHT Copyright (c) 2016 Sam Henry, Virginia Commonwealth Univesrity henryst at vcu.edu Bridget T. McInnes, Virginia Commonwealth Univesrity btmcinnes at vcu.edu This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to The Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.