#!/usr/local/bin/perl -w
package UniProtAccessor; # this should be the same as your filename!
use FindBin; # where was script installed?
use lib "$FindBin::Bin"; # use that dir for libs, too
use strict;
use JSON;
#-----------------------------------------------------------------
# Configuration and Daemon
#-----------------------------------------------------------------
# Service-level settings. The whole hash is flattened and handed to
# UniProtAccessor->new() further down.
my $config = {
title => 'UniProt Slice FAIR Accessor - Aspergillus RNA Processing proteins',
serviceTextualDescription => 'Takes a SAPRQL query of the UniProt database specific to proteins and their GO annotations related to RNA Procssing proteins in Aspergillus and makes it a FAIR Accessor source',
textualAccessibilityInfo => "The information from this server requries no authentication; HTTP GET is sufficient", # this could also be a $URI describing the accessibility
mechanizedAccessibilityInfo => "", # this must be a URI to an RDF document
textualLicenseInfo => "CC-BY-ND 4.0", # this could also be a URI to the license info
mechanizedLicenseInfo => "https://creativecommons.org/licenses/by-nd/4.0/", # this must be a URI to an RDF document
ETAG_Base => "TopLevelMetadata_Accessor_For_UniProtAnidulansRNAProcessing", # this is a unique identification string for the service (required by the LDP specification)
localNamespaces => {
}, # add a few new namespaces to the list of known namespaces....
basePATH => '/Accessors/UniProtAccessor', # REQUIRED regexp to match the RESTful PATH part of the URL, before the ID number
};
# Query text shared by the subs below: Container() sends it to the UniProt
# SPARQL endpoint to list record IDs, and fillContainerMetadata() embeds it
# verbatim in the container description.
# The BIND strips the "http://purl.uniprot.org/uniprot/" prefix from each
# protein URI so that ?id carries only the trailing accession.
# NOTE(review): the query uses the up:, rdfs: and taxon: prefixes without
# declaring them -- presumably the endpoint supplies defaults; confirm.
# NOTE(review): ?go is bound but not constrained to any particular GO term,
# although the service title mentions RNA Processing -- confirm this is intended.
my $SPARQL = '
SELECT distinct ?id
WHERE
{
?protein a up:Protein .
?protein up:organism ?organism .
?organism rdfs:subClassOf taxon:162425 .
?protein up:classifiedWith ?go .
bind(replace(str(?protein), "http://purl.uniprot.org/uniprot/", "", "i") as ?id)
}
';
# new() and handle_requests() are not defined in this file.
# NOTE(review): presumably they are inherited from the FAIR Accessor base
# class -- confirm that the base class is loaded for this package.
my $service = UniProtAccessor->new(%$config);
# start daemon
# (the named subs below are compiled before this statement executes, so they
# are already available to the request handler)
$service->handle_requests;
#-----------------------------------------------------------------
# Accessor Implementation
#-----------------------------------------------------------------
#------------- Container Resource --------------
# Build the Container resource: top-level metadata plus one URL per record.
#
# Arguments:
#   $self - the accessor object (must provide Configuration and
#           fillContainerMetadata)
#   %ARGS - accepted for interface compatibility; not used here
#
# Returns the populated FAIR::Accessor::Container object.
# Dies with a descriptive message if the remote SPARQL query fails.
sub Container {
    my ($self, %ARGS) = @_;

    my $Container = FAIR::Accessor::Container->new(NS => $self->Configuration->Namespaces);
    $self->fillContainerMetadata($Container);

    # Record URLs are formed as "<this request's URL>/<ID>". Drop any query
    # string or fragment and any trailing slashes first, otherwise the ID
    # would be appended after "?..." or produce a "//" in the path.
    my $server_name = defined $ENV{'SERVER_NAME'} ? $ENV{'SERVER_NAME'} : '';
    my $request_uri = defined $ENV{'REQUEST_URI'} ? $ENV{'REQUEST_URI'} : '';
    $request_uri =~ s/[?#].*\z//s;
    $request_uri =~ s{/+\z}{};
    my $BASE_URL = "http://" . $server_name . $request_uri;

    my $query    = RDF::Query::Client->new($SPARQL);
    my $iterator = $query->execute('http://sparql.uniprot.org/sparql/');

    # execute() yields no iterator when the remote query fails; report the
    # client's own error text instead of crashing on an undefined value.
    unless ($iterator) {
        my $reason = $query->error || 'unknown error';
        die "UniProt SPARQL query failed: $reason\n";
    }

    my @records;
    while (my $row = $iterator->next) {
        my $term = $row->{id};
        next unless defined $term;    # skip rows where ?id is unbound
        my $ID = $term->value;
        # one meta-record URL per UniProt identifier returned by the query
        push @records, "$BASE_URL/$ID";
    }

    $Container->addRecords(\@records);    # the listref of record URLs
    return $Container;
}
# --------------------MetaRecord Resource ---------------
# Build the MetaRecord resource for a single UniProt entry.
#
# Arguments:
#   $self - the accessor object (must provide Configuration,
#           fillMetaRecordMetadata and getDesc)
#   %ARGS - named arguments; 'ID' is the record identifier
#
# The record receives, in this order: the static metadata from
# fillMetaRecordMetadata, an HTML and an RDF/XML distribution on
# www.uniprot.org, a dcat:description (only when getDesc returned some text),
# and one Triple Pattern Fragment distribution per predicate listed below.
#
# Returns the populated FAIR::Accessor::MetaRecord object.
sub MetaRecord {
    my ($self, %ARGS) = @_;
    my $ID = $ARGS{'ID'};

    my $MetaRecord = FAIR::Accessor::MetaRecord->new(
        ID => $ID,
        NS => $self->Configuration->Namespaces,
    );
    $self->fillMetaRecordMetadata($MetaRecord);

    $MetaRecord->addDistribution(
        availableformats => ['text/html'],
        downloadURL      => "http://www.uniprot.org/uniprot/$ID.html",
    );
    $MetaRecord->addDistribution(
        availableformats => ['application/rdf+xml'],
        downloadURL      => "http://www.uniprot.org/uniprot/$ID.rdf",
    );

    # getDesc returns undef when the entry's RDF yields no names; do not
    # record an undefined (or empty) description in that case.
    my $desc = $self->getDesc($ID);
    if (defined $desc && length $desc) {
        $MetaRecord->addMetadata({ 'dcat:description' => $desc });
    }

    # Triple Pattern Fragment distributions: same subject every time, one
    # entry per predicate together with the template describing its objects.
    my $encodedsubject = urlencode("http://identifiers.org/uniprot/$ID");
    my @projections = (
        {
            predicate      => "http://purl.uniprot.org/core/organism",
            objecttemplate => "http://identifiers.org/taxon/{TAX}",
        },
        {
            predicate      => "http://purl.uniprot.org/core/classifiedWith",
            objecttemplate => "http://purl.obolibrary.org/obo/{GO}",
        },
    );
    for my $projection (@projections) {
        my $encodedpredicate = urlencode($projection->{predicate});
        my $TPF = "http://linkeddata.systems:3001/fragments?subject=$encodedsubject&predicate=$encodedpredicate";
        $MetaRecord->addDistribution(
            availableformats => ["application/x-turtle", "application/rdf+xml", "text/html"],
            downloadURL      => $TPF,
            subjecttemplate  => "http://identifiers.org/uniprot/{ID}",
            objecttemplate   => $projection->{objecttemplate},
        );
    }

    return $MetaRecord;
}
# Attach the fixed, collection-level metadata to a Container.
#
# Arguments:
#   $self      - the accessor object (not used by this sub)
#   $Container - object exposing addMetadata(\%hash)
#
# The description embeds the file-level $SPARQL text so that consumers can
# see the exact query behind this slice. Returns whatever addMetadata returns.
sub fillContainerMetadata {
    my ($self, $Container) = @_;

    my %metadata = (
        # what the collection is
        'dc:title'         => "UniProt Slice FAIR Accessor - Aspergillus RNA Processing proteins",
        'dcat:description' => "Takes a SPARQL query of the UniProt database specific to proteins and their GO annotations related to RNA Procssing proteins in Aspergillus and makes it a FAIR Accessor source. The precise query is:\n\n$SPARQL ",
        'dcat:keyword'     => ["Aspergillus", "Aspergillus nidulans", "RNA Processing", "Proteins"],
        'dcat:theme'       => 'http://linkeddata.systems/ConceptSchemes/RNA_Processing_conceptscheme.rdf', # this is the URI to a SKOS Concept Scheme
        'rdf:type'         => ['prov:Collection', 'dctypes:Dataset'],
        'dc:language'      => 'http://lexvo.org/id/iso639-3/eng',
        'pav:version'      => 'UniProt release 2016_09',
        'dcat:landingPage' => 'http://uniprot.org',

        # who is behind it
        'dcat:publisher'                 => ['http://wilkinsonlab.info'],
        'dc:creator'                     => 'http://wilkinsonlab.info',
        'pav:authoredBy'                 => ['http://orcid.org/0000-0002-9699-485X'],
        'pfund:hasPrincipalInvestigator' => ["Dr. Mark Wilkinson"],
    );

    return $Container->addMetadata(\%metadata);
}
# Attach the fixed, per-record metadata to a MetaRecord.
#
# Arguments:
#   $self       - the accessor object (not used by this sub)
#   $MetaRecord - object exposing ID() and addMetadata(\%hash)
#
# Only the primary topic, title and identifier depend on the record's ID;
# every other value is constant. Returns whatever addMetadata returns.
sub fillMetaRecordMetadata {
    my ($self, $MetaRecord) = @_;
    my $accession = $MetaRecord->ID;

    # values that vary with the record
    my %metadata = (
        'foaf:primaryTopic' => 'http://identifiers.org/uniprot/' . $accession,
        'dc:title'          => 'UniProt Protein ' . $accession,
        'dcat:identifier'   => 'http://uniprot.org/' . $accession,
    );

    # values shared by every record
    $metadata{'dcat:keyword'} = [
        "Aspergillus", "Aspergillus nidulans", "RNA Processing", "Proteins",
        "Annotation", "Functinal Annotation", "Gene Ontology", "GO",
    ];
    $metadata{'dcat:landingPage'}         = 'http://uniprot.org';
    $metadata{'pav:version'}              = 'UniProt release 2016_09';
    $metadata{'dc:language'}              = 'http://lexvo.org/id/iso639-3/eng';
    $metadata{'dcat:publisher'}           = ['http://uniprot.org'];
    $metadata{'dc:creator'}               = 'UniProt Consortium';
    $metadata{'dc:bibliographicCitation'} = "The UniProt Consortium (2015). UniProt: a hub for protein information. Nucleic Acids Res. 43: D204-D212";
    $metadata{'dcat:contactPoint'}        = 'http://www.uniprot.org/contact';

    return $MetaRecord->addMetadata(\%metadata);
}
# Percent-encode a string for use as a query-string value (form-style
# encoding: a space becomes "+").
#
# Argument: the string to encode. An undefined argument is returned as-is.
# Returns:  the encoded string. ASCII letters, digits and "-" are kept; every
#           other byte becomes %XX (upper-case hex).
#
# A literal "+" in the input is encoded as %2B so that it cannot be confused
# with an encoded space, and characters outside the byte range are encoded as
# their UTF-8 bytes rather than as an over-long "%XXXX" escape.
sub urlencode {
    my $s = shift;
    return $s unless defined $s;

    # Character strings are reduced to UTF-8 bytes first so that every
    # escape below is exactly two hex digits.
    utf8::encode($s) if utf8::is_utf8($s);

    # Spaces are left alone in this pass and turned into "+" afterwards; a
    # literal "+" is not in the safe set, so it is escaped here.
    $s =~ s/([^A-Za-z0-9 -])/sprintf("%%%02X", ord($1))/eg;
    $s =~ tr/ /+/;

    return $s;
}
# Look up the full name(s) of a UniProt entry.
#
# Arguments:
#   $self - the accessor object (not used by this sub)
#   $ID   - UniProt identifier, interpolated into the entry's RDF URL
#
# Fetches http://uniprot.org/uniprot/<ID>.rdf into an in-memory model, selects
# the core:fullName of every core:Structured_Name resource, and returns the
# names concatenated, each followed by a newline. Returns undef when the query
# yields no rows.
sub getDesc {
    my ($self, $ID) = @_;

    # Throw-away in-memory model holding just this entry's RDF.
    my $model = RDF::Trine::Model->new( RDF::Trine::Store::Memory->new() );
    RDF::Trine::Parser->parse_url_into_model( "http://uniprot.org/uniprot/$ID.rdf", $model );

    my $name_query = RDF::Query->new( 'PREFIX core: <http://purl.uniprot.org/core/>
select ?desc where
{
?x a core:Structured_Name .
?x core:fullName ?desc .
}
' );

    # Each result row is a hashref mapping variable name to an RDF term.
    my @names;
    my $results = $name_query->execute( $model );
    while (my $binding = $results->next) {
        push @names, $binding->{ 'desc' }->value;
    }

    # Stay undefined when nothing matched; otherwise one name per line.
    my $desc;
    $desc = join('', map { $_ . "\n" } @names) if @names;
    return $desc;
}