package Genealogy::Wills::DB;
=head1
Genealogy::Wills::DB
=cut
# Author Nigel Horne: njh@bandsman.co.uk
# Copyright (C) 2023, Nigel Horne
# Usage is subject to licence terms.
# The licence terms of this software are as follows:
# Personal single user, single computer use: GPL2
# All other users (including Commercial, Charity, Educational, Government)
# must apply in writing for a licence for use from Nigel Horne at the
# above e-mail.
# Abstract class giving read-only access to CSV, XML and SQLite databases via Perl without writing any SQL.
# Look for databases in $directory in this order;
# SQLite (file ends with .sql)
# PSV (pipe separated file, file ends with .psv)
# CSV (file ends with .csv or .db, can be gzipped)
# XML (file ends with .xml)
# For example, you can access the files in /var/db/foo.csv via this class:
# package MyPackageName::DB::foo;
# use NJH::Snippets::DB;
# our @ISA = ('NJH::Snippets::DB');
# 1;
# You can then access the data using:
# my $foo = MyPackageName::DB::foo->new(directory => '/var/db');
# my $row = $foo->fetchrow_hashref(customer_id => '12345);
# print Data::Dumper->new([$row])->Dump();
# CSV files can have empty lines of comment lines starting with '#', to make them more readable
# If the table has a column called "entry", sorts are based on that
# To turn that off, pass 'no_entry' to the constructor, for legacy
# reasons it's enabled by default
# TODO: Switch that to off by default, and enable by passing 'entry'
# TODO: support a directory hierarchy of databases
# TODO: consider returning an object or array of objects, rather than hashes
# TODO: Add redis database - could be of use for Geo::Coder::Free
# use select() to select a database - use the table arg
# new(database => 'redis://servername');
use warnings;
use strict;
use DBD::SQLite::Constants qw/:file_open/; # For SQLITE_OPEN_READONLY
use File::Basename;
use File::Spec;
use File::pfopen 0.02;
use File::Temp;
use Error::Simple;
use Carp;
our $directory;
our $logger;
our $cache;
sub new {
my $proto = shift;
my %args = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
my $class = ref($proto) || $proto;
if($class eq __PACKAGE__) {
die "$class: abstract class";
}
die "$class: where are the files?" unless($directory || $args{'directory'});
# init(\%args);
return bless {
logger => $args{'logger'} || $logger,
directory => $args{'directory'} || $directory, # The directory containing the tables in XML, SQLite or CSV format
cache => $args{'cache'} || $cache,
table => $args{'table'}, # The name of the file containing the table, defaults to the class name
no_entry => $args{'no_entry'} || 0,
}, $class;
}
# Can also be run as a class level __PACKAGE__::DB::init(directory => '../databases')
sub init {
my %args = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
$directory ||= $args{'directory'};
$logger ||= $args{'logger'};
$cache ||= $args{'cache'};
}
sub set_logger {
my $self = shift;
my %args;
if(ref($_[0]) eq 'HASH') {
%args = %{$_[0]};
} elsif(!ref($_[0])) {
Carp::croak('Usage: set_logger(logger => $logger)');
} elsif(scalar(@_) % 2 == 0) {
%args = @_;
} else {
$args{'logger'} = shift;
}
$self->{'logger'} = $args{'logger'};
return $self;
}
# Open the database.
sub _open {
my $self = shift;
my %args = (
sep_char => '!',
((ref($_[0]) eq 'HASH') ? %{$_[0]} : @_)
);
my $table = $self->{'table'} || ref($self);
$table =~ s/.*:://;
if($self->{'logger'}) {
$self->{'logger'}->trace("_open $table");
}
return if($self->{$table});
# Read in the database
my $dbh;
my $dir = $self->{'directory'} || $directory;
my $slurp_file = File::Spec->catfile($dir, "$table.sql");
if($self->{'logger'}) {
$self->{'logger'}->debug("_open: try to open $slurp_file");
}
if(-r $slurp_file) {
require DBI;
DBI->import();
$dbh = DBI->connect("dbi:SQLite:dbname=$slurp_file", undef, undef, {
sqlite_open_flags => SQLITE_OPEN_READONLY,
});
$dbh->do('PRAGMA synchronous = OFF');
$dbh->do('PRAGMA cache_size = 65536');
if($self->{'logger'}) {
$self->{'logger'}->debug("read in $table from SQLite $slurp_file");
}
$self->{'type'} = 'DBI';
} else {
my $fin;
($fin, $slurp_file) = File::pfopen::pfopen($dir, $table, 'csv.gz:db.gz');
if(defined($slurp_file) && (-r $slurp_file)) {
require Gzip::Faster;
Gzip::Faster->import();
close($fin);
$fin = File::Temp->new(SUFFIX => '.csv', UNLINK => 0);
print $fin gunzip_file($slurp_file);
$slurp_file = $fin->filename();
$self->{'temp'} = $slurp_file;
} else {
($fin, $slurp_file) = File::pfopen::pfopen($dir, $table, 'psv');
if(defined($fin)) {
# Pipe separated file
$args{'sep_char'} = '|';
} else {
($fin, $slurp_file) = File::pfopen::pfopen($dir, $table, 'csv:db');
}
}
if(defined($slurp_file) && (-r $slurp_file)) {
close($fin);
my $sep_char = $args{'sep_char'};
if($args{'column_names'}) {
$dbh = DBI->connect("dbi:CSV:csv_sep_char=$sep_char", undef, undef,
{
csv_tables => {
$table => {
col_names => $args{'column_names'},
},
},
}
);
} else {
$dbh = DBI->connect("dbi:CSV:csv_sep_char=$sep_char");
}
$dbh->{'RaiseError'} = 1;
if($self->{'logger'}) {
$self->{'logger'}->debug("read in $table from CSV $slurp_file");
}
$dbh->{csv_tables}->{$table} = {
allow_loose_quotes => 1,
blank_is_undef => 1,
empty_is_undef => 1,
binary => 1,
f_file => $slurp_file,
escape_char => '\\',
sep_char => $sep_char,
# Don't do this, causes "Bizarre copy of HASH
# in scalar assignment in error_diag
# RT121127
# auto_diag => 1,
auto_diag => 0,
# Don't do this, it causes "Attempt to free unreferenced scalar"
# callbacks => {
# after_parse => sub {
# my ($csv, @rows) = @_;
# my @rc;
# foreach my $row(@rows) {
# if($row->[0] !~ /^#/) {
# push @rc, $row;
# }
# }
# return @rc;
# }
# }
};
# my %options = (
# allow_loose_quotes => 1,
# blank_is_undef => 1,
# empty_is_undef => 1,
# binary => 1,
# f_file => $slurp_file,
# escape_char => '\\',
# sep_char => $sep_char,
# );
# $dbh->{csv_tables}->{$table} = \%options;
# delete $options{f_file};
# require Text::CSV::Slurp;
# Text::CSV::Slurp->import();
# $self->{'data'} = Text::CSV::Slurp->load(file => $slurp_file, %options);
if(0) {
require Text::xSV::Slurp;
Text::xSV::Slurp->import();
my @data = @{xsv_slurp(
shape => 'aoh',
text_csv => {
sep_char => $sep_char,
allow_loose_quotes => 1,
blank_is_undef => 1,
empty_is_undef => 1,
binary => 1,
escape_char => '\\',
},
# string => \join('', grep(!/^\s*(#|$)/, <DATA>))
file => $slurp_file
)};
# Ignore blank lines or lines starting with # in the CSV file
unless($self->{no_entry}) {
@data = grep { $_->{'entry'} !~ /^\s*#/ } grep { defined($_->{'entry'}) } @data;
}
# $self->{'data'} = @data;
my $i = 0;
$self->{'data'} = ();
foreach my $d(@data) {
$self->{'data'}[$i++] = $d;
}
}
$self->{'type'} = 'CSV';
} else {
$slurp_file = File::Spec->catfile($dir, "$table.xml");
if(-r $slurp_file) {
$dbh = DBI->connect('dbi:XMLSimple(RaiseError=>1):');
$dbh->{'RaiseError'} = 1;
if($self->{'logger'}) {
$self->{'logger'}->debug("read in $table from XML $slurp_file");
}
$dbh->func($table, 'XML', $slurp_file, 'xmlsimple_import');
} else {
my @call_details = caller(0);
throw Error::Simple("Can't open $slurp_file called from " .
$call_details[2] . ' of ' . $call_details[1]);
}
$self->{'type'} = 'XML';
}
}
$self->{$table} = $dbh;
my @statb = stat($slurp_file);
$self->{'_updated'} = $statb[9];
return $self;
}
# Returns a reference to an array of hash references of all the data meeting
# the given criteria
sub selectall_hashref {
my $self = shift;
my @rc = $self->selectall_hash(@_);
return \@rc;
}
# Returns an array of hash references
sub selectall_hash {
my $self = shift;
my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
my $table = $self->{table} || ref($self);
$table =~ s/.*:://;
$self->_open() if(!$self->{$table});
if((scalar(keys %params) == 0) && $self->{'data'}) {
if($self->{'logger'}) {
$self->{'logger'}->trace("$table: selectall_hash fast track return");
}
# This use of a temporary variable is to avoid
# "Implicit scalar context for array in return"
# return @{$self->{'data'}};
my @rc = @{$self->{'data'}};
return @rc;
}
# if((scalar(keys %params) == 1) && $self->{'data'} && defined($params{'entry'})) {
# }
my $query;
my $done_where = 0;
if(($self->{'type'} eq 'CSV') && !$self->{no_entry}) {
$query = "SELECT * FROM $table WHERE entry IS NOT NULL AND entry NOT LIKE '#%'";
$done_where = 1;
} else {
$query = "SELECT * FROM $table";
}
my @query_args;
foreach my $c1(sort keys(%params)) { # sort so that the key is always the same
my $arg = $params{$c1};
if(ref($arg)) {
if($self->{'logger'}) {
$self->{'logger'}->fatal("selectall_hash $query: argument is not a string");
}
throw Error::Simple("$query: argument is not a string");
}
if(!defined($arg)) {
my @call_details = caller(0);
throw Error::Simple("$query: value for $c1 is not defined in call from " .
$call_details[2] . ' of ' . $call_details[1]);
}
if($done_where) {
if($arg =~ /\@/) {
$query .= " AND $c1 LIKE ?";
} else {
$query .= " AND $c1 = ?";
}
} else {
if($arg =~ /\@/) {
$query .= " WHERE $c1 LIKE ?";
} else {
$query .= " WHERE $c1 = ?";
}
$done_where = 1;
}
push @query_args, $arg;
}
if(!$self->{no_entry}) {
$query .= ' ORDER BY entry';
}
if(!wantarray) {
$query .= ' LIMIT 1';
}
if($self->{'logger'}) {
if(defined($query_args[0])) {
$self->{'logger'}->debug("selectall_hash $query: ", join(', ', @query_args));
} else {
$self->{'logger'}->debug("selectall_hash $query");
}
}
my $key;
my $c;
if($c = $self->{cache}) {
$key = $query;
if(defined($query_args[0])) {
$key .= ' ' . join(', ', @query_args);
}
if(my $rc = $c->get($key)) {
# This use of a temporary variable is to avoid
# "Implicit scalar context for array in return"
# return @{$rc};
my @rc = @{$rc};
return @rc;
}
}
if(my $sth = $self->{$table}->prepare($query)) {
$sth->execute(@query_args) ||
throw Error::Simple("$query: @query_args");
my @rc;
while(my $href = $sth->fetchrow_hashref()) {
# FIXME: Doesn't store in the cache
return $href if(!wantarray);
push @rc, $href;
}
if($c && wantarray) {
$c->set($key, \@rc, '1 hour');
}
return @rc;
}
if($self->{'logger'}) {
$self->{'logger'}->warn("selectall_hash failure on $query: @query_args");
}
throw Error::Simple("$query: @query_args");
}
# Returns a hash reference for one row in a table
# Special argument: table: determines the table to read from if not the default,
# which is worked out from the class name
sub fetchrow_hashref {
my $self = shift;
my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
my $table = $self->{'table'} || ref($self);
$table =~ s/.*:://;
$self->_open() if(!$self->{$table});
my $query = 'SELECT * FROM ';
if(my $t = delete $params{'table'}) {
$query .= $t;
} else {
$query .= $table;
}
my $done_where = 0;
if(($self->{'type'} eq 'CSV') && !$self->{no_entry}) {
$query .= " WHERE entry IS NOT NULL AND entry NOT LIKE '#%'";
$done_where = 1;
}
my @query_args;
foreach my $c1(sort keys(%params)) { # sort so that the key is always the same
if(my $arg = $params{$c1}) {
if($done_where) {
if($arg =~ /\@/) {
$query .= " AND $c1 LIKE ?";
} else {
$query .= " AND $c1 = ?";
}
} else {
if($arg =~ /\@/) {
$query .= " WHERE $c1 LIKE ?";
} else {
$query .= " WHERE $c1 = ?";
}
$done_where = 1;
}
push @query_args, $arg;
}
}
# $query .= ' ORDER BY entry LIMIT 1';
$query .= ' LIMIT 1';
if($self->{'logger'}) {
if(defined($query_args[0])) {
my @call_details = caller(0);
$self->{'logger'}->debug("fetchrow_hashref $query: ", join(', ', @query_args),
' called from ', $call_details[2], ' of ', $call_details[1]);
} else {
$self->{'logger'}->debug("fetchrow_hashref $query");
}
}
my $key;
if(defined($query_args[0])) {
$key = "fetchrow $query " . join(', ', @query_args);
} else {
$key = "fetchrow $query";
}
my $c;
if($c = $self->{cache}) {
if(my $rc = $c->get($key)) {
return $rc;
}
}
my $sth = $self->{$table}->prepare($query) or die $self->{$table}->errstr();
$sth->execute(@query_args) || throw Error::Simple("$query: @query_args");
if($c) {
my $rc = $sth->fetchrow_hashref();
$c->set($key, $rc, '1 hour');
return $rc;
}
return $sth->fetchrow_hashref();
}
# Execute the given SQL on the data
# In an array context, returns an array of hash refs,
# in a scalar context returns a hash of the first row
sub execute {
my $self = shift;
my %args;
if(ref($_[0]) eq 'HASH') {
%args = %{$_[0]};
} elsif(ref($_[0])) {
Carp::croak('Usage: execute(query => $query)');
} elsif(scalar(@_) % 2 == 0) {
%args = @_;
} else {
$args{'query'} = shift;
}
Carp::croak('Usage: execute(query => $query)') unless(defined($args{'query'}));
my $table = $self->{table} || ref($self);
$table =~ s/.*:://;
$self->_open() if(!$self->{$table});
my $query = $args{'query'};
if($self->{'logger'}) {
$self->{'logger'}->debug("execute $query");
}
my $sth = $self->{$table}->prepare($query);
$sth->execute() || throw Error::Simple($query);
my @rc;
while(my $href = $sth->fetchrow_hashref()) {
return $href if(!wantarray);
push @rc, $href;
}
return @rc;
}
# Time that the database was last updated
sub updated {
my $self = shift;
return $self->{'_updated'};
}
# Return the contents of an arbitrary column in the database which match the
# given criteria
# Returns an array of the matches, or just the first entry when called in
# scalar context
# Set distinct to 1 if you're after a unique list
sub AUTOLOAD {
our $AUTOLOAD;
my $column = $AUTOLOAD;
$column =~ s/.*:://;
return if($column eq 'DESTROY');
my $self = shift or return;
my $table = $self->{table} || ref($self);
$table =~ s/.*:://;
$self->_open() if(!$self->{$table});
my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
my $query;
my $done_where = 0;
if(wantarray && !delete($params{'distinct'})) {
if(($self->{'type'} eq 'CSV') && !$self->{no_entry}) {
$query = "SELECT $column FROM $table WHERE entry IS NOT NULL AND entry NOT LIKE '#%'";
$done_where = 1;
} else {
$query = "SELECT $column FROM $table";
}
} else {
if(($self->{'type'} eq 'CSV') && !$self->{no_entry}) {
$query = "SELECT DISTINCT $column FROM $table WHERE entry IS NOT NULL AND entry NOT LIKE '#%'";
$done_where = 1;
} else {
$query = "SELECT DISTINCT $column FROM $table";
}
}
my @args;
while(my ($key, $value) = each %params) {
if(defined($value)) {
if($done_where) {
$query .= " AND $key = ?";
} else {
$query .= " WHERE $key = ?";
$done_where = 1;
}
push @args, $value;
} else {
if($self->{'logger'}) {
$self->{'logger'}->debug("AUTOLOAD params $key isn't defined");
}
if($done_where) {
$query .= " AND $key IS NULL";
} else {
$query .= " WHERE $key IS NULL";
$done_where = 1;
}
}
}
$query .= " ORDER BY $column";
if(!wantarray) {
$query .= ' LIMIT 1';
}
if($self->{'logger'}) {
if(scalar(@args) && $args[0]) {
$self->{'logger'}->debug("AUTOLOAD $query: ", join(', ', @args));
} else {
$self->{'logger'}->debug("AUTOLOAD $query");
}
}
my $sth = $self->{$table}->prepare($query) || throw Error::Simple($query);
$sth->execute(@args) || throw Error::Simple($query);
if(wantarray) {
return map { $_->[0] } @{$sth->fetchall_arrayref()};
}
return $sth->fetchrow_array(); # Return the first match only
}
sub DESTROY {
if(defined($^V) && ($^V ge 'v5.14.0')) {
return if ${^GLOBAL_PHASE} eq 'DESTRUCT'; # >= 5.14.0 only
}
my $self = shift;
if($self->{'temp'}) {
unlink delete $self->{'temp'};
}
if(my $table = delete $self->{'table'}) {
$table->finish();
}
}
1;