package Genealogy::Wills::DB; =head1 Genealogy::Wills::DB =cut # Author Nigel Horne: njh@bandsman.co.uk # Copyright (C) 2023, Nigel Horne # Usage is subject to licence terms. # The licence terms of this software are as follows: # Personal single user, single computer use: GPL2 # All other users (including Commercial, Charity, Educational, Government) # must apply in writing for a licence for use from Nigel Horne at the # above e-mail. # Abstract class giving read-only access to CSV, XML and SQLite databases via Perl without writing any SQL. # Look for databases in $directory in this order; # SQLite (file ends with .sql) # PSV (pipe separated file, file ends with .psv) # CSV (file ends with .csv or .db, can be gzipped) # XML (file ends with .xml) # For example, you can access the files in /var/db/foo.csv via this class: # package MyPackageName::DB::foo; # use NJH::Snippets::DB; # our @ISA = ('NJH::Snippets::DB'); # 1; # You can then access the data using: # my $foo = MyPackageName::DB::foo->new(directory => '/var/db'); # my $row = $foo->fetchrow_hashref(customer_id => '12345); # print Data::Dumper->new([$row])->Dump(); # CSV files can have empty lines of comment lines starting with '#', to make them more readable # If the table has a column called "entry", sorts are based on that # To turn that off, pass 'no_entry' to the constructor, for legacy # reasons it's enabled by default # TODO: Switch that to off by default, and enable by passing 'entry' # TODO: support a directory hierarchy of databases # TODO: consider returning an object or array of objects, rather than hashes # TODO: Add redis database - could be of use for Geo::Coder::Free # use select() to select a database - use the table arg # new(database => 'redis://servername'); use warnings; use strict; use DBD::SQLite::Constants qw/:file_open/; # For SQLITE_OPEN_READONLY use File::Basename; use File::Spec; use File::pfopen 0.02; use File::Temp; use Error::Simple; use Carp; our $directory; our $logger; our $cache; sub new { my $proto = shift; my %args = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_; my $class = ref($proto) || $proto; if($class eq __PACKAGE__) { die "$class: abstract class"; } die "$class: where are the files?" unless($directory || $args{'directory'}); # init(\%args); return bless { logger => $args{'logger'} || $logger, directory => $args{'directory'} || $directory, # The directory containing the tables in XML, SQLite or CSV format cache => $args{'cache'} || $cache, table => $args{'table'}, # The name of the file containing the table, defaults to the class name no_entry => $args{'no_entry'} || 0, }, $class; } # Can also be run as a class level __PACKAGE__::DB::init(directory => '../databases') sub init { my %args = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_; $directory ||= $args{'directory'}; $logger ||= $args{'logger'}; $cache ||= $args{'cache'}; } sub set_logger { my $self = shift; my %args; if(ref($_[0]) eq 'HASH') { %args = %{$_[0]}; } elsif(!ref($_[0])) { Carp::croak('Usage: set_logger(logger => $logger)'); } elsif(scalar(@_) % 2 == 0) { %args = @_; } else { $args{'logger'} = shift; } $self->{'logger'} = $args{'logger'}; return $self; } # Open the database. sub _open { my $self = shift; my %args = ( sep_char => '!', ((ref($_[0]) eq 'HASH') ? %{$_[0]} : @_) ); my $table = $self->{'table'} || ref($self); $table =~ s/.*:://; if($self->{'logger'}) { $self->{'logger'}->trace("_open $table"); } return if($self->{$table}); # Read in the database my $dbh; my $dir = $self->{'directory'} || $directory; my $slurp_file = File::Spec->catfile($dir, "$table.sql"); if($self->{'logger'}) { $self->{'logger'}->debug("_open: try to open $slurp_file"); } if(-r $slurp_file) { require DBI; DBI->import(); $dbh = DBI->connect("dbi:SQLite:dbname=$slurp_file", undef, undef, { sqlite_open_flags => SQLITE_OPEN_READONLY, }); $dbh->do('PRAGMA synchronous = OFF'); $dbh->do('PRAGMA cache_size = 65536'); if($self->{'logger'}) { $self->{'logger'}->debug("read in $table from SQLite $slurp_file"); } $self->{'type'} = 'DBI'; } else { my $fin; ($fin, $slurp_file) = File::pfopen::pfopen($dir, $table, 'csv.gz:db.gz'); if(defined($slurp_file) && (-r $slurp_file)) { require Gzip::Faster; Gzip::Faster->import(); close($fin); $fin = File::Temp->new(SUFFIX => '.csv', UNLINK => 0); print $fin gunzip_file($slurp_file); $slurp_file = $fin->filename(); $self->{'temp'} = $slurp_file; } else { ($fin, $slurp_file) = File::pfopen::pfopen($dir, $table, 'psv'); if(defined($fin)) { # Pipe separated file $args{'sep_char'} = '|'; } else { ($fin, $slurp_file) = File::pfopen::pfopen($dir, $table, 'csv:db'); } } if(defined($slurp_file) && (-r $slurp_file)) { close($fin); my $sep_char = $args{'sep_char'}; if($args{'column_names'}) { $dbh = DBI->connect("dbi:CSV:csv_sep_char=$sep_char", undef, undef, { csv_tables => { $table => { col_names => $args{'column_names'}, }, }, } ); } else { $dbh = DBI->connect("dbi:CSV:csv_sep_char=$sep_char"); } $dbh->{'RaiseError'} = 1; if($self->{'logger'}) { $self->{'logger'}->debug("read in $table from CSV $slurp_file"); } $dbh->{csv_tables}->{$table} = { allow_loose_quotes => 1, blank_is_undef => 1, empty_is_undef => 1, binary => 1, f_file => $slurp_file, escape_char => '\\', sep_char => $sep_char, # Don't do this, causes "Bizarre copy of HASH # in scalar assignment in error_diag # RT121127 # auto_diag => 1, auto_diag => 0, # Don't do this, it causes "Attempt to free unreferenced scalar" # callbacks => { # after_parse => sub { # my ($csv, @rows) = @_; # my @rc; # foreach my $row(@rows) { # if($row->[0] !~ /^#/) { # push @rc, $row; # } # } # return @rc; # } # } }; # my %options = ( # allow_loose_quotes => 1, # blank_is_undef => 1, # empty_is_undef => 1, # binary => 1, # f_file => $slurp_file, # escape_char => '\\', # sep_char => $sep_char, # ); # $dbh->{csv_tables}->{$table} = \%options; # delete $options{f_file}; # require Text::CSV::Slurp; # Text::CSV::Slurp->import(); # $self->{'data'} = Text::CSV::Slurp->load(file => $slurp_file, %options); if(0) { require Text::xSV::Slurp; Text::xSV::Slurp->import(); my @data = @{xsv_slurp( shape => 'aoh', text_csv => { sep_char => $sep_char, allow_loose_quotes => 1, blank_is_undef => 1, empty_is_undef => 1, binary => 1, escape_char => '\\', }, # string => \join('', grep(!/^\s*(#|$)/, )) file => $slurp_file )}; # Ignore blank lines or lines starting with # in the CSV file unless($self->{no_entry}) { @data = grep { $_->{'entry'} !~ /^\s*#/ } grep { defined($_->{'entry'}) } @data; } # $self->{'data'} = @data; my $i = 0; $self->{'data'} = (); foreach my $d(@data) { $self->{'data'}[$i++] = $d; } } $self->{'type'} = 'CSV'; } else { $slurp_file = File::Spec->catfile($dir, "$table.xml"); if(-r $slurp_file) { $dbh = DBI->connect('dbi:XMLSimple(RaiseError=>1):'); $dbh->{'RaiseError'} = 1; if($self->{'logger'}) { $self->{'logger'}->debug("read in $table from XML $slurp_file"); } $dbh->func($table, 'XML', $slurp_file, 'xmlsimple_import'); } else { my @call_details = caller(0); throw Error::Simple("Can't open $slurp_file called from " . $call_details[2] . ' of ' . $call_details[1]); } $self->{'type'} = 'XML'; } } $self->{$table} = $dbh; my @statb = stat($slurp_file); $self->{'_updated'} = $statb[9]; return $self; } # Returns a reference to an array of hash references of all the data meeting # the given criteria sub selectall_hashref { my $self = shift; my @rc = $self->selectall_hash(@_); return \@rc; } # Returns an array of hash references sub selectall_hash { my $self = shift; my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_; my $table = $self->{table} || ref($self); $table =~ s/.*:://; $self->_open() if(!$self->{$table}); if((scalar(keys %params) == 0) && $self->{'data'}) { if($self->{'logger'}) { $self->{'logger'}->trace("$table: selectall_hash fast track return"); } # This use of a temporary variable is to avoid # "Implicit scalar context for array in return" # return @{$self->{'data'}}; my @rc = @{$self->{'data'}}; return @rc; } # if((scalar(keys %params) == 1) && $self->{'data'} && defined($params{'entry'})) { # } my $query; my $done_where = 0; if(($self->{'type'} eq 'CSV') && !$self->{no_entry}) { $query = "SELECT * FROM $table WHERE entry IS NOT NULL AND entry NOT LIKE '#%'"; $done_where = 1; } else { $query = "SELECT * FROM $table"; } my @query_args; foreach my $c1(sort keys(%params)) { # sort so that the key is always the same my $arg = $params{$c1}; if(ref($arg)) { if($self->{'logger'}) { $self->{'logger'}->fatal("selectall_hash $query: argument is not a string"); } throw Error::Simple("$query: argument is not a string"); } if(!defined($arg)) { my @call_details = caller(0); throw Error::Simple("$query: value for $c1 is not defined in call from " . $call_details[2] . ' of ' . $call_details[1]); } if($done_where) { if($arg =~ /\@/) { $query .= " AND $c1 LIKE ?"; } else { $query .= " AND $c1 = ?"; } } else { if($arg =~ /\@/) { $query .= " WHERE $c1 LIKE ?"; } else { $query .= " WHERE $c1 = ?"; } $done_where = 1; } push @query_args, $arg; } if(!$self->{no_entry}) { $query .= ' ORDER BY entry'; } if(!wantarray) { $query .= ' LIMIT 1'; } if($self->{'logger'}) { if(defined($query_args[0])) { $self->{'logger'}->debug("selectall_hash $query: ", join(', ', @query_args)); } else { $self->{'logger'}->debug("selectall_hash $query"); } } my $key; my $c; if($c = $self->{cache}) { $key = $query; if(defined($query_args[0])) { $key .= ' ' . join(', ', @query_args); } if(my $rc = $c->get($key)) { # This use of a temporary variable is to avoid # "Implicit scalar context for array in return" # return @{$rc}; my @rc = @{$rc}; return @rc; } } if(my $sth = $self->{$table}->prepare($query)) { $sth->execute(@query_args) || throw Error::Simple("$query: @query_args"); my @rc; while(my $href = $sth->fetchrow_hashref()) { # FIXME: Doesn't store in the cache return $href if(!wantarray); push @rc, $href; } if($c && wantarray) { $c->set($key, \@rc, '1 hour'); } return @rc; } if($self->{'logger'}) { $self->{'logger'}->warn("selectall_hash failure on $query: @query_args"); } throw Error::Simple("$query: @query_args"); } # Returns a hash reference for one row in a table # Special argument: table: determines the table to read from if not the default, # which is worked out from the class name sub fetchrow_hashref { my $self = shift; my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_; my $table = $self->{'table'} || ref($self); $table =~ s/.*:://; $self->_open() if(!$self->{$table}); my $query = 'SELECT * FROM '; if(my $t = delete $params{'table'}) { $query .= $t; } else { $query .= $table; } my $done_where = 0; if(($self->{'type'} eq 'CSV') && !$self->{no_entry}) { $query .= " WHERE entry IS NOT NULL AND entry NOT LIKE '#%'"; $done_where = 1; } my @query_args; foreach my $c1(sort keys(%params)) { # sort so that the key is always the same if(my $arg = $params{$c1}) { if($done_where) { if($arg =~ /\@/) { $query .= " AND $c1 LIKE ?"; } else { $query .= " AND $c1 = ?"; } } else { if($arg =~ /\@/) { $query .= " WHERE $c1 LIKE ?"; } else { $query .= " WHERE $c1 = ?"; } $done_where = 1; } push @query_args, $arg; } } # $query .= ' ORDER BY entry LIMIT 1'; $query .= ' LIMIT 1'; if($self->{'logger'}) { if(defined($query_args[0])) { my @call_details = caller(0); $self->{'logger'}->debug("fetchrow_hashref $query: ", join(', ', @query_args), ' called from ', $call_details[2], ' of ', $call_details[1]); } else { $self->{'logger'}->debug("fetchrow_hashref $query"); } } my $key; if(defined($query_args[0])) { $key = "fetchrow $query " . join(', ', @query_args); } else { $key = "fetchrow $query"; } my $c; if($c = $self->{cache}) { if(my $rc = $c->get($key)) { return $rc; } } my $sth = $self->{$table}->prepare($query) or die $self->{$table}->errstr(); $sth->execute(@query_args) || throw Error::Simple("$query: @query_args"); if($c) { my $rc = $sth->fetchrow_hashref(); $c->set($key, $rc, '1 hour'); return $rc; } return $sth->fetchrow_hashref(); } # Execute the given SQL on the data # In an array context, returns an array of hash refs, # in a scalar context returns a hash of the first row sub execute { my $self = shift; my %args; if(ref($_[0]) eq 'HASH') { %args = %{$_[0]}; } elsif(ref($_[0])) { Carp::croak('Usage: execute(query => $query)'); } elsif(scalar(@_) % 2 == 0) { %args = @_; } else { $args{'query'} = shift; } Carp::croak('Usage: execute(query => $query)') unless(defined($args{'query'})); my $table = $self->{table} || ref($self); $table =~ s/.*:://; $self->_open() if(!$self->{$table}); my $query = $args{'query'}; if($self->{'logger'}) { $self->{'logger'}->debug("execute $query"); } my $sth = $self->{$table}->prepare($query); $sth->execute() || throw Error::Simple($query); my @rc; while(my $href = $sth->fetchrow_hashref()) { return $href if(!wantarray); push @rc, $href; } return @rc; } # Time that the database was last updated sub updated { my $self = shift; return $self->{'_updated'}; } # Return the contents of an arbitrary column in the database which match the # given criteria # Returns an array of the matches, or just the first entry when called in # scalar context # Set distinct to 1 if you're after a unique list sub AUTOLOAD { our $AUTOLOAD; my $column = $AUTOLOAD; $column =~ s/.*:://; return if($column eq 'DESTROY'); my $self = shift or return; my $table = $self->{table} || ref($self); $table =~ s/.*:://; $self->_open() if(!$self->{$table}); my %params = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_; my $query; my $done_where = 0; if(wantarray && !delete($params{'distinct'})) { if(($self->{'type'} eq 'CSV') && !$self->{no_entry}) { $query = "SELECT $column FROM $table WHERE entry IS NOT NULL AND entry NOT LIKE '#%'"; $done_where = 1; } else { $query = "SELECT $column FROM $table"; } } else { if(($self->{'type'} eq 'CSV') && !$self->{no_entry}) { $query = "SELECT DISTINCT $column FROM $table WHERE entry IS NOT NULL AND entry NOT LIKE '#%'"; $done_where = 1; } else { $query = "SELECT DISTINCT $column FROM $table"; } } my @args; while(my ($key, $value) = each %params) { if(defined($value)) { if($done_where) { $query .= " AND $key = ?"; } else { $query .= " WHERE $key = ?"; $done_where = 1; } push @args, $value; } else { if($self->{'logger'}) { $self->{'logger'}->debug("AUTOLOAD params $key isn't defined"); } if($done_where) { $query .= " AND $key IS NULL"; } else { $query .= " WHERE $key IS NULL"; $done_where = 1; } } } $query .= " ORDER BY $column"; if(!wantarray) { $query .= ' LIMIT 1'; } if($self->{'logger'}) { if(scalar(@args) && $args[0]) { $self->{'logger'}->debug("AUTOLOAD $query: ", join(', ', @args)); } else { $self->{'logger'}->debug("AUTOLOAD $query"); } } my $sth = $self->{$table}->prepare($query) || throw Error::Simple($query); $sth->execute(@args) || throw Error::Simple($query); if(wantarray) { return map { $_->[0] } @{$sth->fetchall_arrayref()}; } return $sth->fetchrow_array(); # Return the first match only } sub DESTROY { if(defined($^V) && ($^V ge 'v5.14.0')) { return if ${^GLOBAL_PHASE} eq 'DESTRUCT'; # >= 5.14.0 only } my $self = shift; if($self->{'temp'}) { unlink delete $self->{'temp'}; } if(my $table = delete $self->{'table'}) { $table->finish(); } } 1;