package IO::BlockSync;

# Basic
use 5.010;
use strict;
use warnings FATAL => 'all';

# Build in
use Carp;
use Fcntl qw(:DEFAULT :seek);
use POSIX qw(ceil);
use Scalar::Util qw(reftype);

# CPAN
use Log::Log4perl;
use Log::Log4perl::Level;
use Moo;
use Try::Tiny;

# These two come last - in that order
use namespace::clean;
use Exporter qw(import);

################################################################

# Moo roles to implement
with('MooseX::Log::Log4perl');

# Make sure log4perl doesn't come with errors
if ( not Log::Log4perl->initialized() ) {
    Log::Log4perl->easy_init( Log::Log4perl::Level::to_priority('OFF') );
}

################################################################

=head1 NAME

IO::BlockSync - Syncronize changed blocks

=head1 VERSION

Version 0.002

=cut

our $VERSION = '0.002';

################################################################

=head1 SYNOPSIS

BlockSync can some of the same stuff that bigsync (by Egor Egorov) can
- it's just written in perl.

BlockSync copies data from source file to destination file (can be a block
device) and calculates checksum on each block it copies.
On all runs after the first only the changed blocks will be copied.

    use IO::BlockSync;

    # OOP way
    my $bs = IO::BlockSync->new(
        src => '/path/to/source/file',
        dst => '/path/to/destination/file',
        chk => '/path/to/chk/file',
    );
    $bs->run;

    # Non OOP way
    BlockSync(
        src => '/path/to/source/file',
        dst => '/path/to/destination/file',
        chk => '/path/to/chk/file',
    );

=cut

################################################################

=head1 INSTALLATION

Look in C<README.pod>

Can also be found on
L<GitHub|https://github.com/thordreier/perl-IO-BlockSync/blob/master/README.pod>
or L<meta::cpan|https://metacpan.org/pod/distribution/IO-BlockSync/README.pod>

=cut

################################################################

=head1 EXPORT

=cut

our @EXPORT = qw(BlockSync);

################################################################

=head2 BlockSync

Run BlockSync non-object-oriented

=cut

sub BlockSync {
    return __PACKAGE__->new(@_)->run;
}

################################################################

=head1 ATTRIBUTES

=cut

################################################################

=head2 src

Path to source file.

mandatory - string (containing path) or filehandle

=cut

has 'src' => (
    is       => 'ro',
    required => 1,
);

################################################################

=head2 dst

Destination file. If not set, then only checksum file will be updated.

optional - string (containing path) or filehandle

=cut

has 'dst' => ( is => 'ro', );

################################################################

=head2 chk

Path to checksum file.

mandatory - string (containing path) or filehandle

=cut

has 'chk' => (
    is       => 'ro',
    required => 1,
);

################################################################

=head2 bs

Block size to use in bytes.

optional - integer - defaults to 1_048_576 B (1 MB)

=cut

has 'bs' => (
    is      => 'ro',
    default => 1_048_576,
);

################################################################

=head2 hash

Sub that retrurn hashed data.

optional - sub - defaults to sub that return MD5 hash followed by newline

=cut

has 'hash' => (
    is      => 'ro',
    default => sub {
        require Digest::MD5;
        sub {
            Digest::MD5::md5_hex(shift) . "\n";
        }
    },
);

################################################################

=head2 sparse

Seek in dst file, instead of writing blocks only containing \0

optional - boolean - defaults to 0 (false)

=cut

has 'sparse' => (
    is      => 'ro',
    default => 0,
);

################################################################

=head2 truncate

Truncate the destination file to same size as source file. Does not work on block devices. Will only be tried if C<data> has default value (whole file is copied).

optional - boolean - defaults to 0 (false)

=cut

has 'truncate' => (
    is      => 'ro',
    default => 0,
);

################################################################

=head2 data

List of areas (in bytes) inside the source file that should be looked at.
Usefull if you know excactly which blocks in src that could have changed.

data => [
    {start => 0, end => 9999},
    {start => 88888, end => 777777},
]

optional - array of hashes - defaults to "whole file"

=cut

has 'data' => (
    is      => 'ro',
    default => sub {
        [
            {
                start => 0,
                end   => 0
            }
        ]
    },
);

################################################################

=head2 status

Sub that will be run everytime a block has been read (and written).

optional - sub - default to sub doing nothing

=cut

has 'status' => (
    is      => 'ro',
    default => sub {
        sub { }
    },
);

################################################################

=head1 METHODS

=cut

################################################################

=head2 run

This is the method that starts copying data.

=cut

sub run {    ## no critic (Subroutines::ProhibitExcessComplexity)
    my $self = shift;

    my ( $srcFh, $srcClose, $dstFh, $dstClose, $chkFh, $chkClose );

    try {
        # Get file handles for source, destination and checksum files
        $srcFh = $self->_getFh( 'src', $self->src, \$srcClose, O_RDONLY );
        $chkFh =
          $self->_getFh( 'chk', $self->chk, \$chkClose, O_RDWR | O_CREAT );
        if ( $self->dst ) {
            $dstFh = $self->_getFh( 'dst', $self->dst, \$dstClose,
                O_WRONLY | O_CREAT );
        }
        else {
            $self->logger->debug('No dst file, only calculating checksums');
        }

        # Calculate hash for a block only containing ASCII 0
        my $nullHash = &{ $self->hash }( "\0" x $self->bs );

        # Get number of bytes that a hash takes up
        my $hashSize = length($nullHash);

        my $srcSeek;

        # Loop through "areas" that should be copied
        # Default i one area containing the whole source file
        foreach my $dataBlocks ( @{ $self->data } ) {

            # Start and end of this "area" (default is $start=0, $end=0)
            my $start = $dataBlocks->{start};
            my $end   = $dataBlocks->{end};

            $self->logger->debug(
                "Going to process data from <$start> to <$end>");

            # Seek to $start
            # (or the beginning of the block that $start is in,
            # if $start is not aligned with bs)
            $srcSeek = int( $start / $self->bs ) * $self->bs;
            sysseek( $srcFh, $srcSeek, SEEK_SET )
              || $self->logger->logcroak(
                "Cannot seek to block <$srcSeek> in src file");

            # Just die! Muhahaha. Or not
            my $die = 0;

            # Can be either sparse, new, unchanged or changed
            my $status;

            # Read block from source
            while ( my $srcReadSize = sysread( $srcFh, my $data, $self->bs ) ) {

                # It's ok to read a block smaller than bs if it's the last
                # block. But it's not ok if it's not the last.
                if ($die) {
                    croak 'not reading full block';
                }
                $die = $srcReadSize != $self->bs;

                # $block = block number in source with the specified block size
                my $block = $srcSeek / $self->bs;

                # We start by assuming that we should write to dst
                # - if dst is set (= we are not just calculating checksum)
                my $writeData = 1 && $dstFh;

                # We start be assuming that we should write checksum to chk
                my $writeHash = 1;

                # Calculate hash for data read from src
                my $newHash = &{ $self->hash }($data);

                # Get old hash for the same block
                sysseek( $chkFh, $block * $hashSize, SEEK_SET )
                  || $self->logger->logcroak('Cannot seek in chk file');
                my $oldHashSize = sysread( $chkFh, my $oldHash, $hashSize );

                # Test source against checksum
                if ( $oldHashSize != $hashSize || $oldHash eq "\0" x $hashSize )
                {
                    if ( $self->sparse && $newHash eq $nullHash ) {

                        # Sparse is only for new blocks
                        # Blocks that have been nulled out in source will
                        # also get nulled out in destination
                        $status    = 'sparse';
                        $writeData = 0;
                    }
                    else {
                        $status = 'new';
                    }
                }
                elsif ( $newHash eq $oldHash ) {
                    $status    = 'unchanged';
                    $writeData = 0;
                    $writeHash = 0;
                }
                else {
                    $status = 'changed';
                }

                # Write data to destination
                if ($writeData) {
                    sysseek( $dstFh, $srcSeek, SEEK_SET )
                      || $self->logger->logcroak('Cannot seek in dst file');
                    syswrite( $dstFh, $data );
                }

                # Update hash in checksum
                if ($writeHash) {
                    sysseek( $chkFh, $block * $hashSize, SEEK_SET )
                      || $self->logger->logcroak('Cannot seek in chk file');
                    syswrite( $chkFh, $newHash );
                }

                $self->logger->debug(
                    sprintf 'Block <%u> was <%s> (<%u> to <%u>)',
                    $block, $status, $srcSeek, $srcSeek + $srcReadSize - 1 );
                &{ $self->status }
                  ( $block, $status, $srcSeek, $srcSeek + $srcReadSize - 1 );

                # Next block will start here
                $srcSeek += $srcReadSize;

                # Was this the last block in this batch
                if ( $end && $srcSeek > $end ) {
                    last;
                }
            }

            # If last block is sparse, it is not enough to seek to where the
            # EOF should be. We need to at least write a single \0
            if ( $dstFh && $status eq 'sparse' ) {
                sysseek( $dstFh, $srcSeek - 1, SEEK_SET )
                  || $self->logger->logcroak('Cannot seek in dst file');
                syswrite( $dstFh, "\0" );
            }
        }

        if (   $self->truncate
            && $dstFh
            && $srcSeek
            && @{ $self->data } == 1
            && $self->data->[0]->{start} == 0
            && $self->data->[0]->{end} == 0 )
        {
            $self->logger->debug("Truncating dst file at <$srcSeek>");
            truncate( $dstFh, $srcSeek );

            my $chkSeek = ceil( $srcSeek / $self->bs ) * $hashSize;
            $self->logger->debug("Truncating chk file at <$chkSeek>");
            truncate( $chkFh, $chkSeek );
        }

    }
    catch {
        croak $_;
    }
    finally {
        # If we opened the files (we got string with path), then we close them
        # if we got a filehandle then we do nothing
        if ($srcClose) {
            $self->logger->debug( sprintf 'closing src file <%s>', $self->src );
        }
        if ($dstClose) {
            $self->logger->debug( sprintf 'closing dst file <%s>', $self->dst );
        }
        if ($chkClose) {
            $self->logger->debug( sprintf 'closing chk file <%s>', $self->chk );
        }
    };

    # Make Perl::Critic happy
    return;
}

################################################################

=begin comment

Private
Get file handle

=end comment

=cut

sub _getFh {
    my ( $self, $name, $file, $closeFile, $mode ) = @_;

    if ( my $t = reftype($file) ) {
        if ( $t eq 'GLOB' ) {
            $self->logger->debug(
                sprintf '%s is a file handle, using that directly', $name );
            return $file;
        }
        else {
            $self->logger->logcroak(
                sprintf '<%s> is not a supported type for %s',
                $t, $name );
        }
    }
    else {
        $self->logger->debug( sprintf 'opening %s file <%s>', $name, $file );
        sysopen( my $fh, $file, $mode )
          || $self->logger->logcroak( sprintf 'error opening <%s>', $file );
        ${$closeFile} = 1;
        return $fh;
    }

    # Make Perl::Critic happy
    croak 'We should never end here!';
}

################################################################

=head1 LICENSE AND COPYRIGHT

This software is copyright (c) 2019 by Thor Dreier-Hansen.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

Terms of the Perl programming language system itself:

=over

=item * the 
        L<GNU General Public License|http://dev.perl.org/licenses/gpl1.html>
        as published by the Free Software Foundation; either
        L<version 1|http://dev.perl.org/licenses/gpl1.html>,
        or (at your option) any later version, or

=item * the L<"Artistic License"|http://dev.perl.org/licenses/artistic.html>

=back

See L<http://dev.perl.org/licenses/> for more information.

=cut

1;    # End of IO::BlockSync