# Copyright (c) 2025 Löwenfelsen UG (haftungsbeschränkt)

# licensed under Artistic License 2.0 (see LICENSE file)

# ABSTRACT: generic module for extracting information from filesystems


package File::Information::Deep;

use v5.10;
use strict;
use warnings;

use parent 'File::Information::Base';

use Carp;
use Scalar::Util qw(weaken);

our $VERSION = v0.06;

my %_properties = (
    pdf_version     => {loader => \&_load_pdf},
    pdf_pages       => {loader => \&_load_pdf},
    odf_keywords    => {loader => \&_load_odf},
);

my @_odf_medadata_keys = qw(title description subject creator language initial_creator editing_cycles editing_duration generator creation_date date);
my @_image_info_keys   = qw(height width file_media_type file_ext color_type resolution SamplesPerPixel BitsPerSample Comment Interlace Compression Gamma LastModificationTime);
my @_image_extra_keys  = qw(Thumb::URI Thumb::Image::Width Thumb::Image::Height Thumb::MTime Software);
my @_dynamic_loaders   = (\&_load_odf, \&_load_audio_scan);

my %_audio_scan_tags = (
    vorbiscomments => {
        title => 'title',
    },
    riffwave => {
        title => 'inam',
    },
    id3 => {
        title => 'tit2',
    },
);

foreach my $keyword (qw(Author CreationDate ModDate Creator Producer Title Subject Keywords)) {
    $_properties{'pdf_info_'.lc($keyword)} = {loader => \&_load_pdf};
}
foreach my $keyword (qw(CreationDate ModDate)) {
    $_properties{'pdf_info_'.lc($keyword)}{parsing} = 'pdf_date';
}

foreach my $key (@_odf_medadata_keys) {
    $_properties{'odf_info_'.$key} = {loader => \&_load_odf};
}
foreach my $key (qw(creation_date date)) {
    $_properties{'odf_info_'.$key}{parsing} = 'iso8601';
}

foreach my $key (@_image_info_keys) {
     $_properties{'image_info_'.lc($key)} = {loader => \&_load_image_info};
}
foreach my $key (@_image_extra_keys) {
     $_properties{'image_info_extra_'.lc($key =~ s/::/_/r)} = {loader => \&_load_image_info};
}
$_properties{image_info_extra_thumb_mtime}{rawtype} = 'unixts';
$_properties{image_info_extra_thumb_uri}{rawtype} = 'uri';


#@returns File::Information::Base
sub parent {
    my ($self) = @_;
    return $self->{parent};
}

# ----------------
sub property_info {
    my ($self, @args) = @_;

    unless (defined $self->{_dynamic}) {
        $self->{_dynamic} = 1;
        foreach my $cb (@_dynamic_loaders) {
            $self->$cb('__dummy__');
        }
    }

    return $self->SUPER::property_info(@args);
}

sub _new {
    my ($pkg, %opts) = @_;
    my $self = $pkg->SUPER::_new(%opts, properties => \%_properties);
    my $pv = ($self->{properties_values} //= {})->{current} //= {};
    my $parent = $self->{parent};

    weaken($self->{parent});

    # copy a few critical values:
    $pv->{contentise} = {raw => $parent->get('contentise', lifecycle => 'current', as => 'uuid')};
    $pv->{mediatype}  = {raw => $parent->get('mediatype',  lifecycle => 'current', as => 'mediatype')};

    return $self;
}

sub _dynamic_property {
    my ($self, $prefix, $property) = @_;
    my $key;

    $property = lc($property);
    $property =~ s/::/_/g;
    $property =~ s/[^a-z0-9]/_/g;
    $_properties{$key = $prefix.'_'.$property} //= {};

    return $key;
}

sub _check_mediatype {
    my ($self, @mediasubtypes) = @_;
    my $v = $self->{properties_values}{current}{mediatype}{raw};

    foreach my $mediasubtype (@mediasubtypes) {
        return 1 if $v eq $mediasubtype;
    }

    return undef;
}

sub _pdf_extract_date {
    my ($self, $value) = @_;
    require DateTime::Format::Strptime;

    state $pdf_date_core_pattern = '%Y%m%d%H%M%S';
    state $pdf_date_format_0 = DateTime::Format::Strptime->new('pattern' => $pdf_date_core_pattern, 'time_zone' => 'UTC');
    my $dt;
    my $core;
    my $parser;

    # General format: D:YYYYMMDDHHmmSSOHH'mm'

    if (($core) = $value =~ /^D:([0-9]{14})Z'{0,2}$/) {
        $parser = $pdf_date_format_0;
    } elsif (my ($mycore, $tz_dir, $tz_h, $tz_m) = $value =~ /^D:([0-9]{14})(\+|\-)([0-9]{2})'([0-9]{2})'$/) {
        my $tz = sprintf('%s%s%s', $tz_dir, $tz_h, $tz_m);
        $core = $mycore;
        $parser = DateTime::Format::Strptime->new('pattern' => $pdf_date_core_pattern, 'time_zone' => $tz);
    }

    return undef unless defined($core) && defined($parser);

    return $parser->parse_datetime($core);
}

sub _load_pdf {
    my ($self, $key, %opts) = @_;
    my $pv = ($self->{properties_values} //= {})->{current} //= {};

    return if defined $self->{_loaded_pdf};
    $self->{_loaded_pdf} = 1;

    return unless defined $self->{path};
    return unless $self->_check_mediatype('application/pdf');

    # Check for module;
    if (eval {
            require PDF::API2;
            PDF::API2->import();
            1;
        }) {
        my $pdf = PDF::API2->open($self->{path});
        my %info = $pdf->info_metadata();

        $pv->{pdf_version} = {raw => $pdf->version};
        $pv->{pdf_pages}   = {raw => $pdf->page_count};

        foreach my $key (keys %info) {
            if (defined(my $value = $info{$key})) {
                my $pv_key = 'pdf_info_'.lc($key);

                $value = $self->_pdf_extract_date($value) if ($_properties{$pv_key}{parsing} // '') eq 'pdf_date';
                $pv->{$pv_key} = {raw => $value};
            }
        }

        $pdf->close;
    }
}

sub _load_odf {
    my ($self, $key, %opts) = @_;
    my $pv = ($self->{properties_values} //= {})->{current} //= {};

    return if defined $self->{_loaded_odf};
    $self->{_loaded_odf} = 1;

    return unless defined $self->{path};
    return unless $self->_check_mediatype(qw(application/vnd.oasis.opendocument.text));

    # Check for module;
    if (eval {
            require OpenOffice::OODoc;
            require DateTime::Format::ISO8601;
            OpenOffice::OODoc->import();
            DateTime::Format::ISO8601->import();
            1;
        }) {
        my $document = odfDocument(file => $self->{path});
        my $meta = odfMeta(file => $document);

        foreach my $key (@_odf_medadata_keys) {
            my $func = $meta->can($key);
            my $value = $meta->$func();
            my $pv_key = 'odf_info_'.$key;
            next unless defined($value) && length($value);

            $value = DateTime::Format::ISO8601->parse_datetime($value) if ($_properties{$pv_key}{parsing} // '') eq 'iso8601';

            $pv->{$pv_key} = {raw => $value};
        }

        $pv->{odf_keywords} = [map {{raw => $_}} $meta->keywords];
        delete $pv->{odf_keywords} unless scalar @{$pv->{odf_keywords}};

        {
            my %stats = $meta->statistic;
            foreach my $key (keys %stats) {
                my $pv_key = $self->_dynamic_property(odf_stats => $key);
                my $value = $stats{$key};
                next unless defined($value) && length($value);
                $pv->{$pv_key} = {raw => $value};
            }
        }

        foreach my $el ($meta->getUserPropertyElements) {
            my $pv_key = $self->_dynamic_property(odf_user_properties => $el->att('meta:name'));
            my $value = $el->text;
            $pv->{$pv_key} = {raw => $value};
        }
    }
}

sub _load_image_info {
    my ($self, $key, %opts) = @_;
    my $pv = ($self->{properties_values} //= {})->{current} //= {};

    return if defined $self->{_loaded_image_info};
    $self->{_loaded_image_info} = 1;

    return unless defined $self->{path};

    foreach my $data (eval {
            require Image::Info;
            Image::Info->import();
            Image::Info::image_info($self->{path});
        }) {
        next if defined($data->{error}) && length($data->{error});

        foreach my $key (@_image_info_keys) {
            my $pv_key = 'image_info_'.lc($key);
            my $value = delete $data->{$key};

            next unless defined($value) && length($value);

            $pv->{$pv_key} = {raw => $value};
        }
        foreach my $key (@_image_extra_keys) {
            my $pv_key = 'image_info_extra_'.lc($key =~ s/::/_/r);
            my $value = delete $data->{$key};

            next unless defined($value) && length($value);

            $pv->{$pv_key} = {raw => $value};
        }
    }
}

sub _load_audio_scan {
    my ($self, $key, %opts) = @_;
    my $pv = ($self->{properties_values} //= {})->{current} //= {};

    return if defined $self->{_loaded_audio_scan};
    $self->{_loaded_audio_scan} = 1;

    return unless defined $self->{path};

    # Check for module;
    if (defined(my $data = eval {
                local $ENV{AUDIO_SCAN_NO_ARTWORK} = 1;
                require Audio::Scan;
                Audio::Scan->import();
                Audio::Scan->scan($self->{path});
            })) {
        my $info = $data->{info};
        my $tags = $data->{tags};

        foreach my $key (keys %{$info}) {
            my $value = $info->{$key};
            my $pv_key;

            next unless defined($value) && length($value);
            next if ref $value;

            $pv_key = $self->_dynamic_property(audio_scan_info => $key);
            $pv->{$pv_key} = {raw => $value};
        }

        foreach my $key (keys %{$tags}) {
            my $value = $tags->{$key};
            my $pv_key;

            next unless defined($value) && length($value);
            next if ref $value;

            $pv_key = $self->_dynamic_property(audio_scan_tags => $key);
            $pv->{$pv_key} = {raw => $value};
        }

        {
            my $style;

            if ($self->_check_mediatype(qw(application/ogg audio/ogg video/ogg audio/flac))) {
                $style = 'vorbiscomments';
            } elsif ($self->_check_mediatype(qw(audio/x-wav))) {
                $style = 'riffwave';
            } else {
                $style = 'id3'; # bad guess
            }

            if (defined($style) && defined(my $map = $_audio_scan_tags{$style})) {
                foreach my $key (keys %{$map}) {
                    my $src_pv_key  = $self->_dynamic_property(audio_scan_tags => $map->{$key});
                    my $pv_key      = $self->_dynamic_property(audio_scan => $key);
                    my $value       = $pv->{$src_pv_key};

                    if (defined($value) && ref($value) eq 'HASH' && defined($value->{raw})) {
                        $pv->{$pv_key} = {raw => $value->{raw}};
                    }
                }
            }
        }
    }
}

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

File::Information::Deep - generic module for extracting information from filesystems

=head1 VERSION

version v0.06

=head1 SYNOPSIS

    use File::Information;

    my File::Information::Deep $deep = $obj->deep;

    # ...

B<Note:> This package inherits from L<File::Information::Base>.

This package allows for deep inspection of files.
This permits to read data directly from files, not just external metadata
(such as filesystem attributes).
This however comes at the price of performance.

B<Note:>
If you want to use data from deep inspection, you need to load this object (by calling C<$obj-E<gt>deep>)
before calling any L<File::Information::Base/get> or similar methods.

=head1 METHODS

=head2 parent

    my File::Information::Base $parent = $editor->parent;

Returns the parent that was used to create this object.

=head1 AUTHOR

Löwenfelsen UG (haftungsbeschränkt) <support@loewenfelsen.net>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2024-2025 by Löwenfelsen UG (haftungsbeschränkt) <support@loewenfelsen.net>.

This is free software, licensed under:

  The Artistic License 2.0 (GPL Compatible)

=cut