The Perl Toolchain Summit 2025 Needs You: You can help 🙏 Learn more

$File::Sticker::Scribe::Exif::VERSION = '4.0101';
=head1 NAME
File::Sticker::Scribe::Exif - read, write and standardize meta-data from EXIF file
=head1 VERSION
version 4.0101
=head1 SYNOPSIS
use File::Sticker::Scribe::Exif;
my $obj = File::Sticker::Scribe::Exif->new(%args);
my %meta = $obj->read_meta($filename);
$obj->write_meta(%args);
=head1 DESCRIPTION
This will write meta-data from EXIF files, and standardize it to a common
nomenclature, such as "tags" for things called tags, or Keywords or Subject etc.
=cut
use v5.10;
use Carp;
use Image::ExifTool qw(:Public);
use List::MoreUtils qw(uniq);
BEGIN {
# Set the user-defined fields for EXIF
%Image::ExifTool::UserDefined::sticker = (
GROUPS => { 0 => 'XMP', 1 => 'XMP-sticker', 2 => 'Image' },
NAMESPACE => { 'sticker' => 'http://ns.katspace.org/sticker/1.0/' },
WRITABLE => 'string',
# To maximize flexibility, this is going to be a plain string
# which we will populate with YAML data.
FreeFields => { },
);
%Image::ExifTool::UserDefined = (
# new XMP namespaces (ie. XMP-xxx) must be added to the Main XMP table:
'Image::ExifTool::XMP::Main' => {
sticker => {
SubDirectory => {
TagTable => 'Image::ExifTool::UserDefined::sticker'
},
},
}
);
Image::ExifTool::XMP::RegisterNamespace(\%Image::ExifTool::UserDefined::sticker);
}
# FOR DEBUGGING
=head1 DEBUGGING
=head2 whoami
Used for debugging info
=cut
sub whoami { ( caller(1) )[3] }
=head1 METHODS
=head2 init
Initialize the object.
$scribe->init(wanted_fields=>{title=>'TEXT',count=>'NUMBER',tags=>'MULTI'});
=cut
sub init {
my $self = shift;
my %parameters = @_;
$self->SUPER::init(%parameters);
} # init
=head2 priority
The priority of this writer. Scribes with higher priority get tried first.
=cut
sub priority {
my $class = shift;
return 1;
} # priority
=head2 allowed_file
If this scribe can be used for the given file, then this returns true.
File must be one of: PDF or an image which is not a GIF.
(GIF files need to be treated separately)
(ExifTool can't write to EPUB)
=cut
sub allowed_file {
my $self = shift;
my $file = shift;
say STDERR whoami(), " file=$file" if $self->{verbose} > 2;
$file = $self->_get_the_real_file(filename=>$file);
my $ft = $self->{file_magic}->info_from_filename($file);
if ($ft->{mime_type} =~ /(image|pdf)/
and $ft->{mime_type} !~ /gif/)
{
return 1;
}
return 0;
} # allowed_file
=head2 known_fields
Returns the fields which this scribe knows about.
my $known_fields = $scribe->known_fields();
=cut
sub known_fields {
my $self = shift;
return {
title=>'TEXT',
creator=>'TEXT',
description=>'TEXT',
location=>'TEXT',
tags=>'MULTI',
%{$self->{wanted_fields}},
};
} # known_fields
=head2 readonly_fields
Returns the fields which this scribe knows about, which can't be overwritten,
but are allowed to be "wanted" fields. Things like file-size etc.
my $readonly_fields = $scribe->readonly_fields();
=cut
sub readonly_fields {
my $self = shift;
return {
date=>'TEXT',
copyright=>'TEXT',
flash=>'TEXT',
filesize=>'NUMBER',
imagesize=>'TEXT',
imageheight=>'NUMBER',
imagewidth=>'NUMBER',
megapixels=>'NUMBER'};
} # readonly_fields
=head2 read_meta
Read the meta-data from the given file.
my $meta = $obj->read_meta($filename);
=cut
sub read_meta {
my $self = shift;
my $filename = shift;
say STDERR whoami(), " filename=$filename" if $self->{verbose} > 2;
$filename = $self->_get_the_real_file(filename=>$filename);
my $exif_options = {DateFormat => "%Y-%m-%d %H:%M:%S"};
my $info = ImageInfo($filename,$exif_options);
my %meta = ();
# Check if this is a Gutenberg book; they have quirks.
my $is_gutenberg_book = 0;
if (exists $info->{Identifier}
and $info->{'Identifier'} =~ m!http://www.gutenberg.org/ebooks/\d+!)
{
$is_gutenberg_book = 1;
# If this is a Gutenberg book, the Identifier holds the correct URL
$meta{'url'} = $info->{'Identifier'};
}
# There are multiple fields which could be used as a file "description".
# Check through them until you find a non-empty one.
my $description = '';
foreach my $field (qw(Caption-Abstract Comment UserComment ImageDescription Description))
{
if (exists $info->{$field}
and $info->{$field}
and $info->{$field} !~ /^---/ # YAML - not a description!
and !$description)
{
$description = $info->{$field};
$description =~ s/\n$//; # remove trailing newlines
}
}
$meta{description} = $description if $description;
# There are multiple fields which could be used as a file content creator.
# Check through them until you find a non-empty one.
my $creator = '';
foreach my $field (qw(Author Artist Creator))
{
if (exists $info->{$field} and $info->{$field} and !$creator)
{
$creator = $info->{$field};
}
}
$meta{creator} = $creator if $creator;
# There are multiple fields which could be used as a copyright notice.
# Check through them until you find a non-empty one.
my $copyright = '';
foreach my $field (qw(License Rights))
{
if (exists $info->{$field} and $info->{$field} and !$copyright)
{
$copyright = $info->{$field};
}
}
$meta{copyright} = $copyright if $copyright;
# There are multiple fields which could be used as a file date.
# Check through them until you find a non-empty one.
my $date = '';
foreach my $field (qw(CreateDate DateTimeOriginal Date PublishedDate PublicationDate))
{
if (exists $info->{$field} and $info->{$field} and !$date)
{
$date = $info->{$field};
}
}
$meta{date} = $date if $date;
# Use a consistent naming for tag fields.
# Combine the tag-like fields together.
# Preserve the order and check for dupicates later with uniq
my @tags = ();
foreach my $field (qw(Keywords Subject))
{
if (exists $info->{$field} and $info->{$field})
{
my $val = $info->{$field};
my @these_tags;
if ($is_gutenberg_book)
{
# gutenberg tags are multi-word, separated by comma-space or ' -- '
# and can have parens in them
$val =~ s/\(//g;
$val =~ s/\)//g;
$val =~ s/\s--\s/,/g;
@these_tags = split(/,\s?/, $val);
}
else
{
@these_tags = split(/,\s*/, $val);
}
foreach my $t (@these_tags)
{
$t =~ s/ - / /g; # remove isolated dashes
$t =~ s/[^\w\s,-]//g; # remove non-word characters
push @tags, $t;
}
}
}
# Are there any tags?
if (@tags)
{
# remove duplicates
$meta{tags} = [uniq @tags];
}
else # remove empty tag-field
{
delete $meta{tags};
}
# There are SOOOOOO many fields in EXIF data, just remember a subset of them
foreach my $field (qw(
Flash
ImageHeight
ImageSize
ImageWidth
Megapixels
PageCount
Location
Title
))
{
if (exists $info->{$field} and $info->{$field})
{
$meta{lc($field)} = $info->{$field};
}
}
# -------------------------------------------------
# Freeform Fields
# These are stored as YAML data in the XMP-sticker:FreeFields field.
# They used to be stored in the XMP:Description field,
# before then the ImageDescription field, before then the UserComment field
# so they need to be checked too.
# -------------------------------------------------
if (exists $info->{FreeFields}
and $info->{FreeFields}
and $info->{FreeFields} =~ /^---/)
{
say STDERR sprintf("FreeFields='%s'", $info->{FreeFields}) if $self->{verbose} > 2;
my $data;
eval {$data = Load($info->{FreeFields});};
if ($@)
{
warn __PACKAGE__, " Load of YAML data failed: $@";
}
elsif (!$data)
{
warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 2;
}
else # okay
{
foreach my $field (sort keys %{$data})
{
$meta{$field} = $data->{$field};
}
}
}
elsif (exists $info->{Description}
and $info->{Description}
and $info->{Description} =~ /^---/)
{
say STDERR sprintf("Description='%s'", $info->{Description}) if $self->{verbose} > 2;
my $data;
eval {$data = Load($info->{Description});};
if ($@)
{
warn __PACKAGE__, " Load of YAML data failed: $@";
}
elsif (!$data)
{
warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 2;
}
else # okay
{
foreach my $field (sort keys %{$data})
{
$meta{$field} = $data->{$field};
}
}
}
elsif (exists $info->{ImageDescription}
and $info->{ImageDescription}
and $info->{ImageDescription} =~ /^---/)
{
say STDERR sprintf("ImageDescription='%s'", $info->{ImageDescription}) if $self->{verbose} > 2;
my $data;
eval {$data = Load($info->{ImageDescription});};
if ($@)
{
warn __PACKAGE__, " Load of YAML data failed: $@";
}
elsif (!$data)
{
warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 2;
}
else # okay
{
foreach my $field (sort keys %{$data})
{
$meta{$field} = $data->{$field};
}
}
}
elsif (exists $info->{UserComment}
and $info->{UserComment}
and $info->{UserComment} =~ /^---/)
{
say STDERR sprintf("UserComment='%s'", $info->{UserComment}) if $self->{verbose} > 2;
my $data;
eval {$data = Load($info->{UserComment});};
if ($@)
{
warn __PACKAGE__, " Load of YAML data failed: $@";
}
elsif (!$data)
{
warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 2;
}
else # okay
{
foreach my $field (sort keys %{$data})
{
$meta{$field} = $data->{$field};
}
}
}
return \%meta;
} # read_meta
=head1 Helper Functions
Private interface.
=head2 replace_one_field
Overwrite the given field. This does no checking.
$writer->replace_one_field(filename=>$filename,field=>$field,value=>$value);
=cut
sub replace_one_field {
my $self = shift;
my %args = @_;
say STDERR whoami(), " field=$args{field},value=$args{value}" if $self->{verbose} > 2;
my $filename = $self->_get_the_real_file(filename=>$args{filename});
my $field = $args{field};
my $value = $args{value};
my $ft = $self->{file_magic}->info_from_filename($filename);
my $et = new Image::ExifTool;
$et->Options(ListSep=>',',ListSplit=>',');
$et->ExtractInfo($filename);
my $success;
if ($field eq 'creator')
{
$success = $et->SetNewValue('Creator', $value);
}
elsif ($field eq 'copyright')
{
$success = $et->SetNewValue('License', $value);
}
elsif ($field eq 'title')
{
$success = $et->SetNewValue('Title', $value);
}
elsif ($field eq 'location')
{
$success = $et->SetNewValue('Location', $value);
}
elsif ($field eq 'description')
{
# Okay, here's the messy relationship between the description,
# the freeform data, and GIMP.
#
# I originally wrote the freeform data in the UserComment field, and
# the description into the Description and Comment fields, then I found
# that GIMP uses the UserComment field as the description field at a
# higher priority than the Comment field.
#
# So then I wrote the freeform data in the ImageDescription field,
# then I found that GIMP ALSO uses THAT field as the description field
# at a higher priority than the Comment field.
#
# And that GIMP overwrites ALL THREE fields (Comment, UserComment,
# ImageDescription) with what it considers the description
# when saving a file.
#
# So then I used the XMP:Description (Description) field for the
# freeform data, because GIMP neither reads nor overwrites that.
#
# But then I discovered you could DEFINE YOUR OWN fields in EXIF,
# so I defined XMP-sticker:FreeFields to put my freeform YAML data into.
# Before the decription is written, the freeform data
# needs to be converted to its new home.
$self->_convert_freeform_data(exif=>$et);
# GIMP reads and overwrites the Comment, UserComment
# and ImageDescription fields, so we need to do that too.
# Escpecially since GIMP does not look at the Comment field
# if one of the other two is not empty.
$success = $et->SetNewValue('UserComment', $value);
$success = $et->SetNewValue('ImageDescription', $value);
if ($ft->{mime_type} =~ /image\/jpeg/)
{
$success = $et->SetNewValue('Comment', $value);
}
}
elsif ($field eq 'tags')
{
if (ref $value eq 'ARRAY')
{
$success = $et->SetNewValue('Keywords', $value);
$success = $et->SetNewValue('Subject', $value);
}
else
{
my @tags = split(/,/,$value);
$success = $et->SetNewValue('Keywords', \@tags);
$success = $et->SetNewValue('Subject', \@tags);
}
}
else # freeform field
{
# Need to read all the YAML, change this field, and write it again
my $fdata = $self->_read_freeform_data(exif=>$et);
$fdata->{$field} = $value;
$success = $self->_write_freeform_data(newdata=>$fdata,exif=>$et);
}
if ($success)
{
$et->WriteInfo($filename);
}
return $success;
} # replace_one_field
=head2 delete_field_from_file
Completely remove the given field. This does no checking.
$writer->delete_field_from_file(filename=>$filename,field=>$field);
=cut
sub delete_field_from_file {
my $self = shift;
my %args = @_;
say STDERR whoami(), " field=$args{field}" if $self->{verbose} > 2;
my $filename = $self->_get_the_real_file(filename=>$args{filename});
my $field = $args{field};
my $ft = $self->{file_magic}->info_from_filename($filename);
my $et = new Image::ExifTool;
$et->Options(ListSep=>',',ListSplit=>',');
$et->ExtractInfo($filename);
my $success;
if ($field eq 'creator')
{
$success = $et->SetNewValue('Creator')
}
elsif ($field eq 'title')
{
$success = $et->SetNewValue('Title')
}
elsif ($field eq 'description')
{
# GIMP reads and overwrites the Comment, UserComment
# and ImageDescription fields, so we need to do that too.
if ($ft->{mime_type} =~ /image\/jpeg/)
{
$success = $et->SetNewValue('Comment');
}
$success = $et->SetNewValue('ImageDescription');
$success = $et->SetNewValue('UserComment');
}
elsif ($field eq 'tags')
{
$success = $et->SetNewValue('Keywords');
$success = $et->SetNewValue('Subject');
}
else # freeform field
{
# Need to read all the YAML, change this field, and write it again
my $fdata = $self->_read_freeform_data(exif=>$et);
if (exists $fdata->{$field})
{
delete $fdata->{$field};
$success = $self->_write_freeform_data(newdata=>$fdata,exif=>$et);
}
}
if ($success)
{
$et->WriteInfo($filename);
}
return $success;
} # delete_field_from_file
=head2 _get_the_real_file
If the file is a directory, look for a cover file.
If the file is a soft link, look for the file it is pointing to
(because ExifTool behaves badly with soft links).
my $real_file = $scribe->_get_the_real_file(filename=>$filename);
=cut
sub _get_the_real_file {
my $self = shift;
my %args = @_;
say STDERR whoami(), " filename=$args{filename}" if $self->{verbose} > 2;
my $filename = $args{filename};
if (-d $filename) # is a directory, look for a cover file
{
my $cover_file = ($self->{cover_file} ? $self->{cover_file} : 'cover.jpg');
$cover_file = File::Spec->catfile($filename, $cover_file);
if (-f $cover_file)
{
$filename = $cover_file;
}
else # give up and die
{
croak "$args{filename} is directory, cannot find $cover_file";
}
}
# ExifTool has a wicked habit of replacing soft-linked files with the
# contents of the file rather than honouring the link. While using the
# exiftool script offers -overwrite_original_in_place to deal with this,
# the Perl module does not appear to have such an option available.
# So the way to get around this is to check if the file is a soft link, and
# if it is, find the real file, and write to that. And if *that* file is
# a soft link... go down the rabbit-hole as deep as it goes.
while (-l $filename)
{
my $realfile = readlink $filename;
if (-f $realfile)
{
$filename = $realfile;
}
else # give up and die
{
croak "$args{filename} is soft link, cannot find $realfile";
}
}
return $filename;
} # _get_the_real_file
=head2 _read_freeform_data
Read the freeform data as YAML data from the XMP-sticker:FreeFields field.
my $ydata = $self->_read_freeform_data(exif=>$exif);
=cut
sub _read_freeform_data {
my $self = shift;
my %args = @_;
say STDERR whoami() if $self->{verbose} > 2;
# CONVERT FREEFORM DATA if needed BEFOREHAND
$self->_convert_freeform_data(%args);
my $ydata;
my $et = $args{exif};
my $ystring = $et->GetValue('FreeFields');
$ystring = $et->GetNewValue('FreeFields') if !$ystring;
say STDERR "ystring=$ystring" if $self->{verbose} > 2;
if ($ystring and $ystring =~ /^---/) # YAML data needs prefix
{
eval {$ydata = Load($ystring);};
if ($@)
{
warn __PACKAGE__, " Load of YAML data failed: $@ // '$ystring'";
}
elsif (!$ydata)
{
warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 1;
}
}
say STDERR Dump($ydata) if $self->{verbose} > 2;
return $ydata;
} # _read_freeform_data
=head2 _write_freeform_data
Write the freeform data as YAML data into the XML-sticker:FreeFields field
This overwrites whatever is there, it does not check.
$self->_write_freeform_data(newdata=>\%newdata,exif=>$exif);
=cut
sub _write_freeform_data {
my $self = shift;
my %args = @_;
say STDERR whoami() if $self->{verbose} > 2;
my $newdata = $args{newdata};
my $et = $args{exif};
# restore multi-value comma-separated fields to arrays
foreach my $fn (keys %{$self->{wanted_fields}})
{
if ($self->{wanted_fields}->{$fn} eq 'MULTI'
and exists $newdata->{$fn}
and defined $newdata->{$fn}
and $newdata->{$fn} =~ /,/)
{
my @vals = split(/,/, $newdata->{$fn});
$newdata->{$fn} = \@vals;
}
}
my $ystring = Dump($newdata);
say STDERR "ystring=$ystring" if $self->{verbose} > 2;
my $success = $et->SetNewValue('XMP-sticker:FreeFields', $ystring);
return $success;
} # _write_freeform_data
=head2 _convert_freeform_data
Convert the freeform data so that it is placed into the XMP-sticker:FreeFields
field rather than the XMP:Description, UserComment or ImageDescription field.
$self->_convert_freeform_data(exif=>$exif);
=cut
sub _convert_freeform_data {
my $self = shift;
my %args = @_;
say STDERR whoami() if $self->{verbose} > 2;
my $et = $args{exif};
# Check if it needs conversion at all.
# If the XMP-sticker:FreeFields field is not empty
# and contains YAML data, then nothing needs to be done.
my $ystring = $et->GetValue('XMP-sticker:FreeFields');
$ystring = $et->GetNewValue('XMP-sticker:FreeFields') if !$ystring;
if ($ystring and $ystring =~ /^---/) # Assume YAML data
{
# no conversion needed
return 1;
}
# ------------------------------------
# Conversion needed
# Read from XMP:Description, write into XMP-sticker:FreeFields
# Otherwise read from ImageDescription.
# The YAML data might be in UserComment instead if old.
# ------------------------------------
$ystring = $et->GetValue('Description');
$ystring = $et->GetNewValue('Description') if !$ystring;
if (!$ystring or $ystring !~ /^---/) # Try ImageDescription
{
$ystring = $et->GetValue('ImageDescription');
$ystring = $et->GetNewValue('ImageDescription') if !$ystring;
}
if (!$ystring or $ystring !~ /^---/) # Try UserComment
{
$ystring = $et->GetValue('UserComment');
$ystring = $et->GetNewValue('UserComment') if !$ystring;
}
my $ydata;
my $success = 0;
if ($ystring and $ystring =~ /^---/) # Probably YAML data
{
# Check if the YAML data is valid
eval {$ydata = Load($ystring);};
if ($@)
{
warn __PACKAGE__, " Load of YAML data failed: $@ ## '$ystring'";
}
elsif (!$ydata)
{
warn __PACKAGE__, " no legal YAML" if $self->{verbose} > 1;
}
else # data is okay
{
$success = $et->SetNewValue('XMP-sticker:FreeFields', $ystring);
if ($success)
{
# Clear out the XMP:Description field
$et->SetNewValue('XMP:Description');
# Put the description, if there is one,
# into the UserComment, ImageDescription
my $desc = $et->GetValue('Comment');
$desc = $et->GetNewValue('Comment') if !$desc;
$desc = $et->GetValue('Caption-Abstract') if !$desc;
if ($desc)
{
$et->SetNewValue('UserComment', $desc);
$et->SetNewValue('ImageDescription', $desc);
}
else
{
# Otherwise clear out no longer valid data
$et->SetNewValue('ImageDescription');
$et->SetNewValue('UserComment');
}
}
}
}
else # No YAML data to convert
{
# Put some empty data in there.
my %newdata = ();
my $nystring = Dump(\%newdata);
$success = $et->SetNewValue('XMP-sticker:FreeFields', $nystring);
}
return $success;
} # _convert_freeform_data
=head1 BUGS
Please report any bugs or feature requests to the author.
=cut
1; # End of File::Sticker::Scribe
__END__