###########################################################
# A Perl package for showing/modifying JPEG (meta)data. #
# Copyright (C) 2004,2005,2006 Stefano Bettelli #
# See the COPYING and LICENSE files for license terms. #
###########################################################
package Image::MetaData::JPEG::Segment;
use Image::MetaData::JPEG::data::Tables
qw(:JPEGgrammar :Endianness :RecordTypes);
use Image::MetaData::JPEG::Backtrace;
use Image::MetaData::JPEG::Record;
no integer;
use strict;
use warnings;
###########################################################
# These simple methods should be used instead of standard #
# "warn" and "die" in this package; they print a much #
# more elaborated error message (including a stack trace).#
# Warnings can be turned off altogether simply by setting #
# Image::MetaData::JPEG::show_warnings to false. #
###########################################################
sub warn { my ($this, $message) = @_;
warn Image::MetaData::JPEG::Backtrace::backtrace
($message, "Warning" . $this->info(), $this)
if $Image::MetaData::JPEG::show_warnings; }
sub die { my ($this, $message) = @_;
die Image::MetaData::JPEG::Backtrace::backtrace
($message, "Fatal error" . $this->info(), $this);}
sub info { my ($this) = @_; my $name = $this->{name} || '<no name>';
return " [segment type $name]"; }
###########################################################
# JPEG segment header constructor. Its arguments are: the #
# segment type (a multicharacter string, not the marker), #
# a reference to a raw data buffer and a parse flag. The #
# raw buffer is saved internally through its reference #
# (no copy is done). If its parse flag does not match #
# "NOPARSE", and its type is parseable, the Segment has #
# its key-value pairs extracted to JPEG::Record's in the #
# 'records' list. #
#=========================================================#
# The first four bytes in the Segment mean: #
# #
# 2 bytes segment marker (0xff..) #
# 2 bytes length (including this value) #
# #
# The marker is a two byte value, whose first byte is #
# always 0xff. The value of the second byte defines the #
# segment type. It is assumed that the buffer which is #
# passed to this constructor DOES NOT contain these four #
# bytes; in fact, the segment type can be deduced by its #
# symbolic name (first argument), and the buffer size can #
# be calculated with the length() function. This simpli- #
# fies a lot of repetitive code, but it must be kept in #
# mind when the file is written back to the filesystem. #
#=========================================================#
# $this->{endianness} (a private field) contains the #
# current endianness, i.e. the endianness to be used for #
# reading the next values while parsing the data area. #
# Its significant is therefore only transient, and it is #
# set to undef at the end of the constructor. #
#=========================================================#
# $this->{error} is normally set to "undef". If, however, #
# an error occurred during the parsing stage in the cons- #
# tructor, this variable is set to an error message. The #
# intended use is the following: a Segment with errors #
# can be inspected (partially, of course, because parsing #
# did not terminate correctly) but not modified (that is, #
# the update method, which overwrites the area pointed to #
# by $this->{dataref}, must be inhibited): it can only be #
# rewritten to disk as it is. #
###########################################################
sub new {
my ($pkg, $name, $dataref, $flag) = @_;
# if $dataref is undef, point it to a *modifiable* empty string
my $this = bless {
name => $name,
dataref => defined $dataref ? $dataref : \ (my $ns = ''),
records => [],
error => undef,
endianness => undef,
}, $pkg;
# die on various error conditions
$this->die('Invalid segment name') unless defined $name && ! ref $name;
$this->die('Invalid data reference') if defined $dataref && ! ref $dataref;
# parse the segment (pass the $flag)
$this->parse($flag);
# return a reference to the constructed object
return $this;
}
###########################################################
# This method parses or reparses the current segment. It #
# only dispatches the flow to specific subroutines based #
# on the segment name. The error flag is reset to undef #
# before parse_*, so that, at the end, it reflects only #
# errors occurred during this parse session. If the $flag #
# argument is set to "NOPARSE", this method simulates an #
# error and refuses to proceed further. The parsed data #
# array "@records" is flushed when entering this routine. #
#=========================================================#
# Segment parsing is enclosed in an eval block, so that #
# errors are not fatal (they work as trapped exceptions, #
# and the die-string is converted into a message). #
#=========================================================#
# See also the notes in the constructor about the private #
# var. $this->{endianness} and the use of $this->{error}. #
###########################################################
sub parse {
my ($this, $flag) = @_;
# locally set endianness to big endian
local $this->{endianness} = $BIG_ENDIAN;
# reset the error flag and clear the data set
$this->{error} = undef;
$this->{records} = [];
# call the specific parse routines inside an eval block,
# so that errors are not fatal...
eval {
# if $flag matches "NOPARSE", we don't need to parse
goto STOP_PARSING if ($flag && $flag =~ /NOPARSE/);
# this is a stupid Perl-style switch
for ($this->{name}) {
# parse all informative tags
$_ eq 'COM' ? $this->parse_com() : # User comments
$_ eq 'APP0' ? $this->parse_app0() : # JFIF
$_ eq 'APP1' ? $this->parse_app1() : # Exif or XMP
$_ eq 'APP2' ? $this->parse_app2() : # FPXR or ICC_Prof
$_ eq 'APP3' ? $this->parse_app3() : # Additonal metadata
$_ eq 'APP4' ? $this->parse_unknown() : # HPSC
$_ eq 'APP12' ? $this->parse_app12() : # PreExif ascii meta
$_ eq 'APP13' ? $this->parse_app13() : # IPTC and Photoshop
$_ eq 'APP14' ? $this->parse_app14() : # Adobe tags
# parse all JPEG image tags (SOI, EOI and RST* are trivial)
/^(SOI|EOI|RST)$/ ? do { /nothing/ } :
$_ eq 'DQT' ? $this->parse_dqt() :
$_ eq 'DHT' ? $this->parse_dht() :
$_ eq 'DAC' ? $this->parse_dac() :
/^SOF|DHP/ ? $this->parse_sof() :
$_ eq 'SOS' ? $this->parse_sos() :
$_ eq 'DNL' ? $this->parse_dnl() :
$_ eq 'DRI' ? $this->parse_dri() :
$_ eq 'EXP' ? $this->parse_exp() :
# this is the fallback case
$this->parse_unknown(); };
STOP_PARSING:
};
# parsing was ok if no error was catched by the eval.
# Update the "error" member here to reflect this fact.
$this->{error} = $@ if $@;
}
###########################################################
# This method re-executes the parsing of a segment after #
# changing the segment nature (well, its name). This is #
# very handy if you have a JPEG file with a correct appli-#
# cation segment exception made for its name. I used it #
# the first time for a file having an ICC_profile segment #
# (usually in APP2) stored as APP13. Note that the name #
# of the segment is permanently changed, so, if the file #
# is rewritten to disk, it will be "correct". #
###########################################################
sub reparse_as {
my ($this, $new_name) = @_;
# change the nature of this segment by overwriting its name
$this->{name} = $new_name;
# re-execute the parsing
$this->parse();
}
###########################################################
# This method is the entry point for dumping the data #
# structures stored in the records into the private data #
# area. This method needs to be called before rewriting a #
# file to the disk, if any record was changed/added/elimi-#
# nated. The routine dispatches to more specific methods. #
# ------------------------------------------------------- #
# A segment with errors cannot be updated (a security #
# measure: do not update what you do not understand). #
# Entropy-coded segments or past-the-end garbage do not #
# need being updated: the method returns immediately. #
# ------------------------------------------------------- #
# update() saves a reference to the old segment data area #
# and restores it if the specialised update routine fails.#
# This only generate a warning! Are there cleverer ways #
# to handle this case? It is however better to have a #
# corrupt object in memory, than a corrupt object written #
# over the original. Currently, this is restricted to the #
# possibility that an updated segment becomes too large. #
###########################################################
sub update {
my ($this) = @_;
# get the name of the segment
my $name = $this->{name};
# return immediately if this is an entropy-coded segment or
# past-the-end garbage. There is no need to "update" them
return if $name =~ /ECS|Post-EOI/;
# if the segment was not correctly parsed, warn and return
$this->die('This segment is faulty') if $this->{error};
# this might come also from 'NOPARSE'
$this->die('This segment has no records') unless @{$this->{records}};
# save a copy of the old data area.
my $old_content = $this->{dataref};
# blank the data area (do not assign directly to a reference to the
# null string, since it is not modifiable in some implementations!)
$this->{dataref} = \ (my $ns = '');
# an error variable for specific update routines
my $error = undef;
# call more specific routines for segments we know how
# to update. Generate an error if the type is not managed.
# (SOI, EOI and RST* are trivial and should not get here)
for ($name) {
$error = $this->dump_com(), next if $_ eq 'COM';
$error = $this->dump_app1(), next if $_ eq 'APP1';
$error = $this->dump_app13(), next if $_ eq 'APP13';
$error = "Update routine for '$_' not yet implemented"; }
# get the size of the new data area
my $length = $this->size();
# if new size is too large, set the error flag
$error = "Segment '${name}' too large (len=${length}, " .
"max=${JPEG_SEG_MAX_LEN})" if $length > $JPEG_SEG_MAX_LEN;
# if the update failed, revert to the old content
if ($error) {
$this->warn("Update failed [$error]: reverting to old content ...");
$this->{dataref} = $old_content; }
}
###########################################################
# This method outputs the current segment data area into #
# a file handle. The segment "preamble" is prepended, ex- #
# ception made for raw data (scans). The preamble always #
# includes the 0xff byte followed by the segment marker. #
# A Segment which can accept real data also requires a #
# two-byte data count. The return value is the error #
# status of the print calls. #
# ------------------------------------------------------- #
# If the segment size is too large, a warning is printed #
# and 0 is returned (this can make the file invalid); #
# this is however just for debugging, I hope .... #
#=========================================================#
# Note that the data area of a segment can be void and, #
# nonetheless, the segment might require a segment length #
# word (e.g., a "" comment). In practise, the only seg- #
# ments not needing the length word are SOI, EOI and RST*.#
###########################################################
sub output_segment_data {
my ($this, $out) = @_;
# collect the name of the segment and the length of the data area
my $name = $this->{name};
my $length = $this->size();
# Check segment length and throw an exception in case it is too
# large. Do not run the check for raw data or past-the-end data.
$this->die(sprintf('Segment %s too large (len=%d, max=%d)',
$this->{name}, $length, $JPEG_SEG_MAX_LEN))
if $length > $JPEG_SEG_MAX_LEN && $name !~ /ECS|Post-EOI/;
# prepare the segment header (not needed for a raw data segment)
my $preamble = ( $name =~ /ECS|Post-EOI/ ? "" :
pack("CC", $JPEG_PUNCTUATION, $JPEG_MARKER{$name}) );
# prepare the length word (not all segment types need it)
$preamble .= pack("n", 2 + $length)
unless $name =~ /SOI|EOI|RST|ECS|Post-EOI/;
# output the preamble and the data buffer (return the status)
return print {$out} $preamble . $this->data(0, $length);
}
###########################################################
# This method shows the content of the segment. It prints #
# a header, then inspects the directory recursively. #
###########################################################
sub get_description {
my ($this) = @_;
# prepare the marker and the error message
my $amarker = $JPEG_MARKER{$this->{name}};
my $error = $this->{error}; chomp $error if defined $error;
# prepare a header for this segment (was Segment_Banner)
my $description = sprintf("%7dB ", $this->size()) .
($amarker ? sprintf "<0x%02x %5s>", $amarker, $this->{name} :
sprintf "<%10s>", $this->{name} ) .
($error ? " {Faulty segment:\n $error}" : "") . "\n";
# a list for successive keys for numeric tag descriptions
my $names = [ $this->{name} ];
# show all the records we have in our structures (recursively)
$description .= $this->show_directory($this->{records}, $names);
}
###########################################################
# This method shows the content of a record directory in #
# a segment; the first argument is a record list refe- #
# rence; the second argument is a list to a list of names #
# used to resolve numeric tags. A string is returned. #
###########################################################
sub show_directory {
my ($this, $records, $names) = @_;
# protection againts invalid references
return "" unless ref $records eq 'ARRAY';
# prepare the string to be returned at the end
my $description = "";
# an initially empty list for remembering sub-dirs
my @subdirs = ();
# show all records in this directory
foreach (@$records) {
# show the record content
$description .= $_->get_description($names);
# if this is a subdir, remember its reference
push @subdirs, $_ if $_->get_category() eq 'p';
}
# for every subdir we found, recurse
foreach (@subdirs) {
# get the directory name and reference
my ($dir_name, $directory) = ($_->{key}, $_->get_value());
# update the $names list
push @$names, $dir_name;
# print a sub-header for this directory
$description .= Directory_Banner($names, $directory);
# show the sub directory
$description .= $this->show_directory($directory, $names);
# pop the last dir name from @$names
pop @$names;
}
# return the string we cooked up
return $description;
}
###########################################################
# This helper function returns a string to be used as a #
# generic header for a segment directory. #
###########################################################
sub Directory_Banner {
my ($names, $dirref) = @_;
# protections against invalid references
$names = [] unless ref $names eq 'ARRAY';
$dirref = [], push @$names, "[invalid]" unless ref $dirref eq 'ARRAY';
# prepare parts of the description
my $buffer = join " --> ", @$names;
my $decoration = "*" x 10;
my $indentation = " \t" x scalar @$names;
# complete the description and return it
my $description = sprintf "%s%s %s %s (%2d records)",
$indentation, $decoration, $buffer, $decoration, scalar @$dirref;
return $description . "\n";
}
###########################################################
# This helper method is used to test a size condition, #
# i.e. that there is enough data (or exactly some amount #
# of data) in the data buffer. If the test fails, it dies #
###########################################################
sub test_size {
my ($this, $required, $message) = @_;
# positive $require: test not greater
return if $required >= 0 && $this->size() >= $required;
# negative $require: test equality (on -$required)
return if $required < 0 && $this->size() == (- $required);
# if test fails, call die and hope it is intercepted
my $precise = ""; $message = defined $message ? "($message)" : "";
$required *= -1, $precise = "exactly " if $required < 0;
$this->die(sprintf 'Size mismatch in segment %s %s:'
. ' required %s%dB, found %dB.', $this->{name},
$message, $precise, $required, $this->size());
}
###########################################################
# This is a helper method returning the size in bytes of #
# the data area, i.e. that pointed to by $this->{dataref} #
###########################################################
sub size { return length ${$_[0]{dataref}}; }
###########################################################
# This helper method returns a substring of the data area #
# (the arguments are offset and length). #
###########################################################
sub data { substr(${$_[0]{dataref}}, $_[1], $_[2]); }
###########################################################
# This helper method writes into the segment data area. #
# The first argument is a scalar or a scalar reference, #
# which (or whose content) is appended to the current #
# buffer. The method returns the appended string length. #
###########################################################
sub set_data {
my ($this, $addenda) = @_;
# get a reference to new data (remember that the
# first argument can be a scalar or a scalar reference)
my $addref = (ref $addenda) ? $addenda : \$addenda;
# append the new data through the ref
${$this->{dataref}} .= $$addref;
# return the amount of appended data
return length $$addref;
}
###########################################################
# This private method processes the arguments for search #
# routines, like search_record and provide_subdirectory. #
# 1) a start directory is chosen by looking at the last #
# argument: if it is an ARRAY ref it is popped out #
# and used, otherwise the top-level directory (i.e., #
# $this->{records}) is selected; #
# 2) a $keystring is created by joining all remaining #
# arguments on '@', then this string is exploded into #
# a @keylist on the same character; #
# 3) the start directory and the @keylist is returned. #
###########################################################
sub process_search_args {
my $this = shift;
# empty list ==> push a single undefined value
@_ = (undef) unless @_;
# initialise the search directory: use the last argument if
# it is an array reference, the top-level directory otherwise
my $directory = ref $_[$#_] eq 'ARRAY' ? pop : $this->{records};
# delete all undefined or "false" arguments
@_ = grep { defined $_ } @_;
# join all remaining arguments
my $keystring = join('@', @_);
# split the resulting string on '@'
my @keylist = split('@', $keystring);
# delete all false arguments
@keylist = grep { $_ } @keylist;
# return processed arguments
return ($directory, @keylist);
}
###########################################################
# This method searches for a record with a given key in a #
# given record directory, returning a reference to the #
# record if the search was fruitful, undef otherwise. #
# The search is specified as follows: #
# 1) a start directory is chosen by looking at the last #
# argument: if it is an ARRAY ref it is popped out #
# and used, otherwise the top-level directory (i.e., #
# $this->{records}) is selected; #
# 2) a $keystring is created by joining all remaining #
# arguments on '@', then this string is exploded into #
# a @keylist on the same character; #
# 3) these keys are used for an iterative search start- #
# ing from the initially chosen directory: all but #
# the last key must correspond to $REFERENCE records. #
# ------------------------------------------------------- #
# If $key is exactly "FIRST_RECORD" / "LAST_RECORD", the #
# first/last record in the current directory is selected. #
###########################################################
sub search_record {
my $this = shift;
# transform the arguments
my ($directory, @keylist) = $this->process_search_args(@_);
# reset the searched $record to a fake record pointing to the root
my $record = $this->create_record('Fake', $REFERENCE, \ $this->{records});
# search iteratively with all elements in @keylist
for my $key (@keylist) {
# exit the loop as soon as a key is undefined
($record = undef), last unless $key;
# update the current $record
$record =
# reserved key "FIRST_RECORD" returns first record
$key eq "FIRST_RECORD" ? $$directory[0] :
# reserved key "LAST_RECORD" returns last record
$key eq "LAST_RECORD" ? $$directory[$#$directory] :
# standard search (get first matching record or undef)
((grep { $_->{key} eq $key } @$directory), undef)[0];
# stop if $record is undefined or is not a $REFERENCE
last unless $record && $record->get_category() eq 'p';
# update $directory for next search
$directory = $record->get_value(); }
# return the search result
return $record;
}
###########################################################
# A simple wrapper around search_record(): it returns the #
# record value if the search is ok, undef otherwise. #
###########################################################
sub search_record_value {
my $this = shift;
# call search_record passing all arguments through
my $record = $this->search_record(@_);
# return the record value if record is defined
return $record ? $record->get_value() : undef;
}
###########################################################
# This method looks for a path of subdirectories from a #
# given record list. The treatment of arguments is simi- #
# lar to that of search_record: all arguments are joined #
# to form a path specification, which is followed, and #
# the last directory (record list) is returned. An optio- #
# nal last argument may specify an initial directory for #
# the search (this defaults to $this->{records}). If any #
# subdir entry is not there, it is created on the fly. #
###########################################################
sub provide_subdirectory {
my $this = shift;
# transform the arguments
my ($dirref, @keylist) = $this->process_search_args(@_);
# search iteratively with all elements in @keylist
for my $key (@keylist) {
# keys cannot be undefined
$this->die('Undefined key') unless $key;
# search the subdirectory record
my $record = $this->search_record($key, $dirref) ||
$this->store_record($dirref, $key, $REFERENCE, \ []);
# die if $record is not a $REFERENCE
$this->die('Not a reference') unless $record->get_category() eq 'p';
# update $dirref for next search
$dirref = $record->get_value(); }
# return the search result
return $dirref;
}
###########################################################
# This method creates a (possibly multi-valued) JPEG seg- #
# ment record from a data buffer or from the segment data #
# area, and it is the lowest level record-related method, #
# the only one actually calling the JPEG::Record ctor. #
# It needs the record identifier, the value type, [a sca- #
# lar reference to read data from] or [the offset of the #
# memory to read in the data area], and an optional count.#
# A reference to the record is returned at the end . #
#=========================================================#
# If a scalar reference is passed, no check is performed #
# on the size of the referenced scalar, because it is as- #
# sumed that this is dealt with in the caller routine (be #
# sure that $count is correct in this case!), and all the #
# arguments are simply passed to the Record constructor. #
# The correct endianness is read from the value of the #
# current endianness, which is a private object member. #
###########################################################
sub create_record {
my ($this, $identifier, $type, $dataref, $count) = @_;
# if the third argument is an offset, we need to convert it
unless (ref $dataref) {
# the data reference is indeed an offset
my $offset = $dataref;
# buffer length is calculated by the Record class
my $length = Image::MetaData::JPEG::Record->get_size($type, $count);
# for variable-length types, $count is the real length
$length = $count if $length == 0;
# replace the third argument with a scalar reference
$dataref = \ $this->data($offset, $length);
# update the offset through its alias (dangerous)
# but don't complain if we have a read-only offset
eval { $_[3] += $length; };
}
# call the record constructor and return its value (a reference)
return new Image::MetaData::JPEG::Record
($identifier, $type, $dataref, $count, $this->{endianness});
}
###########################################################
# This method is a wrapper for create_record returning #
# the parsed value and NOT storing the record internally #
# (for this reason we can set $identifier = 0). So, the #
# arguments are: type, data reference, count. The data #
# reference can be replaced by an offset, used to access #
# the internal segment data buffer. If the offset is an #
# lvalue, it is updated to point after the memory just #
# read. The count can be undefined (it defaults to 1). #
###########################################################
sub read_record {
# @_ = (this, type, dataref/offset, count)
my $this = shift;
# invoke create_record: the first argument (the identifier)
# is dummy, for the others we can use @_. Return the value
return $this->create_record(0, @_)->get_value();
}
###########################################################
# This method creates a generic JPEG segment record just #
# like read_record, stores it in the "records" list, and #
# returns a reference to the newly created record. If the #
# offset is an lvalue, it is updated to point after the #
# memory just read. See read_record for further details. #
#=========================================================#
# A list reference can be prepended to the argument list; #
# in this case it is used instead of $this->{records}. #
###########################################################
sub store_record {
# @_ = (this, [record list,] identifier, type, dataref/offset, count)
my $this = shift;
# get a reference to the record list; but if next argument
# is a reference, use it instead (and take it out of @_)
my $records = $this->{records};
$records = shift if ref $_[0];
# create a new record and insert it into the record
# list; we can use @_ for all the arguments.
push @$records, $this->create_record(@_);
# return a reference to the last record
return $$records[$#$records];
}
###########################################################
# Load other parts for this package. In order to avoid #
# that this file becomes too large, only general interest #
# methods are written here. #
###########################################################
require 'Image/MetaData/JPEG/parsers/parsers.pl';
require 'Image/MetaData/JPEG/dumpers/dumpers.pl';
# successful package load
1;