#!/usr/bin/env perl # Copyright (c) 2015-2020 Christian Jaeger, copying@christianjaeger.ch # This is free software. See the file COPYING.md that came bundled # with this file. use strict; use warnings; use warnings FATAL => 'uninitialized'; use experimental "signatures"; # find modules from functional-perl working directory (not installed) use Cwd 'abs_path'; our ($mydir, $myname); BEGIN { my $location = (-l $0) ? abs_path($0) : $0; $location =~ /(.*?)([^\/]+?)_?\z/s or die "?"; ($mydir, $myname) = ($1, $2); } use lib "$mydir/../lib"; sub usage { print "usage: $myname file.pdf [file2.pdf ..] Convert a pdf file to SVG images (by way of `pdf2svg`) and a set of html pages embedding them. Options: --single create a single html page with all pages (default: one page per html file) --outdir default: file path with .pdf suffix stripped "; exit 1; } use Getopt::Long; my $verbose = 0; my $opt_single; my $opt_outdir; GetOptions( "verbose" => \$verbose, "help" => sub {usage}, "single-page" => \$opt_single, "outdir=s" => \$opt_outdir, ) or exit 1; use FP::IOStream qw(xdirectory_paths); use FP::List qw(list cons); use FP::Stream qw(Keep); use Chj::xperlfunc qw(xstat xxsystem_safe xunlink basename dirname); use FP::Combinators qw(compose_scalar); use FP::Ops qw(the_method real_cmp regex_match regex_xsubstitute); use PXML::XHTML ':all'; use PXML::Serialize qw(puthtmlfile); use FP::Array_sort qw(on); use Chj::xIOUtil qw(xputfile_utf8); use Chj::TEST ":all"; use FP::Div qw(min max); use Chj::singlequote qw(quote_javascript); sub note { print STDERR "$myname: note: ", @_, "\n"; } sub css_link($src) { LINK({ rel => "stylesheet", href => $src, type => "text/css" }) } # svgfile and html paths our $svgfile_template = 'page-%02d.svg'; our $svgpath_re = qr{(^|.*/)page-(\d+)\.svg$}s; *svgpath_to_htmlpath = regex_xsubstitute($svgpath_re, sub {"$1/page-$2.html"}); *svgpath_to_pageno = regex_xsubstitute($svgpath_re, sub { $2 +0 }); our $css_src = "$myname.css"; # CSS contents my $css_code = ' ul.menu { border: 1px solid #000; background-color: #eee; padding: 5px; list-style: none; padding-left: 0.5em; } li.menu { border-right: 1px solid #000; list-style: none; padding-left: 0.5em; padding-right: 0.3em; display: inline; } li.menu_last { list-style: none; padding-left: 0.5em; padding-right: 0.3em; display: inline; } '; sub svgpaths($dir) { xdirectory_paths($dir)->filter(regex_match $svgpath_re) ->sort(on \&svgpath_to_pageno, \&real_cmp) } # ------------------------------------------------------------------ # file conversion sub possibly_symlink ($old, $new) { symlink $old, $new or note "could not add symlink at '$new': $!"; } # wrapper just because Perl's core ops can't be passed by reference sub possibly_unlink($path) { unlink $path } # convert pdf to svg unless already done sub possibly_do_pdf2svg ($infile, $outdir) { my $outfiles = svgpaths($outdir); my $t_in = sub { xstat($infile)->mtime }; my $t_oldest = sub { Keep($outfiles)->map(compose_scalar the_method("mtime"), \&xstat)->min }; if ($outfiles->is_null or &$t_in >= &$t_oldest) { $outfiles->for_each(\&xunlink); xxsystem_safe "pdf2svg", $infile, "$outdir/$svgfile_template", 'all'; 1 } else { 0 } } # shorten the navigation to only the pages around the current one plus # first and last if necessary sub possibly_shortened ($l, $selected_i, $window_sidelen, $before, $after) { my $len = $l->length; my $i1 = max(0, $selected_i - $window_sidelen); my $i2 = min($len, $selected_i + $window_sidelen + 1); my $remainder = sub ($l, $li) { if ($i2 < ($len - 1)) { # cut out right part $l->take($li + $i2 - $i1)->append($after, list($l->last)); } else { $l } }; if ($i1 > 1) { # cut out left part cons( $l->first, $before->append( &$remainder($l->drop($i1), 0) # XX need to turn purearray into a list # or it will be an improper end of the # new list. Ugly. ->list ) ) } else { &$remainder($l, $i1) } } # 0 1 2 3 4 5 6 7 my $l = list(qw(a b c d e f g h)) unless no_tests; my $lu = list(undef) unless no_tests; # right TEST { possibly_shortened($l, 4, 1, $lu, $lu) } list('a', undef, 'd', 'e', 'f', undef, 'h'); TEST { possibly_shortened($l, 5, 1, $lu, $lu) } list('a', undef, 'e', 'f', 'g', 'h'); TEST { possibly_shortened($l, 6, 1, $lu, $lu) } list('a', undef, 'f', 'g', 'h'); TEST { possibly_shortened($l, 7, 1, $lu, $lu) } list('a', undef, 'g', 'h'); TEST { possibly_shortened($l, 7, 1, $lu, $lu) } list('a', undef, 'g', 'h'); # left TEST { possibly_shortened($l, 0, 1, $lu, $lu) } list('a', 'b', undef, 'h'); TEST { possibly_shortened($l, 1, 1, $lu, $lu) } list('a', 'b', 'c', undef, 'h'); TEST { possibly_shortened($l, 2, 1, $lu, $lu) } list('a', 'b', 'c', 'd', undef, 'h'); TEST { possibly_shortened($l, 3, 1, $lu, $lu) } list('a', undef, 'c', 'd', 'e', undef, 'h'); TEST { possibly_shortened($l, 3, 1, $lu, list(0)) } list('a', undef, 'c', 'd', 'e', 0, 'h'); # width TEST { possibly_shortened($l, 3, 3, $lu, $lu) } $l; TEST { possibly_shortened($l, 3, 4, $lu, $lu) } $l; TEST { possibly_shortened($l, 3, 44, $lu, $lu) } $l; TEST { possibly_shortened($l, 7, 6, $lu, $lu) } $l; TEST { possibly_shortened($l, 7, 44, $lu, $lu) } $l; TEST { possibly_shortened($l, 7, 5, $lu, $lu) } list('a', undef, qw(c d e f g h)); sub paging_js_fragment ($keycode, $svgpath) { my $htmlpath = svgpath_to_htmlpath($svgpath); # HACK: make path correctly locally relative, and avoid having to # add parent-taking code to the js: $htmlpath =~ s|.*/|/../|s; my $quotedpath = quote_javascript($htmlpath); " case $keycode: window.location.pathname= window.location.pathname + $quotedpath; break;" } sub paging_js ($svgpaths, $maybe_i) { if (defined $maybe_i) { my $len = $svgpaths->length; my $i = $maybe_i; my $prev_js = $i == 0 ? "" : paging_js_fragment(37, $svgpaths->ref($i - 1)); my $next_js = $i == ($len - 1) ? "" : paging_js_fragment(39, $svgpaths->ref($i + 1)); SCRIPT( { language => "JavaScript", type => "text/javascript" }, ' function actUp(evt) { evt = (evt) ? evt : ((event) ? event : null); if (evt) { switch (evt.keyCode) {' . $prev_js . $next_js . ' } } } document.onkeyup = actUp; ' ) } else { undef # XX: add anchor based js in this case? } } TEST { paging_js(list(map {"page-$_.svg"} 0 .. 3), 3) } SCRIPT( { language => 'JavaScript', type => 'text/javascript' }, ' function actUp(evt) { evt = (evt) ? evt : ((event) ? event : null); if (evt) { switch (evt.keyCode) { case 37: window.location.pathname= window.location.pathname + "/../page-2.html"; break; } } } document.onkeyup = actUp; ' ); TEST { paging_js(list(map {"page-$_.svg"} 0 .. 3), 2) } SCRIPT( { language => 'JavaScript', type => 'text/javascript' }, ' function actUp(evt) { evt = (evt) ? evt : ((event) ? event : null); if (evt) { switch (evt.keyCode) { case 37: window.location.pathname= window.location.pathname + "/../page-1.html"; break; case 39: window.location.pathname= window.location.pathname + "/../page-3.html"; break; } } } document.onkeyup = actUp; ' ); TEST { paging_js(list(map {"page-$_.svg"} 0 .. 3), 0) } SCRIPT( { language => 'JavaScript', type => 'text/javascript' }, ' function actUp(evt) { evt = (evt) ? evt : ((event) ? event : null); if (evt) { switch (evt.keyCode) { case 39: window.location.pathname= window.location.pathname + "/../page-1.html"; break; } } } document.onkeyup = actUp; ' ); our $nav_window_sidelen = 10; my $insert = list(undef); sub navigation_html ($svgpaths, $for_svgpath, $is_single) { my $is_selected = sub($path) { $path eq $for_svgpath }; my $possibly_shortened_svgpaths = possibly_shortened($svgpaths, svgpath_to_pageno($for_svgpath), $nav_window_sidelen, $insert, $insert); my $ul = UL( { class => "menu" }, $possibly_shortened_svgpaths->map_with_islast( sub ($is_last, $maybe_svgpath) { if (defined $maybe_svgpath) { my $svgpath = $maybe_svgpath; my $pageno = svgpath_to_pageno($svgpath); my $href = $is_single ? "#p$pageno" : basename svgpath_to_htmlpath($svgpath); LI( { class => ($is_last ? "menu_last" : "menu") }, ( &$is_selected($svgpath) ? SPAN({ class => "menu_selected" }, $pageno) : A({ href => $href }, $pageno) ) ) } else { # never the last LI({ class => "menu" }, "...") } } ) ); $is_single ? A({ name => "p" . svgpath_to_pageno($for_svgpath) }, $ul) : $ul } # pure function that returns the actions to be taken (this allows us # to inspect them before their execution, for debugging or testing): sub _svgpaths_to_html_actions ($svgpaths, $title, $outdir) { # (No need to protect $svgpaths with `Keep` here since it's a # purearray because of the sorting) # the html fragment for one page from the pdf my $page_htmlfragment = sub ($is_last, $for_svgpath) { # sub needed to work around destruction of document by # weakening done in serializer (ugly, really replace all # weakening and Keep stuff with a fixed perl?) my $TR_TD_nav = sub { TR TD { align => "center" }, navigation_html($svgpaths, $for_svgpath, $opt_single) }; [ &$TR_TD_nav, TR(TD(IMG { src => basename($for_svgpath), width => "100%" })), $opt_single ? ($is_last ? (TR TD HR) : ()) : &$TR_TD_nav ] }; my $html = sub ($title, $body, $maybe_for_svgpath) { HTML( { lang => 'en' }, # XX should not assume 'en' (use HTML5) HEAD( TITLE($title), css_link($css_src), paging_js($svgpaths, $maybe_for_svgpath) ), BODY(TABLE({ width => "100%", border => 0 }, $body)) ) }; cons( [\&xputfile_utf8, "$outdir/$css_src", $css_code], $opt_single ? # all PDF pages in a single HTML page list( [\&possibly_unlink, "$outdir/index.html"], [ \&puthtmlfile, "$outdir/index.html", &$html( $title, $svgpaths->map_with_islast($page_htmlfragment), undef ) ] ) : # one HTML page per PDF page cons( [ \&possibly_symlink, basename(svgpath_to_htmlpath($svgpaths->first)), "$outdir/index.html" ], $svgpaths->map_with_index( sub ($i, $svgpath) { [ \&puthtmlfile, svgpath_to_htmlpath($svgpath), &$html( "$title - page " . svgpath_to_pageno($svgpath), &$page_htmlfragment(0, $svgpath), $i ), ] } ) ) ) } sub svgpaths_to_html_actions ($infile, $outdir) { _svgpaths_to_html_actions(svgpaths($outdir), basename($infile), $outdir) } sub pdf_to_html($infile) { my $outdir = $opt_outdir // dirname($infile) . "/" . basename($infile, ".pdf", 1); mkdir $outdir; possibly_do_pdf2svg($infile, $outdir) or note "svg files are up to date"; svgpaths_to_html_actions($infile, $outdir)->for_each( sub($action) { my ($proc, @args) = @$action; &$proc(@args) } ); } if ($ENV{DEBUG}) { require FP::Repl::AutoTrap; FP::Repl::repl(); } else { perhaps_run_tests __PACKAGE__ or do { usage unless @ARGV; pdf_to_html($_) for @ARGV; } }