The Perl Toolchain Summit 2025 Needs You: You can help 🙏 Learn more

#!/usr/bin/env perl
#
# Copyright (c) 2014-2021 Christian Jaeger, copying@christianjaeger.ch
#
# This is free software, offered under either the same terms as perl 5
# or the terms of the Artistic License version 2 or the terms of the
# MIT License (Expat version). See the file COPYING.md that came
# bundled with this file.
#
use strict;
use warnings FATAL => 'uninitialized';
use experimental "signatures";
# find modules from functional-perl working directory (not installed)
use Cwd 'abs_path';
our ($mydir, $myname);
BEGIN {
my $location = (-l $0) ? abs_path($0) : $0;
$location =~ /(.*?)([^\/]+?)_?\z/s or die "?";
($mydir, $myname) = ($1, $2);
}
use lib "$mydir/../lib";
use lib "$mydir/../meta"; # for FunctionalPerl::Indexing
# and the htmlgen/ directory
use lib $mydir;
use Hash::Util 'lock_hash';
use Chj::xperlfunc qw(basename dirname);
use Chj::xIOUtil qw(xgetfile_utf8 xcopyfile);
use FP::HashSet ":all";
use PXML::XHTML ":all";
use PXML::Serialize 'puthtmlfile';
use FP::Array ":all";
use FP::Ops qw(string_cmp real_cmp the_method cut_method);
use FP::Equal qw(equal);
use Chj::TEST ":all";
use FP::List qw(is_pair list);
use PXML::Util qw(pxml_map_elements_exhaustively);
use FP::Hash ":all";
use PXML::Tags qw(with_toc);
use FP::Predicates qw(complement);
use FunctionalPerl::Htmlgen::PathUtil qw(path_add path_diff path0);
use FunctionalPerl::Htmlgen::FileUtil qw(existingpath_or create_parent_dirs);
use FunctionalPerl::Htmlgen::MarkdownPlus qw(markdownplus_parse);
use FunctionalPerl::Htmlgen::Mediawiki qw(mediawiki_prepare mediawiki_replace
mediawiki_rexpand);
use PXML::Preserialize qw(pxmlpre);
use FP::Equal qw(pointer_eq);
our $css_path = "htmlgen.css";
sub usage {
print "$myname config inbase outbase
config is the path to a Perl file ending in a hash with config
values, see functional-perl/website/gen-config.pl for an example.
inbase needs to be a git working directory.
Assumes that there is a file '$css_path', which is included in the
<head/> and copied to outbase.
Options:
--repl open a repl instead of running the main action
--trap trap uncached exception in a repl (implied by --repl)
";
exit 1;
}
our $verbose = 0;
our ($opt_repl, $opt_trap);
GetOptions(
"verbose" => \$verbose,
"help" => sub {usage},
"repl" => \$opt_repl,
"trap" => \$opt_trap,
) or exit 1;
usage unless @ARGV == 3 or ($opt_repl and @ARGV >= 1);
our ($configpath, $inbase, $outbase) = @ARGV;
our $user_config = require $configpath;
require FP::Repl::Trap if $opt_trap;
our $config = hashset_union($user_config, $default_config);
lock_hash %$config;
our $pathtranslate = FunctionalPerl::Htmlgen::PathTranslate->new__(
subhash($config, "is_indexpath0", "downcaps"));
our $gitrepository = FP::Git::Repository->new_(chdir => $inbase);
sub path0_to_inpath($path0) {
"$inbase/" . $path0
}
sub path0_to_outpath($path0) {
#"$outbase/".$pathtranslate->xsuffix_md_to_html($path0,0)
# nope, also used for .pl file copying,
"$outbase/" . $pathtranslate->possibly_suffix_md_to_html($path0, 0)
}
# XX make configurable
# (This is getting a bit too layered now. Still Ok?)
# normal simply gives a keyword arg based constructor that creates it
sub normal($classname) {
sub {
$classname->new_(@_)
}
}
# This one adds in the functional_perl_base_dir argument:
sub with_args ($classname, @args) {
sub {
$classname->new_(@_, @args)
}
}
our $pxml_mappers =
# These implement FunctionalPerl::Htmlgen::PXMLMapper; subarray
# for those that match on the same tagname, in order of preference
# (the chain exits once an element has been replaced).
[
normal("FunctionalPerl::Htmlgen::Linking::Anchors"),
normal("FunctionalPerl::Htmlgen::Toc"), # <with_toc>
[
normal("FunctionalPerl::Htmlgen::PerlTidy"),
with_args(
"FunctionalPerl::Htmlgen::Linking::code",
# For names that we never want to auto-link as modules to
# CPAN since they are something else ("CPAN-exceptions"):
# This directory is scanned for all sub definitions (not
# package names), including method names, and they are all
# excluded from being auto-linked to CPAN. (Note: this is
# relative to cwd which is set by website/gen to be inside
# website/; this is a hack, should be passed as an
# argument instead.)
functional_perl_base_dir => "..",
# These names are also being ignored (things which do not
# exist as a subroutine but still shouldn't be linked):
static_ignore_names => [
qw( map tail grep head join test shift inverse Maybe
Just Nothing Int Integer undef let my our foo bar
baz sub return open all )
]
)
],
normal("FunctionalPerl::Htmlgen::Linking::a_href"),
];
sub pxml_name_to_mapper(@PXMLMapper_args) {
purehash(
map {
my $ref = ref $_ or die "bug";
if (not $ref eq "CODE") {
# subarray given (autobox coming into play; can be
# "ARRAY" or purearray depending on time...)
my $ms = $_->map(sub ($constr) { $constr->(@PXMLMapper_args) });
my $elnames = $ms->first->match_element_names;
$ms->rest->every(
sub($v) {
equal $elnames, $v->match_element_names
}
) or die "not all have the same elnames";
# chain element $e through them until it was processed:
my $fn = $ms->fold_right(
sub ($m, $prev) {
sub ($e, $uplist) {
#warn "checking out $m..";
my $e2 = $m->map_element($e, $uplist);
if (pointer_eq $e, $e2) {
&$prev($e, $uplist)
} else {
$e2
}
}
}, sub ($e, $uplist) {$e}
);
map { ($_, $fn) } @$elnames
} else {
# instantiate a mapper
my $m = $_->(@PXMLMapper_args);
# register it for all the element names it wants to see
map {
($_, sub { $m->map_element(@_) })
} @{ $m->match_element_names }
}
} @$pxml_mappers
)
}
sub default_pxml_name_to_mapper () {
# fake instance for tests only
pxml_name_to_mapper(
path0 => "NOPATH",
maybe_have_path0 => sub { die "NOIMPL" },
perhaps_filename_to_path0 => sub { die "NOIMPL" },
pathtranslate => $pathtranslate
)
}
sub process_body (
$v,
$pxml_name_to_mapper = default_pxml_name_to_mapper(),
$mediawikitoken = "NOTOKEN",
$mediawikitable = {}
)
{
__ "([PXML::Element],..) -> [PXML::Element] "
. "-- applies the transformations in $pxml_mappers";
# HACK: can't mediawiki_expand the whole document in string format
# before the markdown parsing, since it would expand examples in
# code sections as well. So instead leave the [[ ]] in (unharmed
# by markdown) and expand it here on individual text segments,
# then map it (and here comes the hacky part, you will forget to
# update here when prepending another mapping phase or so, right?)
# the same way the rest of the document is treated.
my $map_text_mediawiki = sub ($v, $uplist) {
# ($uplist might be empty in tests)
if (is_pair $uplist and $uplist->first->name eq "code") {
# don't expand it in code segments
mediawiki_replace($v, $mediawikitoken, $mediawikitable)
} else {
my $body = mediawiki_rexpand($v, $mediawikitoken, $mediawikitable);
array_map(
sub($e) {
pxml_map_elements_exhaustively($e, $pxml_name_to_mapper,
undef);
},
$body
)
}
};
pxml_map_elements_exhaustively($v, $pxml_name_to_mapper,
$map_text_mediawiki)
}
TEST {
HTML(process_body(["Hello", WITH_TOC({ level => 1 }, H1 "world")]))->string
}
'<html>Hello<div class="toc_box"><dir class="toc"><h3 class="toc_title">Contents</h3><li><dir class="toc"><a href="#world">1. world</a></dir></li></dir></div><a name="world"><h1>1. world</h1></a></html>';
TEST {
HTML(process_body(["Hello", WITH_TOC ["some", H2 "world"]]))->string
}
'<html>Hello<div class="toc_box"><dir class="toc"><h3 class="toc_title">Contents</h3><li><dir class="toc"><a href="#world">1. world</a></dir></li></dir></div>some<a name="world"><h2>1. world</h2></a></html>';
TEST {
HTML(
process_body(
[
P("Hello"),
WITH_TOC { level => 1 },
[" ", P("blabla"), A({ name => "a" }, " ", DIV H1("for one")),]
]
)
)->string
}
'<html><p>Hello</p><div class="toc_box"><dir class="toc"><h3 class="toc_title">Contents</h3><li><dir class="toc"><a href="#for_one">1. for one</a></dir></li></dir></div> <p>blabla</p><a name="a"> <div><a name="for_one"><h1>1. for one</h1></a></div></a></html>';
TEST {
HTML(
process_body(
[
P("Hello"),
WITH_TOC { level => 1 },
[
" ", P("blabla"), H1("for one"), H2("more one"),
TABLE(TR TD P("blah"), H1("sub two"), DIV("bla")),
]
]
)
)->string
}
'<html><p>Hello</p>'
. '<div class="toc_box"><dir class="toc"><h3 class="toc_title">Contents</h3><li><dir class="toc"><a href="#for_one">1. for one</a><li><dir class="toc"><a href="#more_one">1.1. more one</a></dir></li></dir></li><li><dir class="toc"><a href="#sub_two">2. sub two</a></dir></li></dir></div>'
. ' <p>blabla</p><a name="for_one"><h1>1. for one</h1></a><a name="more_one"><h2>1.1. more one</h2></a><table><tr><td><p>blah</p><a name="sub_two"><h1>2. sub two</h1></a><div>bla</div></td></tr></table></html>';
# Note: the cost range feature is not used on the Functional Perl
# website. It could be used to connect issues to a cost for budgeting.
use FP::Struct [
'filesinfo', 'groupedfiles', 'nonbugfiles',
'costranges', # path0 -> costrange-string; filled
# in by mutation when processing
# non-buglist groups
];
sub set_costrange ($self, $path0, $maybe_costrange) {
__ '~ugly, mutates';
$$self{costranges}{$path0} = $maybe_costrange;
}
sub costrange ($self, $path0) {
$$self{costranges}{$path0}
}
_END_
}
use FP::Hash "hash_perhaps_ref";
use FP::Array qw(array_to_hash_group_by);
use FunctionalPerl::Htmlgen::PathUtil qw(path_add path_diff path0);
sub groupkey($path) {
__ 'a string key for grouping this path amongst other files, to '
. 'process them in an order that satisfies dependency on costranges';
my $p0 = path0 $path;
if ($p0 =~ m|^bugs/|) {
"bugs"
} elsif ($p0 =~ m|^docs/bugs.*\.md$|) {
"buglist"
} else {
"normal"
}
}
use FP::Struct [
'files', 'filename_to_path0', 'all_path0_exists',
'path0_exists', # path0 => ()
'all_path0_used', # path0 => usecount, mutated
];
sub filename_to_path0 ($self, $filename) {
$$self{filename_to_path0}{$filename}
// die "no mapping for filename '$filename'"
}
sub filename_to_maybe_path0 ($self, $filename) {
$$self{filename_to_path0}{$filename}
}
# still very unclear about the exact name styling. Prefer perhaps,
# and "then" also move the qualificator to the front? (but leave
# the one for hash_ where it is since hash_ is the type prefix?) :
sub perhaps_filename_to_path0 ($self, $filename) {
hash_perhaps_ref($$self{filename_to_path0}, $filename)
}
sub all_path0_exists ($self, $path0) {
defined $$self{all_path0_exists}{$path0}
or defined $$self{all_path0_exists}{ $path0 . "/" }
}
sub path0_is_directory ($self, $path0) {
$path0 =~ s|/+$||s;
defined $$self{all_path0_exists}{ $path0 . "/" }
}
sub path0_exists ($self, $path0) {
defined $$self{path0_exists}{$path0}
}
sub all_path0_used_inc ($self, $path0) {
__ '~ugly, mutates';
$$self{all_path0_used}{$path0}++
}
sub new_genfilestate($self) {
my $groupedfiles = array_to_hash_group_by $self->files, \&groupkey;
my $nonbugfiles = purearray(@{ $$groupedfiles{normal} },
@{ $$groupedfiles{buglist} || [] });
FunctionalPerl::Htmlgen::_::Genfilestate->new(
$self, # just so as to bundle it up, too, ugly?
$groupedfiles, $nonbugfiles, {} # costranges (mutated)
)
}
_END_
}
sub get_filesinfo () {
__ '() -> Filesinfo '
. '-- read info about files from disk as immutable struct';
my $all_files = $gitrepository->ls_files->array;
my $files = array_filter(cut_method($pathtranslate, "is_md"), $all_files);
my $filename_to_path0 = +{ map { basename($_) => path0($_) } @$files };
my $all_path0_exists = +{
map { path0($_) => 1 } @$all_files,
# and their directories, ok? Any directory that has files
# from git will be ok as link target, ok?
map { dirname($_) . "/" } @$all_files
};
my $path0_exists = +{ map { path0($_) => 1 } @$files };
FunctionalPerl::Htmlgen::_::Filesinfo->new(
$files, $filename_to_path0, $all_path0_exists, $path0_exists,
{}, # all_path0_used
)
}
# Navigation:
sub nav_bar ($items_in_level, $item_selected, $viewed_at_item) {
UL(
{ class => "menu" },
$items_in_level->map_with_islast(
sub ($is_last, $item) {
my $filetitle = $pathtranslate->path0_to_title($item->path0);
my $is_viewed = equal($item, $viewed_at_item);
my $is_open = equal($item, $item_selected);
LI(
{ class => ($is_last ? "menu_last" : "menu") },
(
$is_viewed
? SPAN({ class => "menu_selected" }, $filetitle)
: A(
{
class => ($is_open ? "menu_open" : "menu"),
href => File::Spec->abs2rel(
$pathtranslate->xsuffix_md_to_html(
$item->path0, 0
),
dirname($viewed_at_item->path0)
)
},
$filetitle
)
),
" "
)
}
)
)
}
# For access from main:: by the code in the configuration file (hacky):
use FunctionalPerl::Htmlgen::Nav qw(_nav entry);
sub nav {
_nav(list(@_), \&nav_bar)
}
# /for access from main:: by the code in the configuration file.
our $HEAD = pxmlpre 3, sub ($title, $csspath, $user_additions) {
HEAD(TITLE($title),
LINK({ rel => "stylesheet", href => $csspath, type => "text/css" }),
$user_additions)
};
sub genfile ($path, $groupname, $genfilestate) {
__ '($path, $groupname, $genfilestate) -> () '
. '-- convert markdown file at $path to xhtml file at '
. 'corresponding output path';
my $path0 = path0 $path;
my $outpath = path0_to_outpath($path0);
mkdir dirname($outpath);
my $filetitle = $pathtranslate->path0_to_title($path0);
my $str = xgetfile_utf8 "$inbase/$path";
if ($$config{warn_hint}) {
$str =~ s/^\(?Check the.*?website.*?---\s+//s
or $path =~ /COPYING|bugs|licenses\//
or warn "'$path' is missing hint";
}
if (my $hdl = $config->{path0_handlers}->{$path0}) {
$str = $hdl->($path, $path0, $str);
}
my $maybe_costrange = do {
# extract Cost indicators:
my $namere = qr/\w+/;
my $nameplusre = qr/\(?$namere\)?/;
my $possibly_nameplus_to_name = sub($maybe_nameplus) {
if (defined $maybe_nameplus) {
my ($name) = $maybe_nameplus =~ qr/($namere)/ or die "bug";
$name
} else {
undef
}
};
local our $costs = [];
while (
$str =~ m{\b[Cc]ost
# name: parentheses for "library cost"
(?:\s+($nameplusre))?
:
\s*
# base costs
((?:$nameplusre\s*\+\s*)*)
\s*
# amount
\$\s*(\d+)
}gx
)
{
my ($nameplus, $basecosts, $val) = ($1, $2, $3);
my $name = &$possibly_nameplus_to_name($nameplus);
my @basecosts
= map { &$possibly_nameplus_to_name($_) } split /\s*\+\s*/,
$basecosts;
push @$costs,
new FunctionalPerl::Htmlgen::Cost::_::Cost($name,
(not $nameplus or not($nameplus =~ /^\(/)),
\@basecosts, $val);
}
@$costs
? FunctionalPerl::Htmlgen::Cost::_::Totalcost->new($costs)->range
: undef
};
if (defined $maybe_costrange) {
$genfilestate->set_costrange($path0, $maybe_costrange);
}
my $mediawikitoken = rand; # not fork safe!
my ($h1, $body, $mediawikitable)
= markdownplus_parse($str,
lazy { $pathtranslate->path0_to_title($path0) },
$mediawikitoken);
my $maybe_buglist = $groupname eq "buglist" && do {
my $bugs = array_sort(
array_map(
sub($path) {
my $path0 = path0 $path;
my $title = $pathtranslate->path0_to_title($path0);
[$title, $path0, $genfilestate->costrange($path0)]
},
$genfilestate->groupedfiles->{bugs}
),
on sub { $_[0][0] },
\&string_cmp # XX not a good cmp.
);
TABLE(
{ class => "costlist" },
THEAD(TH("Type"), TH("Title"), TH("Cost range (USD)")),
map {
my ($title, $p0, $costrange) = @$_;
my $relurl
= File::Spec->abs2rel(
$pathtranslate->xsuffix_md_to_html($p0, 0),
basename($path0));
TR(
TD($pathtranslate->path0_to_bugtype($p0)),
TD(A({ href => $relurl }, $title)),
TD({ align => "center" }, $costrange)
)
} @$bugs
)
};
my $filesinfo = $genfilestate->filesinfo;
my $pxml_name_to_mapper = pxml_name_to_mapper(
path0 => $path0,
maybe_have_path0 => sub($path0) {
if ($filesinfo->all_path0_exists($path0)) {
$filesinfo->all_path0_used_inc($path0);
$path0
} else {
undef
}
},
perhaps_filename_to_path0 => sub($filename) {
$filesinfo->perhaps_filename_to_path0($filename)
},
map_code_body => hash_maybe_ref($config, "map_code_body"),
pathtranslate => $pathtranslate,
);
my $nav = $$config{nav};
my $nav_index = $nav->index;
# ^ this could be cached across all documents (but measurements
# haves shown it to be insignificant)
my $nav_upitems = $nav_index->path0_to_upitems($path0);
my $nav_self = $nav_upitems->first;
my $html = HTML(
&$HEAD(
[$config->{title}->($filetitle)],
scalar path_diff($path0, $css_path),
scalar $config->{head}->($path0)
),
BODY(
$config->{header}->($path0),
(
1
?
# make the top nav level contain all unknown pages
# (and don't include pages declared in the nav that
# don't exist)
(
$nav->nav_bar_level0(
$genfilestate->nonbugfiles->map (
sub($p0) {
$nav_index->path0_to_item($p0)
}
),
$nav_upitems->last,
$nav_upitems->first
),
$nav->nav_bar_levels($nav_self, $nav_upitems)->rest
)
:
# strictly follow the navigation declaration
$nav->nav_bar_levels($nav_self, $nav_upitems)
),
$config->{belownav}->($path0),
$h1,
process_body(
$body, $pxml_name_to_mapper,
$mediawikitoken, $mediawikitable
),
$maybe_buglist,
BR, HR,
(
$maybe_costrange
? P("\x{21d2} Cost range: \$", $maybe_costrange)
: ()
),
DIV({ class => "footer_date" }, $gitrepository->author_date($path)),
$config->{footer}->($path0)
)
);
puthtmlfile($outpath, $html);
}
sub genfiles($filesinfo) {
my $genfilestate = $filesinfo->new_genfilestate;
for my $groupname (qw(bugs normal buglist)) {
for (@{ $genfilestate->groupedfiles->{$groupname} }) {
genfile $_, $groupname, $genfilestate
}
}
}
# copy referenced non-.md files:
sub copyfiles($filesinfo) {
for my $path0 (
hashset_keys hashset_union(
$filesinfo->all_path0_used,
array_to_hashset(hash_ref_or($config, "copy_paths", []))
)
)
{
next if $filesinfo->path0_exists($path0); # md path
next if $filesinfo->path0_is_directory($path0);
create_parent_dirs($path0, \&path0_to_outpath);
xcopyfile(path0_to_inpath($path0), path0_to_outpath($path0));
}
if (my ($separate) = hash_perhaps_ref($config, "copy_paths_separate")) {
for my $root (keys %$separate) {
for my $path0 (@{ $$separate{$root} }) {
xcopyfile "$root/$path0", path0_to_outpath $path0
}
}
}
# copy htmlgen CSS file
xcopyfile(
existingpath_or(
path0_to_inpath($css_path),
path0_to_inpath("htmlgen/$css_path")
),
path0_to_outpath($css_path)
);
}
sub main () {
mkdir $outbase;
warn "running get_filesinfo..";
my $filesinfo = get_filesinfo;
warn "running genfiles..";
genfiles($filesinfo);
warn "running copyfiles..";
copyfiles($filesinfo);
}
perhaps_run_tests __PACKAGE__ or do {
$opt_repl
? do {
require FP::Repl;
require FP::Repl::Trap;
FP::Repl::repl();
}
: main;
};