From Code to Community: Sponsoring The Perl and Raku Conference 2025 Learn more

#!/usr/bin/env perl
use strict;
use warnings FATAL => 'uninitialized';
use utf8;
use experimental 'signatures';
use Cwd 'abs_path';
my ($mydir, $myname);
BEGIN {
my $location = (-l $0) ? abs_path($0) : $0;
$location =~ /(.*?)([^\/]+)\z/s or die "?";
($mydir, $myname) = ($1, $2);
}
use lib "$mydir/../lib";
use Chj::xopen 'xopen_read';
use List::Util 'pairkeys';
(my $email = 'ch%christianjaeger,ch') =~ tr/%,/@./;
sub usage {
print STDERR map {"$_\n"} @_ if @_;
my $ops = join("", map {" $_\n"} pairkeys ops());
print "Usage: $myname --op opname [--op opname ...] file...
opname can be any of:
$ops
(Christian Jaeger <$email>)
";
exit(@_ ? 1 : 0);
}
my $verbose = 0;
my @opt_op;
my $opt_test;
my $opt_repl;
my $opt_run;
#our $opt_dry;
GetOptions(
"verbose" => \$verbose,
"help" => sub {usage},
"op=s" => sub {
my (undef, $val) = @_;
push @opt_op, $val;
},
"test" => \$opt_test,
"repl" => \$opt_repl,
"run=s" => \$opt_run,
#"dry-run"=> \$opt_dry,
) or exit 1;
sub protos_to_arity ($str) {
my @p = grep { length $_ } split /\s*/, $str;
my $s = join('', @p);
my @parts = split /;/, $s;
my $l0 = length($parts[0]);
if (@parts == 1) {
[$l0]
} elsif (@parts == 2) {
my $l1 = length($parts[1]);
[$l0, $l0 + $l1]
} elsif (@parts == 0) {
[0]
} else {
die "invalid prototype decl: '$s'"
}
}
TEST { protos_to_arity '$' } [1];
TEST { protos_to_arity ' $ $$' } [3];
TEST { protos_to_arity '$$ ; $' } [2, 3];
TEST { protos_to_arity '&$' } [2];
TEST { protos_to_arity '@$' } [2];
TEST { protos_to_arity '' } [0];
sub checkcode_for_arity ($arity) {
my ($min, $maybe_max) = @$arity;
if (!defined $maybe_max) {
"\@_ == $min or fp_croak_arity $min;\n"
} else {
"\@_ >= $min and \@_ <= $maybe_max or fp_croak_arity \"$min-$maybe_max\";\n"
}
}
my $compare_re = qr(<|>|<=|>=|==);
my %rising = map { $_ => 1 } qw(> >=);
my %falling = map { $_ => 1 } qw(< <=);
my %equal = map { $_ => 1 } qw(==);
sub compare_range {
my ($compare1, $n1, $maybe_andor, $maybe_compare2, $maybe_n2) = @_;
if (defined $maybe_andor) {
if ($maybe_andor eq '&&' or $maybe_andor eq 'and') {
if ($rising{$compare1} and $falling{$maybe_compare2}) {
"$n1-$maybe_n2"
} else {
die "don't know how to handle '$compare1 and $maybe_compare2'"
}
} elsif ($maybe_andor eq '||' or $maybe_andor eq 'or') {
if ($equal{$compare1} and $equal{$maybe_compare2}) {
"$n1 or $maybe_n2"
} else {
# XX could simply say "$compare1 $n1 or $maybe_compare2
# $maybe_n2" or 'optimize' it in the cases where it's
# ==
die "don't know how to handle '$compare1 and $maybe_compare2'"
}
} else {
die "invalid andor: $maybe_andor"
}
} else {
$compare1 eq "==" ? $n1 : "$compare1 $n1"
}
}
TEST { compare_range qw(> 5) } '> 5';
TEST { compare_range qw(<= 5) } '<= 5';
TEST { compare_range qw(== 5) } '5';
TEST_EXCEPTION { compare_range qw(== 5 and == 6) }
'don\'t know how to handle \'== and ==\'';
TEST { compare_range qw(== 5 or == 6) } '5 or 6';
TEST_EXCEPTION { compare_range qw(== 5 or > 6) }
'don\'t know how to handle \'== and >\'';
my $identifier_re = qr/[_a-zA-Z]\w*(?:::[_a-zA-Z]\w*)*/;
my $globref_re = qr/\{[A-Z]+\}/;
my %pre_identifiers_that_need_glob = map { $_ => 1 } (
'local', 'undef',
# '=item * Use of member names that are also used as subroutine names, perl'
# (ugly since should check for the '='?...)
'item'
);
sub glob2functionref ($str, $is_comment) {
my $perhapsspace = $is_comment ? "" : qr/ ?/;
$str =~ s{(
([\(\[\{=>#,]|$identifier_re) # pre
(\s*) # spacing
\* $perhapsspace ($identifier_re) )}{
my ($all, $pre, $spacing, $identifier)= ($1, $2, $3, $4);
my $post = substr($str, pos($str)+length($all), 10);
my $prepre = substr($str, 0, pos($str)); # XX performance?
# use FP::Repl;repl;
if ($identifier =~ m{^STD}
#or $post =~ m{^\*}
# including ' # possibly *map contents* of inline code or code sections':
or $post =~ m{^(?:[^*]{0,20}\w)?\*}
# *Foo{CODE} and such:
or $post =~ m{^$globref_re}
# 'local (*F);' etc.:
or $pre_identifiers_that_need_glob{$pre}
or ($prepre =~ /\b($identifier_re)\s*\z/s
and $pre_identifiers_that_need_glob{$1})
# 'our @EXPORT_OK = qw(*trace_failures ..)':
or $prepre =~ /\@EXPORT\w*\s*=.{0,70}\z/s
# 'use FP::Failure qw(*use_failure *fails);':
or $prepre =~ /\bqw\s*(?:[<\(\"\'\[](?:\s*[*&$@]$identifier_re)*\s*)?\z/s
# 'tequals *foo, *FP::Equal::t::foo':
or $prepre =~ /\btequals\b.{0,70}\z/s
# '$code =~ s/\buse\s*Sub::Call::Tail\b.*?;//s;':
or $prepre =~ /=~\s*.{0,70}\z/s
# 'our $get = $db->prepare("select * from, *examples/csv_to_xml-example.csv");':
or $prepre =~ /[^\\]\"[^\"]{0,70}\z/s
# '$n * fact($n - 1)'
or $prepre =~ /[\$]\z/s
# 'our @EXPORT* variables yet. Drop me a note if you would like to have that.)'
# and
# '|($PACKAGE)\s*->\s*new)' (regex rule)
or $prepre =~ /[\@\$\*\\]\s*\z/s
# 'to prevent redefinitions of subroutines *which are NOT in the namespace'
or ($pre =~ /^$identifier_re\z/
and $prepre =~ /^\s*(?:\s*\w+,?)*\s*\z/s)
) {
$all
} else {
"$pre$spacing\\\&$identifier"
}
}sgex;
$str
}
TEST {
glob2functionref
"; *bar = *foo1; # * foo2 is fine, *bar is fine, *not* unexpected.", 0
}
'; *bar = \\&foo1; # \\&foo2 is fine, \&bar is fine, *not* unexpected.';
TEST {
glob2functionref
"*bar = *foo; # * foo is fine, *bar is fine, *not* unexpected.", 1
}
'*bar = \\&foo; # * foo is fine, \&bar is fine, *not* unexpected.';
TEST {
glob2functionref 'local (*F);', 0
}
'local (*F);';
TEST { glob2functionref 'BEGIN { undef(*array) }', 0 }
'BEGIN { undef(*array) }';
TEST { glob2functionref 'BEGIN { undef *array }', 0 }
'BEGIN { undef *array }';
TEST { glob2functionref ' *fm = memoizing *f; # memoize in process memory', 0 }
' *fm = memoizing \&f; # memoize in process memory';
TEST {
glob2functionref
'our @EXPORT_OK = qw(*trace_failures *use_failure *fails);', 0
}
'our @EXPORT_OK = qw(*trace_failures *use_failure *fails);';
TEST { glob2functionref 'use FP::Failure qw(*use_failure *fails);', 0 }
'use FP::Failure qw(*use_failure *fails);';
TEST {
glob2functionref 'TEST { tequals((*foo::bar) . "", "*foo::bar") } 1;
TEST { tequals *foo, *FP::Equal::t::foo } 1;', 0
}
'TEST { tequals((*foo::bar) . "", "*foo::bar") } 1;
TEST { tequals *foo, *FP::Equal::t::foo } 1;';
sub deglob ($line) {
# In comments, only convert globs if they appear without space
# after the star (with a space, it's probably a bullet point).
if (my ($pre, $post) = $line =~ /^([^#]*)#(.*)/s) {
glob2functionref($pre, 0) . "#" . glob2functionref($post, 1)
} else {
glob2functionref($line, 0)
}
}
TEST { deglob 'cmp => on(*string_lc, *string_cmp),' }
'cmp => on(\&string_lc, \&string_cmp),';
TEST {
deglob
'$self->if_suffix_md_to_html($path, $for_title, *identity, *identity)'
}
'$self->if_suffix_md_to_html($path, $for_title, \&identity, \&identity)';
TEST { deglob '$code =~ s/\buse\s*Sub::Call::Tail\b.*?;//s;' }
'$code =~ s/\buse\s*Sub::Call::Tail\b.*?;//s;';
TEST {
deglob
'our $get = $db->prepare("select * from, *examples/csv_to_xml-example.csv");'
}
'our $get = $db->prepare("select * from, *examples/csv_to_xml-example.csv");';
TEST { deglob 'local *add = \&counting_add;' } 'local *add = \&counting_add;';
TEST { deglob '*is_AlsoSee = is_heading_of(sub($s) { $s =~ /also *see/i });' }
'*is_AlsoSee = is_heading_of(sub($s) { $s =~ /also *see/i });';
TEST { deglob ' # possibly *map contents* of inline code or code sections' }
' # possibly *map contents* of inline code or code sections';
TEST { deglob '$perl++ if $str =~ /(?:^|\n)\s*use\s+\w+/;' }
'$perl++ if $str =~ /(?:^|\n)\s*use\s+\w+/;';
# yes I know, it's a complete hack, this *is to get things done*.
TEST { deglob '$perl += 1 if $str =~ /(?:^|\n|;)\s*push\s+\@\w+\s*,\s*/;' }
'$perl += 1 if $str =~ /(?:^|\n|;)\s*push\s+\@\w+\s*,\s*/;';
TEST { deglob '$n * fact($n - 1)' }
'$n * fact($n - 1)';
TEST {
deglob
'our @EXPORT* variables yet. Drop me a note if you would like to have that.)'
}
'our @EXPORT* variables yet. Drop me a note if you would like to have that.)';
TEST {
deglob
'=item * Use of member names that are also used as subroutine names, perl'
}
'=item * Use of member names that are also used as subroutine names, perl';
TEST {
deglob
'to prevent redefinitions of subroutines *which are NOT in the namespace'
}
'to prevent redefinitions of subroutines *which are NOT in the namespace';
TEST { deglob '|($PACKAGE)\s*->\s*new)' } '|($PACKAGE)\s*->\s*new)';
TEST {
deglob
' [*is_procedure, "nav_bar"],'
}
' [\&is_procedure, "nav_bar"],';
# Ah this is not converted in real case since it's in bench/globs!
TEST { deglob 'time_this { t *inc } " *";' } 'time_this { t \&inc } " *";';
our $current_file;
sub ops () {
(
# [ needs_whole_file, proc ]
opspaces => [
0,
sub {
if (/http|href/) {
$_
} else {
s{ ([^/>=~<!|+*-]) (=|=>|==|=~|/=|//=|>=|<=|<<|>>|!=|\|\||\|\|=|\+=|-=|\*=) ([^/>=~<!|]) }{
my ($a,$b,$c)=($1,$2,$3);
my $all = "$a$b$c";
my $pre= substr($_, 0, pos($_)+1);
my $is_perl = 0;
if ($b eq "=>") {
$is_perl = 1
} elsif (not substr($pre, length($pre)-1, 1)=~ /\w/) {
$is_perl = 1
} elsif (my ($sigil) = $pre =~ /([^\w])[A-Za-z_]\w*\s*$/) {
$is_perl= $sigil =~ /[\$*&@%]/
}
#use FP::Repl;repl;
if ($is_perl) {
($a eq " " ? $a : "$a ").$b.($c eq " " ? $c : " $c")
} else {
$all
}
}sgex and s/[ \t]*$//;
$_
}
}
],
functionparameters2signatures => [
1,
sub {
s{
\b(method|fun)(\s+\w+)
(?:
(\s*\(\s*)
([^()]*?)
(\s*\))
)?
(\s*\{)
}{
my ($which,$name,$a,$b,$c,$end)=($1,$2,$3,$4,$5,$6);
"sub$name"
. ($which eq "method" ?
(defined($b) ? $a.(length($b) ? q{$self, }.$b : q{$self}).$c
: q{($self)})
: "$a$b$c")
. $end
}sgex;
$_
}
],
excise_prototypes => [
1,
sub {
return $_
if /use +experimental [^\n;]*signatures[^\n;]*;/
; # since `()` is ambiguous
s{
( \bsub\b \ *)(\w+)?( \ * )
\( ([\@\$; ]*) \) # do *not* include & here, as those are needed
( \s* \{ \s* )
( (?:[^\n]*\n){0,2} )
}{
my ($_pre, $maybe_name, $_post, $protos, $post, $bodystart)
= ($1,$2,$3,$4,$5,$6);
my $pre = $_pre . ($maybe_name // "") . $_post;
if (defined $maybe_name and $maybe_name =~ /^[A-Z0-9_]+\z/
and $protos =~ /^\s*\z/ and $bodystart =~ /^\s*\d+\}/) {
# constant, leave as is
"$pre($protos)$post$bodystart"
} else {
# make sure bodystart doesn't slurp over a subsequent definition
die "accidentally slurping up subsequent definition"
if $bodystart =~ /(\bsub\b \ *(?:\w+)?) \ * (?:\([@$&;]*\))? (\s* \{)/;
my $checkcode = do {
if ($bodystart =~ /\@_ *(?:==|<=|>=|<|>) *\d+/) {
""
} else {
checkcode_for_arity(protos_to_arity($protos))
}
};
# Make sure there's no empty line before the checkcode
my $post_and_checkcode = "$post$checkcode";
$post_and_checkcode =~ s/^(\s*\{)[ \t]*\n\s*/$1\n /s;
"$pre$post_and_checkcode $bodystart"
}
}sgex;
$_
}
],
move_to_fp_croak_arity => [
1,
sub {
my $replacements = s{
( ; | \{ \s* )
( \(? \s* \@_ \s* ($compare_re) \s* (\d+) \s*
(?: ( and | && | or | \|\| ) \s* \@_ \s* ($compare_re) \s* (\d+) )? \s* \)? \s* )
( or \s+ (?:die|(?:\w+::)*croak) \s*
(?: "[^"]*wrong\ number\ of\ arguments[^"]*" |
"[^"]*(?:expecting|expects|needs?) \s+ \d+
(?: \ * (?:-|to) \ * \d+)? \s+ (?:parameter|argument)s?[^"]*" |
"[^"]*not\ enough\ arguments[^"]*"
) \s* ;)
}{
my ($pre, $compare, $compare1, $n1, $maybe_andor, $maybe_compare2, $maybe_n2, $or_part)
= ($1,$2,$3,$4,$5,$6,$7,$8);
my $range = compare_range($compare1, $n1, $maybe_andor, $maybe_compare2, $maybe_n2);
unless ($range =~ /^\d+$/s) {
$range = "\"$range\"";
}
"$pre$compare or fp_croak_arity $range;"
}sgex;
if ($replacements) {
unless (/use FP::Carp/) {
do {
if (/[\@\%]EXPORT/) {
# force it after 'EXPORT'
s{(.*[\@\%]EXPORT.*\nuse [A-Z][^;\n]+;[^\n]*\n)}{${1}use FP::Carp;\n}s
} else {
s{(.*\nuse [A-Z][^;\n]+;[^\n]*\n)}{${1}use FP::Carp;\n}s
}
}
or warn
"could not insert FP::Carp into $current_file\n";
}
}
$_
}
],
deglob => [
0,
sub {
return $_
if (
# this file uses globs for function passing on
# purpose:
$current_file eq 'bench/globs' or
# leave tests etc. alone:
$current_file eq 'meta/code-reformat'
);
deglob $_
}
]
)
}
my %ops = ops;
sub run {
@opt_op or usage "no op given, use the --op option";
my @op = map { $ops{$_} // die "unknown op '$_'" } @opt_op;
my %needs_whole_file = map { $_->[0] ? (1 => undef) : (0 => undef) } @op;
(keys %needs_whole_file) == 1
or die
"can't satisfy ops of different needs_whole_file requirement at the same time";
my ($needs_whole_file) = keys %needs_whole_file;
for my $file (@ARGV) {
local $current_file = $file;
my $f = xopen_read $file;
my @lines = do {
local $/ = $needs_whole_file ? undef : $/;
$f->xreadline;
};
$f->xclose;
my $t = xtmpfile $file;
$t->xprint(
map {
for my $op (@op) {
my (undef, $proc) = @$op;
$_ = &$proc($_);
}
$_
} @lines
);
$t->xclose;
$t->xputback;
}
}
if ($opt_test) {
my $failures = Chj::TEST::run_tests(__PACKAGE__)->{failures};
exit($failures ? 1 : 0)
} elsif ($opt_repl) {
require FP::Repl;
FP::Repl->import("repl");
repl();
} elsif (defined $opt_run) {
require FP::Show;
my $res;
eval "\$res= do { $opt_run }; 1" or die $@;
print FP::Show::show($res), "\n" or die $!;
} else {
run
}