#!/usr/bin/env perl
# Copyright (c) 2015-2019 Christian Jaeger, copying@christianjaeger.ch
# This is free software. See the file COPYING.md that came bundled
# with this file.
# NOTE: there is now a more extensive intro in docs/intro.md; this
# file was written before that intro existed. This file still has
# useful info above that intro; you can probably choose one or the
# other (and then check the other for completeness) without much
# difference (to get a perfect didactic material, someone could work
# on eliminating this duplication).
# This file contains an intro to functions and the very basic parts of
# functional programming, and to how to use FunctionalPerl:
#
# 1. what is a "repl"?
# 2. what are closures?
# 3. what is recursion?
# 4. what is iteration, in imperative and functional ways?
# 5. what are linked lists, and why are they the list data structure
# of choice for functional programs?
#
# 2-3 won't use any of the FunctionalPerl code; it's really just
# using standard Perl. 5 will use the List module from
# FunctionalPerl.
# ------------------------------------------------------------------
# (0) boilerplate (this will be familiar to Perlers)
# This (together with the -w flag in the #! line at the top) is how to
# set up Perl to treat undefined or ambiguous language use as errors,
# which is a good idea to always do, since it leads to earlier
# reporting of errors in the program flow, before invalid results are
# used for real, and while still close to the context the issue is
# originating from, making the problem easier to find.
use
strict;
use
warnings;
# Some setup that's required to have this script work well as part of
# the project, you wouldn't need this in your own scripts.
# Find modules from the functional-perl working directory (if not
# installed, but checked out from Github)
our
(
$mydir
,
$myname
);
BEGIN {
my
$location
= (-l $0) ? abs_path($0) : $0;
$location
=~ /(.*?)([^\/]+?)_?\z/s or
die
"?"
;
(
$mydir
,
$myname
) = ($1, $2);
}
# Specify FP::Repl's dependencies as a dependency when this file is
# run through test suite (so that testing of this file will be skipped
# instead of reported as a failure)
# end of "Some setup".
# For development/debugging
#use Chj::ruse; # get the 'ruse' procedure which will reload modules;
# since we're putting the meat of the program into the
# main file here, this wouldn't help us in this case.
# end, but you can also put it anywhere in the code when
# you want to examine the state of the program at that
# point.
# that opens a repl whenever there's an
# uncaught exception in your program (and the
# program is running on a tty). This is useful
# if you're the programmer; it wouldn't be
# useful if you hand your script to an end
# user who is not a programmer, thus comment
# out (or otherwise disable) this import when
# you pass the script on. NOTE: this overrides
# the handler from Chj::Backtrace above, thus
# you could comment out that line above; or,
# just leave it as is, and if you comment out
# FP::Repl::AutoTrap, Chj::Backtrace will
# automatically be active instead.
# ------------------------------------------------------------------
# (1) Demonstrating the repl, which stands for read-eval-print loop:
# The repl allows to evaluate expressions without restarting the
# program. It shows a prompt, reads a line of input, evaluates it, and
# prints the result, and does this in an endless loop until you enter
# <ctrl>-d at the prompt, at which point the repl procedure
# returns. It has line editing, history and <tab> completion. Example:
#main> 1+1
#$VAR1 = 2;
# The 'main' at the prompt indicates the namespace, which is main::
# here.
# Run this script now, `intro/basics`, and you will get a repl prompt
# (you can see the call to it at the bottom of this file), where you
# can play with these examples.
# Multiple values:
#main> (1+1,2*2)
#$VAR1 = 2;
#$VAR2 = 4;
#main> ()
#main>
# An array:
#main> [1+1,2*2]
#$VAR1 = [
# 2,
# 4
# ];
# Variables:
# Strict mode is not active in the repl, so you can leave out the
# otherwise obligatory "our " keyword and it will be understood as a
# package variable and hence survive across entries.
# main> $x = 10
# $VAR1 = 10;
# main> $x++
# $VAR1 = 10;
# main> $x
# $VAR1 = 11;
# If you used "my ", it would be restricted to just the line you enter:
# main> my $x = 100
# $VAR1 = 100;
# main> $x
# $VAR1 = 11;
# You can define new code:
# main> sub hi { my (@names) = @_; "Hello ".join(" and ", @names) }
# main> hi "Paul"
# $VAR1 = 'Hello Paul';
# main> hi "Stefan", undef
# Exception: 'Use of uninitialized value $names[1] in join or string at (eval 146) line 1.
# '
# main 1>
# An exception has happened because of the "use warnings FATAL"
# above. The "1" in the new prompt indicates that you're in a nested
# repl: this is because FP::Repl::AutoTrap was loaded above (see
# comments there). Whatever you enter in this nested repl is evaluated
# in the context where the die happened. There are some meta commands
# in the repl; enter ":?" or ",?" for help:
# main 1> :?
# Repl help:
# If a command line starts with a ':' or ',', then the remainder of the
# line is interpreted as follows:
# ...
# ...
# Get a backtrace:
# main 1> ,b
# 0 FP::Repl::WithRepl::__ANON__('Use of uninitialized value $names[1] in join or string at (ev...') called at (eval 146) line 1
# 1 main::hi('Stefan', undef) called at (eval 148) line 1
# 2 main::__ANON__() called at intro//../lib/FP/Repl/Repl.pm line 556
# 3 FP::Repl::Repl::__ANON__() called at intro//../lib/FP/Repl/Repl.pm line 107
# ...
# Look at the environment (variables):
# main 1> ,e
# \@names = ['Stefan', undef];
# Run code within that environment:
# main 1> $names[0]
# $VAR1 = 'Stefan';
# main 1> hi "there", undef, "again"
# Exception: 'Use of uninitialized value $names[1] in join or string at (eval 146) line 1.
# '
# main 2>
# The "2" signifies that you've got 2 nested repls now.
# main 2> ,e
# \@names = ['there', undef, 'again'];
# main 2> ,b
# 0 FP::Repl::WithRepl::__ANON__('Use of uninitialized value $names[1] in join or string at (ev...') called at (eval 146) line 1
# 1 main::hi('there', undef, 'again') called at (eval 160) line 1
# ...
# 11 FP::Repl::WithRepl::__ANON__('Use of uninitialized value $names[1] in join or string at (ev...') called at (eval 146) line 1
# 12 main::hi('Stefan', undef) called at (eval 148) line 1
# 13 main::__ANON__() called at intro//../lib/FP/Repl/Repl.pm line 556
# main 2/12> ,10
# ..
# FP::Repl::WithRepl 2/10> ,e
# \@names = ['Stefan', undef];
# FP::Repl::WithRepl 2/10> $names[0]
# $VAR1 = 'Stefan';
#XXX buggy 1 off TODO fix.
# You can exit one level of repls by entering ctl-d. You'll be back at:
# main 1> ,e
# \@names = ['Stefan', undef];
# And again ctl-d:
# main>
# ------------------------------------------------------------------
# (2) Demonstrating closures:
# 'sub' stands for 'subroutine', also called procedures, or
# functions. "Function" implies that a value is returned. If we say
# "pure function", then its *only* effect is to return a value.
sub
f {
# this is just how functions receive their arguments in Perl
# (ugly, yes; we'll see a nicer way in the file
# `more_tailcalls`):
my
(
$x
,
$y
) =
@_
;
# $x and $y are fresh variables here ('my' means fresh and local
# to the scope), set to the arguments that were passed to 'f'
# The value of the last expression in a scope is also the value
# being returned by the scope (and in this case, the 'f'
# function). This is an anonymous function here (note that no name
# is given); it is called a "closure" as it "closes over"
# variables in its enclosing scope, i.e. $x and $y here.
sub
{
my
(
$z
) =
@_
;
$x
*
$y
-
$z
}
}
# See how f returns a subroutine as the result (its body is shown as
# "DUMMY" for simplicity):
# main> f(2,3)
# $VAR1 = sub { 'DUMMY: main::__ANON__ at "intro/basics" line 247' };
# If we want to call that:
#main> $VAR1->(4)
#$VAR1 = 2;
# 2 * 3 - 4, correct.
# Capture the anonymous subroutines in variables (thus giving them
# names, although those names are prefixed with '$' (they are 'SCALAR'
# variables instead of the 'CODE' variables that 'sub name' sets)):
#main> $g = f(2,3); $h = f(4,5); ()
#main> $g->(4)
#$VAR1 = 2;
#main> $h->(4)
#$VAR1 = 16;
# Note how $g and $h remember the values that were passed to 'f' for
# their creation.
# ------------------------------------------------------------------
# 3-4: we're defining the factorial function:
# f(n) = 1 * 2 * 3 * ... * n
# (3) factorial using recursion:
sub
fact {
my
(
$n
) =
@_
;
warn
"n=$n"
;
if
(
$n
< 2) {
1
}
else
{
$n
* fact(
$n
- 1)
}
}
# The above definition is purely functional (except for the warn
# statement): not only is the only effect of calling fact its return
# value (again except for the warning, please consider that a
# debugging feature, outside of the scope of program behaviour), but
# it's also not using anything else than pure functions (the operators
# like '*', '-', '<' can be understood as functions in the
# mathematical sense, even though they are not accessible as functions
# in Perl) and variable bindings inside.
# With the 'variable binding', my $n = ... is meant. In pure
# functional languages, variables are only ever assigned once (and
# immediately upon their instantiation); they are usually called
# 'bindings' in those languages, not variables. They bind a value to a
# name in a context; the same name in the same context is guaranteed
# to always be the same value in those languages, thus, a binding
# doesn't vary (the same binding isn't variable). Note that this
# doesn't mean that $n is constant: every invocation of 'fact' will
# bind its own instance of $n to whatever argument was passed; but
# that same instance of $n can never be modified in a purely
# functional language, and in this implementation of the factorial we
# don't do that either (voluntarily).
# Try it out:
# main> fact 10
# ...
# ------------------------------------------------------------------
# (4) factorial using iteration:
# If you already know about the difference between iterative and
# recursive algorithms, you could skip down to (4a).
# The recursive algorithm above delays the multiplication until the
# recursive call returns. fact(3) will calculate 3 * fact(2), which
# has to evaluate fact(2) before the multiplication can be executed;
# fact(2) will calculate 2 * fact(1), and fact(1) will return 1, after
# which point the multiplication 2 * 1 => 2 can be executed, which
# will be returned, at which point the multiplication 3 * 2 will be
# executed. The steps that are not done yet, like "3 *", are called
# continuations ("3 *" is the continuation of the evaluation of
# "fact(2)", etc.), and are remembered implicitely by the Perl
# interpreter on a stack (the Perl language-level stack, which is
# implemented independently of the C stack). This means that
# calculating fact for n will need n slots of space on the stack. This
# is unlikely to be a problem here since fact for big values of n is
# going to overflow Perl's number range anyway, but it's something to
# be aware of since you *will* run out of stack space for some other
# tasks when implementing them by way of recursion.
# But the factorial can also be calculated in another way: instead of
# carrying out the multiplication after getting the factorial for the
# next smaller n, we can multiply by the next smaller n and then use
# this for the next iteration. Effectively, instead of calculating
# 4 * (3 * (2 * 1))
# we can calculate
# ((4 * 3) * 2) * 1
# which of course (thanks to the associativity of multiplication) will
# give the same result (at least for exact numbers). If we evaluate
# the multiplications in the same direction as the decrement of n
# (i.e. towards the end condition), then the work continues only
# towarts the end condition, and no pending work is piling up (no
# continuations are to be remembered on the stack). Such an algorithm
# is called *iterative*. This distinction is independent of the fact
# whether we're implementing the algorithm with a programming language
# that continues to the next evaluation step by way of calling a
# function, or by way of using loop syntax. These two styles are shown
# below:
# (4a) iteration with a loop and variable mutation. This means, that
# only one instance of $n and $res respectively is created when
# calling 'imperative_fact', and subsequently assigned new values;
# this is *not* functional, but imperative programming.
# To peek inside what's going on, we're going to save a closure from
# every iteration step, so that we can call those later to show us the
# values of the variables that were in their context at the time of
# their creation.
our
@imperative_inspect
;
# array to hold the closures; 'our' means a
# global variable, as opposed to 'my' which
# is lexically accessible only. For a
# variable to be accessible from 'repl', it
# needs to be global (sadly)
sub
imperative_fact {
my
(
$n
) =
@_
;
my
$res
= 1;
while
(1) {
# save closure for later inspection of $n and $res
push
@imperative_inspect
,
sub
{ (
$n
,
$res
) };
if
(
$n
< 2) {
return
$res
;
}
else
{
# treat $res and $n not as immutable binding of a name to
# a value, but as mutable memory location
$res
=
$n
*
$res
;
$n
=
$n
- 1;
}
}
}
#main> imperative_fact 4
#$VAR1 = 24;
#main> @imperative_inspect
# $VAR1 = sub { 'DUMMY: main::__ANON__ at "intro/basics" line 391' };
# $VAR2 = sub { 'DUMMY: main::__ANON__ at "intro/basics" line 391' };
# $VAR3 = sub { 'DUMMY: main::__ANON__ at "intro/basics" line 391' };
# $VAR4 = sub { 'DUMMY: main::__ANON__ at "intro/basics" line 391' };
#main> $imperative_inspect[0]->()
#$VAR1 = 1;
#$VAR2 = 24;
#main> $imperative_inspect[3]->()
#$VAR1 = 1;
#$VAR2 = 24;
# Note how our captured closures all show the same values, namely the
# last ones. Even though the closure that was captured in the first
# loop iteration was created when the variables held different values,
# by the time we call it, the variables were mutated and don't bind to
# the same values anymore. All of the closures in @imperative_inspect
# refer to the same variable instances.
# Ponder how this could be a dangerous feature in bigger programs (and
# hence why functional programming avoids it.)
# As an aside: note that as seen by the world *outside*,
# imperative_fact is still purely functional (when ignoring the
# mutation of @imperative_inspect, which again we do for
# debugging/inspection purposes only). Iff mutations are kept
# localized, their danger is correspondingly low (they will only
# matter when working on the code within the scope that they are kept
# to, i.e. when imperative_fact is modified later on).
# (4b) factorial still using iteration, but instead of using mutation,
# again using pure functions internally. Note how this implements the
# same algorithm as imperative_fact, but the looping happens by way of
# calling the containing function instead of a while loop within
# it. This way, $n and $res are new instances for every iteration;
# they are not mutated and maintain the same value throughout their
# lifetime.
sub
functional_fact {
my
(
$n
) =
@_
;
functional_fact_iter(
$n
, 1)
}
our
@functional_inspect
;
sub
functional_fact_iter {
my
(
$n
,
$res
) =
@_
;
push
@functional_inspect
,
sub
{ (
$n
,
$res
) };
if
(
$n
< 2) {
return
$res
;
}
else
{
# This is a tail call: it happens in tail position,
# i.e. there's nothing happening *after* this call within this
# function.
functional_fact_iter(
$n
- 1,
$n
*
$res
)
# In different words: when this sub-call returns a value, it
# will be immediately returned by the current call, too,
# without doing any further computation with it. Because of
# this, there's no need to keep space allocated (on the call
# stack) for the current context--it won't be used
# anymore. Dropping the current context at the same time as
# executing the tail call is what one calls "tail-call
# optimization".
# But: Perl does *not* carry out this optimization
# automatically, thus this code will still use stack space
# (not a real problem for factorial since it will not be a
# huge amount; cases where a loop can be repeated millions of
# times it would be a problem as Perl would run out of stack
# space). It can be specified explicitely, though. See the
# file `tailcalls` for a version that won't use stack space.
}
}
#main> functional_fact 4
#$VAR1 = 24;
#main> @functional_inspect
# $VAR1 = sub { 'DUMMY: main::__ANON__ at "intro/basics" line 458' };
# $VAR2 = sub { 'DUMMY: main::__ANON__ at "intro/basics" line 458' };
# $VAR3 = sub { 'DUMMY: main::__ANON__ at "intro/basics" line 458' };
# $VAR4 = sub { 'DUMMY: main::__ANON__ at "intro/basics" line 458' };
#main> $functional_inspect[0]->()
#$VAR1 = 4;
#$VAR2 = 1;
#main> $functional_inspect[3]->()
#$VAR1 = 1;
#$VAR2 = 24;
# Note how the closures really remember the values that were in their
# context when they were created.
# ------------------------------------------------------------------
# (5) Linked lists
# If lists of values are implemented as arrays (slots in adjacent
# memory locations), then the only way to initialize an array is to
# write to it using mutation. If we want to stay purely functional, a
# different data structure has to be used; singly linked lists are
# commonly used for this purpose. (Trees are an alternative.)
# Those are built from pairs of (val, rest), where rest is again a
# pair, or the list end marker. Lisp calls the function to create a
# pair 'cons', and the accessors to get val 'car' and rest 'cdr'
# (sometimes aliased to 'first' and 'rest'; the reason to use 'car'
# and 'cdr' is that pairs can be used as building blocks for other
# things than lists, too, and because combinations of those names can
# be shortened, e.g. car(car($x)) can be written as caar($x), or
# car(cdr($x)) to cadr($x).) Also, pairs can be used for other data
# structures than lists. But there are also aliases for car and cdr
# named 'first' and 'rest', for more descriptive names in the context
# of lists. Those are also the names used by Clojure, Mathematica, and
# perhaps some other languages, whereas Haskell and Scala use the
# names 'head' and 'tail'. Since 'tail' is already taken by the
# `Sub::Call::Tail` module, we're staying with 'first' and 'rest'.
# For best fit with functional programming, the bare chains are passed
# around, without any object wrapper.
# main> cons(1,2)
# $VAR1 = bless( [
# 1,
# 2
# ], 'FP::List::Pair' );
# As you can see, cons returns a Pair object simply containing the two arguments.
# Let's introduce Chj::TEST, it is useful to make sure the results we
# show here actually are what the current functional-perl version
# returns. TEST takes a block of code, and a value that the block of
# code is expected to return.
use
Chj::TEST;
TEST { cons(1, 2) }
bless
([1, 2],
'FP::List::Pair'
);
# Let's build a list containing 3 elements; null is returning the list
# end marker.
TEST { cons(1, cons(2, cons(5, null))) }
bless
(
[
1,
bless
(
[2,
bless
([5,
bless
([],
'FP::List::Null'
)],
'FP::List::Pair'
)],
'FP::List::Pair'
)
],
'FP::List::Pair'
);
# This becomes ugly fast, so, turn it back into an array that Perl
# will show nicely:
# main> cons(1, cons(2, cons(5, null)))->array
# $VAR1 = [1, 2, 5];
# But, there's also a meta command in the repl that can show such
# values nicely:
# main> ,s
# main> :s # same
# main> cons(1, cons(2, cons(5, null)))
# $VAR1 = list(1, 2, 5);
# :s formats things in a single line; if you'd like it to be
# pretty-printed into multiple lines, use :S, but that (currently)
# requires Perl::Tidy to be installed, and can be a bit slow for large
# data structures.
# `list` is a function from `FP::List` that turns the arguments given
# to it into a linked list. The code shown by the repl in "s" mode is
# actually Perl code that, when run, yields the value that the command
# evaluated to.
# main> list(1, 1+1, 2*2)
# $VAR1 = list(1, 2, 4);
# Let's build a list functionally, using recursion:
sub
iota {
my
(
$from
,
$to
) =
@_
;
if
(
$from
>=
$to
) {
null
}
else
{
cons
$from
, iota(
$from
+ 1,
$to
)
}
}
# main> iota (3, 8)
# $VAR1 = list(3, 4, 5, 6, 7);
# you could use write_sexpr to get Scheme-compatible formatting
# instead (this needs the print to force a newline and hence flush the
# buffer; you're first seeing the written output, then the return
# value):
# main> write_sexpr iota (3, 8); print "\n"
# ("3" "4" "5" "6" "7")
# $VAR1 = 1;
# A preview on lazy lists, AKA functional streams:
# those are linked lists, but computed on demand.
# main> stream_iota (4, 3)
# $VAR1 = lazy { "DUMMY" };
# or, back in Data::Dumper mode:
# main> :d stream_iota (4, 3)
# $VAR1 = bless( [
# sub { "DUMMY" },
# undef
# ], 'FP::Lazy::Promise' );
# Instead of calculating the nested list data structure right away,
# this just returns a "promise". (Details to be shown in another
# intro, some time.)
# stream_iota treats the arguments a bit different (second argument is
# length, not last value; XX should this be changed?)
# main> write_sexpr stream_iota (3, 5); print "\n"
# ("3" "4" "5" "6" "7")
# $VAR1 = 1;
# `write_sexpr stream_iota (3)` would print an endless list:
# stream_iota never ends it without the second argument.
sub
square {
my
(
$x
) =
@_
;
$x
*
$x
}
# A function from a 'CODE' variable can be passed by prefixing it with
# \& (yes, a bit ugly, too).
# This creates an infinite stream of integers, maps it to its squares,
# takes the first 10 values from it and prints those:
# main> write_sexpr stream_take (stream_map (\&square, stream_iota 0), 10); print "\n"
# ("0" "1" "4" "9" "16" "25" "36" "49" "64" "81")
# It's usually nicer to use method calls, though: the same in "object
# orientation" (still purely functional!):
# main> stream_iota(0)->map(\&square)->take(10)->write_sexpr ; print "\n"
# ("0" "1" "4" "9" "16" "25" "36" "49" "64" "81")
# $VAR1 = 1;
# But you can also just get a FunctionalPerl list:
# main> :s stream_iota(0)->map(\&square)->take(10)->list
# $VAR1 = list(0, 1, 4, 9, 16, 25, 36, 49, 64, 81);
# Or, remember, a plain old array:
# main> stream_iota(0)->map(\&square)->take(10)->array
# $VAR1 = [0, 1, 4, 9, 16, 25, 36, 49, 64, 81];
# Or, a purearray, which is an array that's immutable and that is an
# object, so, the same methods for sequences can called on it as on
# list:
# main> stream_iota(0)->map(\&square)->take(10)->purearray
# $VAR1 = purearray(0, 1, 4, 9, 16, 25, 36, 49, 64, 81);
# main> $VAR1->map(\&square)
# $VAR1 = purearray(0, 1, 16, 81, 256, 625, 1296, 2401, 4096, 6561);
# main> $VAR1->list
# $VAR1 = list(0, 1, 16, 81, 256, 625, 1296, 2401, 4096, 6561);
# In addition to sequence methods (those which are declared by
# FP::Abstract::Sequence), purearray has efficient (constant time)
# random position element access, whereas list has an efficient
# (constant time) `rest` method, so which one to choose depends on the
# task at hands.
# ------------------------------------------------------------------
# run TEST forms if called as part of test suite, otherwise enter the
# repl for your experiments:
perhaps_run_tests
"main"
or repl;