From Code to Community: Sponsoring The Perl and Raku Conference 2025 Learn more

#!/usr/bin/env perl
# Copyright (c) 2019-2020 Christian Jaeger, copying@christianjaeger.ch
# This is free software. See the file COPYING.md that came bundled
# with this file.
use strict;
use warnings FATAL => 'uninitialized';
use utf8;
binmode *STDOUT{IO}, ":encoding(UTF-8)";
use experimental "signatures";
#use Sub::Call::Tail;
# find modules from functional-perl working directory (not installed)
use Cwd 'abs_path';
# Directory prefix and base name of this script. They are computed at
# compile time so that the following `use lib` line can refer to $mydir.
our ($mydir, $myname);

BEGIN {
    # When invoked through a symlink, work from the resolved path instead.
    my $script_path = $0;
    $script_path = abs_path($0) if -l $0;

    # Split into directory prefix and file name; a single trailing
    # underscore is not made part of the name.
    my ($dir, $name) = $script_path =~ /(.*?)([^\/]+?)_?\z/s
        or die "?";
    ($mydir, $myname) = ($dir, $name);
}
use lib "$mydir/../lib";
use Getopt::Long;
use FP::List ":all";
use FP::Stream ":all";
use FP::IOStream ":all";
use FP::Array ":all";
use FP::Array_sort ":all";
use FP::Predicates ":all";
use FP::Ops ":all";
use FP::Combinators ":all";
use Chj::xperlfunc ":all";
use FP::PureArray ":all";
use Chj::TEST ":all";
use Chj::xIO qw(with_output_to_file);
# Print the help text for this script to standard output, then terminate
# the program with exit status 1. Takes no arguments and never returns.
# NOTE(review): the help text looks truncated in two places -- the
# sentence ending in "check-out of" has no object, and the final
# "discussion." line has lost its beginning. Presumably lines carrying
# URLs went missing; confirm against the upstream script.
sub usage {
print "usage: $myname path/to/hiring-without-whiteboards/README.md
Starts an \`FP::Repl\` with access to a \`cs\` function which returns a
stream (\`FP::Stream\`) of \`Company\` objects representing the contents
of the given file from a check-out of
You can then enter 'queries' like:
cs->filter(sub(\$r) { \$r->locations->any(sub(\$l) { \$l =~ /\\bUK\\b/ }) })->show_items
The 'show_items' method shows the object structure. If you instead
use the 'print' method, it will serialize back to markdown, although
without the grouping. \`print_grouped\` will re-sort and re-group and
show the group subtitles the same way as in the original markdown.
You can use tab completion to learn about the available methods on a
variable (don't use \`my\` for making variables, or they won't persist
to your next entry). Also see the docs on \`functional-perl.org\`.
To redirect printing to a file, use
with_output_to_file \$filename, sub { ... }
discussion.
";
exit 1;
}
# Command line handling: `--verbose` sets $verbose, `--help` shows the
# usage text; an option parsing failure exits with status 1.
our $verbose = 0;
GetOptions(
    "verbose" => \$verbose,
    "help"    => sub { usage() },
) or exit 1;

# The first remaining argument is the markdown file to read (undef when
# no argument was given).
my $file = $ARGV[0];
# XX lib
# Group the argument list into consecutive triples. Returns a purearray
# of 3-element array references; reports an arity error (via
# fp_croak_arity) when the number of arguments is not a multiple of 3.
sub triples {
    my @pending = @_;
    my @triples;
    while (@pending) {
        @pending >= 3 or fp_croak_arity "n*3";
        push @triples, [splice @pending, 0, 3];
    }
    array_to_purearray \@triples
}
# Common base class of the location kinds below. Stringification is
# delegated to the `string` method, which each subclass supplies.
package CountryOrRemote {
    use FP::Struct [], 'FP::Struct::Show';
    use overload '""' => sub { $_[0]->string };
    _END_
}
# Location value standing for "Remote" rather than a physical place. It
# answers the same predicates as the country classes: is_USA is undef,
# is_remote is true, and it stringifies as "Remote".
package Remote {
use FP::Struct [], 'CountryOrRemote';
sub is_USA($self) {undef}
sub is_remote($self) {1}
sub string($self) {"Remote"}
_END_
}
# NOTE(review): presumably brings the `Remote()` constructor function
# into the current package -- confirm against the FP::Struct docs.
import Remote::constructors;
# Shared instance returned by parse_location for "remote" entries.
my $Remote = Remote();
# Base class for locations that are actual countries (or US postal
# regions); such a location is never "remote".
package Country {
use FP::Struct [], 'CountryOrRemote';
sub is_remote($self) {0}
_END_
}
# Name => USPS
# Table of US states, one entry per line: the state name followed by its
# two-letter USPS code. Parsed by parse_USPS_segment with is_state = 1.
my $states = "
Alabama AL
Alaska AK
Arizona AZ
Arkansas AR
California CA
Colorado CO
Connecticut CT
Delaware DE
Florida FL
Georgia GA
Hawaii HI
Idaho ID
Illinois IL
Indiana IN
Iowa IA
Kansas KS
Kentucky KY
Louisiana LA
Maine ME
Maryland MD
Massachusetts MA
Michigan MI
Minnesota MN
Mississippi MS
Missouri MO
Montana MT
Nebraska NE
Nevada NV
New Hampshire NH
New Jersey NJ
New Mexico NM
New York NY
North Carolina NC
North Dakota ND
Ohio OH
Oklahoma OK
Oregon OR
Pennsylvania PA
Rhode Island RI
South Carolina SC
South Dakota SD
Tennessee TN
Texas TX
Utah UT
Vermont VT
Virginia VA
Washington WA
West Virginia WV
Wisconsin WI
Wyoming WY ";
# Further USPS codes that are not states, in the same "name code" line
# format. Parsed by parse_USPS_segment with is_state = 0.
my $nonstates = "
American Samoa AS
Guam GU
Northern Mariana Islands MP
Puerto Rico PR
U.S. Virgin Islands VI
Micronesia FM
Marshall Islands MH
Palau PW
U.S. Armed Forces – Americas AA
U.S. Armed Forces – Europe AE
U.S. Armed Forces – Pacific AP";
# A region identified by a USPS two-letter code. Fields: the full
# `name`, the `code`, and `is_state` (true for the 50 states table,
# false for the other codes). It counts as being in the USA and
# stringifies as its code.
package USPSCode {
    use FP::Struct ["name", "code", "is_state"], 'Country';

    sub is_USA($self) { 1 }

    sub country_name($self) { "USA" }

    sub string($self) { $self->{code} }
    _END_
}
import USPSCode::constructors;
# Parse a multi-line "Name CODE" table into a purearray of USPSCode
# objects.
#   $str      - the table text, one entry per line
#   $is_state - flag stored in every resulting object
# Dies with "no parse: ..." on a line that does not end in a
# two-character code preceded by whitespace.
sub parse_USPS_segment ($str, $is_state) {
    $str =~ s/^\s+//;
    my $parse_entry = sub ($line) {
        # trim surrounding whitespace before matching
        $line =~ s/^\s*(.*?)\s*\z/$1/s;
        my ($name, $code) = $line =~ /^(\S.*?\S)\s+(\w{2})\z/
            or die "no parse: '$line'";
        USPSCode($name, $code, $is_state)
    };
    my @lines = split /\n/, $str;
    purearray(@lines)->map($parse_entry)
}
# All known USPS codes: the states first, then the non-state codes.
my $USPSCodes = do {
    my $state_codes    = parse_USPS_segment($states,    1);
    my $nonstate_codes = parse_USPS_segment($nonstates, 0);
    $state_codes->append($nonstate_codes)
};

# Lookup table from two-letter code to its USPSCode object.
my %USfromCode = map { ($_->code => $_) } $USPSCodes->values;
# A country other than the USA, identified only by its name as written
# in the source document. Stringifies as that name.
package NonUSCountry {
    use FP::Struct ["country_name"], 'Country';

    sub is_USA($self) { 0 }

    sub string($self) { $self->{country_name} }
    _END_
}
import NonUSCountry::constructors;
# A city within a country (or within a USPS region). Fields: the `city`
# name and the `country` object. Stringifies as "city, country", using
# the country's own stringification.
package City {
    use FP::Struct ["city", "country"], 'FP::Struct::Show';
    use overload '""' => sub { shift->string };

    sub string($self) {
        $self->city . ", " . $self->country
    }

    # A city is a physical place, never "remote". Company::remote calls
    # is_remote on every location, and parse_location produces City
    # objects, so without this method that query dies on any company
    # that lists a city.
    sub is_remote($self) {0}
    _END_
}
import City::constructors;
# Derive the key used for ordering company names: lower-case the name,
# fold a handful of accented letters to their base letter, map '|' to
# '0' and drop ':'. Returns the key as a new string; the argument is not
# modified.
sub sortstring($str) {
    my $s = lc($str);

    # I'm sure there might be a library for this? Anyway, the
    # rules used by this project may be too specific.
    $s =~ s/[ä]/a/sg;    # or ae ?
    $s =~ s/[éế]/e/sg;
    $s =~ s/[ï]/i/sg;
    $s =~ s/[ōöô]/o/sg;
    $s =~ s/[üû]/u/sg;    # was folded to 'o', mis-sorting names with ü/û
    $s =~ s/[ç]/c/sg;
    $s =~ s/[|]/0/sg;     # hacky
    $s =~ s/[:]//sg;      # do *not* strip space, (), -
    $s
}
# One company entry of the document. Fields: `name`, `URL`, `locations`
# (a sequence of location objects) and `maybe_process` (description of
# the hiring process, or undef). Stringifies back to a markdown list
# item, and compares via `cmp` using the document's sort order.
package Company {
    use FP::Ops qw(the_method);
    use FP::Struct ["name", "URL", "locations", "maybe_process"],
        'FP::Struct::Show';
    use overload (
        '""' => sub { $_[0]->string },
        cmp => sub {
            my ($left, $right, $swapped) = @_;
            die if $swapped;    ##
            my ($lkey, $rkey) = ($left->sortstring, $right->sortstring);
            my $order = $lkey cmp $rkey;
            return 0 unless $order;

            # Order numeric entries between X and Y (since that's what
            # the current hand sorted document does): when the smaller
            # key starts with a digit and the larger with a-x, flip the
            # plain string order.
            my ($low, $high) = $order < 0 ? ($lkey, $rkey) : ($rkey, $lkey);
            ($low =~ /^[0-9]/ and $high =~ /^[a-x]/) ? -$order : $order
        }
    );

    # Sort key of this company, derived from its name.
    sub sortstring($self) {
        main::sortstring($self->name)
    }

    # Serialize back to a markdown list item.
    sub string($self) {
        my $link      = "- [" . $self->name . "](" . $self->URL . ")";
        my $locations = $self->locations->map(the_method("string"))->join("; ");
        my $process   = $self->maybe_process;
        my $suffix    = defined($process) ? " | $process" : "";
        $link . " | " . $locations . $suffix
    }

    # True if any of the locations is remote. Not called is_remote since
    # has_remote might be more appropriate; kept short instead.
    sub remote($self) {
        my $is_remote = the_method("is_remote");
        $self->locations->any($is_remote)
    }
    _END_
}
import Company::constructors;
# ==================================================================
# For re-grouping&sorting (I'm crazy to have considered writing that)
# Oh, don't have ranges yet? Just code one up for us here, hacky
# anyway.
# A range of single-character labels, inclusive at both ends, as used
# in the group headings ("A - C"). Fields: `from` and `to`. Stringifies
# as "from - to"; `cmp` orders by `from`, then by `to`.
package InclusiveRange {
    use FP::Struct ["from", "to"], 'FP::Struct::Show';
    use overload (
        '""' => sub { shift->string },
        cmp => sub {
            my ($x, $y, $swapped) = @_;

            # Compare the lower bounds first, then the upper bounds.
            # (This used to call the nonexistent method `cmp` on the
            # second operand instead of `from`.)
            my $order = ($x->from cmp $y->from or $x->to cmp $y->to);

            # perl passes a true third argument when the operands were
            # handed over in reverse order.
            $swapped ? -$order : $order
        }
    );

    sub string($self) {
        $self->from . " - " . $self->to
    }

    # Whether the single-character string $v falls into this range.
    sub contains_item ($self, $v) {

        # super HACK to accommodate for the '#' thing (should use a
        # maybe type instead for ranges which are open above--except
        # cmp is wrong for digits, too, so, dunno):
        return 1 if $self->to eq '#' and $v =~ /^[0-9]$/;
        (($self->from cmp $v) <= 0 and ($v cmp $self->to) <= 0)
    }
    _END_
}
import InclusiveRange::constructors;
# Range variant whose contains_item accepts every value; it inherits
# everything else from InclusiveRange.
package CatchallRange {
# A bit of a hack, to catch numbers and whatever in the last one
# with the '#', as the letter '#' isn't actually covering over the
# digits so would otherwise have to hack InclusiveRange.
use FP::Struct [], 'InclusiveRange';
# Always true, regardless of $v.
sub contains_item ($self, $v) {
1
}
_END_
}
import CatchallRange::constructors;
# The items listed under one range heading. Fields: `range` and
# `items`.
# NOTE(review): the `range` field is declared together with an
# instance_of("InclusiveRange") predicate, presumably so that FP::Struct
# checks it on construction -- confirm against the FP::Struct docs.
package Group {
use FP::Predicates qw(instance_of);
use FP::Struct [[instance_of("InclusiveRange"), "range"], "items"],
'FP::Struct::Show';
_END_
}
import Group::constructors;
# The heading ranges used for re-grouping: the letters A..Z plus '#',
# taken three at a time, each triple giving a range from its first to
# its third element. The last range is a CatchallRange.
my $ranges = triples('A' .. 'Z', '#')->map_with_islast(
    sub ($islast, $triple) {
        my ($from, undef, $to) = @$triple;
        my $make_range = $islast ? \&CatchallRange : \&InclusiveRange;
        $make_range->($from, $to)
    }
)->list;
# sortedlist -> list_of(Group)
# Split the (already sorted) companies in $l into Group objects, one
# per range in $ranges, in order: each group takes the leading run of
# companies whose upper-cased first name character is contained in the
# current range. Returns the empty list once $l is exhausted.
sub group_companies ($l, $ranges) {
    if ($l->is_null) {
        return null();
    }
    my $range    = $ranges->first;
    my $in_range = sub ($company) {
        my $initial = uc substr($company->name, 0, 1);
        $range->contains_item($initial)
    };
    my ($groupitems, $rest) = $l->take_while_and_rest($in_range);
    my $group = Group($range, $groupitems);
    no warnings "recursion";
    cons($group, group_companies($rest, $ranges->rest))
}
# ==================================================================
# markdown line parsing
# True if the line is a markdown list item, i.e. starts with a dash
# followed by whitespace.
sub is_item($s) {
    return $s =~ /^-\s+/;
}
# Build a line predicate for markdown headings. The returned function
# yields 0 for lines that do not start (after optional whitespace) with
# one or more '#'; for headings it returns whatever $pred returns when
# given the heading text that follows the '#' marks.
sub is_heading_of($pred) {
    return sub ($line) {
        my ($title) = $line =~ /^\s*#+\s*(.*)/
            or return 0;
        $pred->($title)
    };
}
# Predicate accepting any markdown heading line.
*is_heading = is_heading_of(sub($s) {1});
# Predicate accepting only headings whose text matches /also *see/i.
*is_AlsoSee = is_heading_of(sub($s) { $s =~ /also *see/i });
# True if the line is a horizontal rule: exactly three dashes at the
# start, followed by nothing but optional whitespace.
sub is_hr($s) {
    return $s =~ /^---\s*$/;
}
# True if the line is empty or consists of whitespace only.
sub is_empty($s) {
    return $s =~ /^\s*$/;
}
# Map a country field to an object: a known USPS code yields its
# USPSCode object, anything else becomes a NonUSCountry of that name.
sub parse_country($str) {
    my $us_region = $USfromCode{$str};
    defined($us_region) ? $us_region : NonUSCountry($str)
}
# Parse one location entry.
#   "remote" (any case)  -> the Remote object
#   "Country"            -> country object
#   "City, Country"      -> City in that country
#   "City, ST, USA"      -> City in the US region with code ST
# Dies on three-part entries whose last part is not "USA" or whose
# middle part is not a known USPS code, and on any other part count.
sub parse_location($str) {
    return $Remote if $str =~ /^remote$/i;

    my @parts = split /\s*,\s*/, $str;
    my $count = @parts;

    if ($count == 1) {
        return parse_country($parts[0]);
    }
    if ($count == 2) {
        my ($city, $country) = @parts;
        return City($city, parse_country($country));
    }
    $count == 3
        or die "more than two commas in: '$str'";

    my ($city, $state, $country) = @parts;
    $country eq "USA"
        or die
        "don't know how to deal with presumed state '$state' in country '$country': don't know that country";

    my $region = parse_country($state);
    $region->is_USA
        or die "presumed state '$state' is not a state in the USA";
    City($city, $region)
}
# Parse one markdown list item of the form
#   - [Name](URL) | locations | process
# into a Company object. The process part is optional. Dies if the line
# is not an item, lacks the link syntax, or its remainder does not
# split into 2 or 3 '|'-separated parts (the first being the text
# before the first '|').
sub parse_line($line) {
    (my $item = $line) =~ s/^-\s*//
        or die "line is not an item";
    my ($name, $url, $rest) = $item =~ /^\[(.*?)\] *\((.*?)\)\s*(.*)$/
        or die "missing link formatting in: '$item'";

    my @fields = split /\s*\|\s*/, $rest;
    (@fields == 2 or @fields == 3)
        or die "rest does not contain 2 or 3 parts: '$rest'";
    my (undef, $locations, $maybe_process) = @fields;

    # /, ; and & used inconsistently:
    my @location_strings = split m%\s*[/;&]\s*%, $locations;
    my $parsed_locations = list(map { parse_location($_) } @location_strings);

    Company($name, $url, $parsed_locations, $maybe_process)
}
# Check parse_line on a full entry with three locations and a process
# description. The expected Company must carry all four fields,
# including the URL (it was missing from the expected value, leaving
# the constructor one argument short).
TEST {
    parse_line
        "- [Accredible](https://www.accredible.com/careers) | Cambridge, UK / San Francisco, CA / Remote | Take home project, then a pair-programming and discussion onsite / Skype round."
}
Company(
    "Accredible",
    "https://www.accredible.com/careers",
    list(
        City('Cambridge',     NonUSCountry('UK')),
        City('San Francisco', USPSCode('California', 'CA', 1)),
        Remote()
    ),
    "Take home project, then a pair-programming and discussion onsite / Skype round."
);
# XX move?; name?
# Skip everything up to and including the first run of elements that
# satisfy $pred: first drop the elements that do not satisfy it, then
# drop the following ones that do.
sub FP::Abstract::Sequence::drop_over ($l, $pred) {
    my $from_first_match = $l->drop_while(complement($pred));
    $from_first_match->drop_while($pred)
}
# The non-empty lines of $file that lie between the first horizontal
# rule and the "also see" heading.
sub datalines () {
    my $all_lines  = xfile_lines_chomp("$file", "UTF-8");
    my $after_rule = $all_lines->drop_over(\&is_hr);
    my $listing    = $after_rule->take_while(complement(\&is_AlsoSee));
    $listing->filter(complement(\&is_empty))
}
# All companies of the file as Company objects, in document order.
sub companies () {

    # Simply ignore the grouping headings.
    my $item_lines = datalines()->filter(complement(\&is_heading));
    $item_lines->map(\&parse_line)
}
# Parse a group heading such as "## A - C" or "## Y - \#" into an
# InclusiveRange; a backslash in front of the upper bound is removed.
# Dies with "not a heading: ..." if the line has a different shape.
sub parse_heading($str) {
    $str =~ /^#+\s+(\w)\s*-\s*(\w|\\?#)\s*$/
        or die "not a heading: '$str'";
    my ($from, $to) = ($1, $2);
    $to =~ s/^\\//;
    InclusiveRange($from, $to)
}
# Turn the data lines into a list of Group objects that follow the
# headings exactly as they appear in the file.
#
# Capture the groupings as well, as the original file is badly
# grouping them, so to keep a diff minimal we first have to
# maintain the wrong grouping before re-grouping automatically.
#
# Dies if a group does not start with a heading line.
sub grouped_companies_from($datalines) {
    return null() if $datalines->is_null;

    my ($heading, $after_heading) = $datalines->first_and_rest;
    is_heading($heading)
        or die "expecting a header, got: '$heading'";

    my ($item_lines, $remaining)
        = $after_heading->take_while_and_rest(complement(\&is_heading));
    my $group = Group(parse_heading($heading), $item_lines->map(\&parse_line));
    cons($group, grouped_companies_from($remaining))
}
# The companies of $file, grouped as in the file itself.
sub grouped_companies () {
    grouped_companies_from(datalines())
}
# Print a sequence of Group objects as markdown: for each group a
# "## from - to" heading (with the first '#' of the range text escaped
# by a backslash), one line per company, then an empty line.
sub print_groups($s) {
    my $print_company = sub ($company) {
        xprintln($company);
    };
    my $print_group = sub ($group) {
        (my $title = $group->range->string) =~ s/#/\\#/;
        xprintln("## $title");
        $group->items->for_each($print_company);
        xprintln();
    };
    $s->for_each($print_group)
}
# HACK: should really just use the repl printer (show) directly to
# show the whole results list, but show currently doesn't do
# multi-line pretty-printing; so:
# Print the `show` representation of a single value on its own line.
sub print_showln($v) {
    xprintln(show($v))
}
# XX see above, and move?
# Sequence method: print the `show` representation of every element,
# one per line.
sub FP::Abstract::Sequence::show_items($l) {
    my $printer = \&print_showln;
    $l->for_each($printer)
}
# Sequence method: print every element on its own line (for Company
# objects this goes through their markdown stringification).
sub FP::Abstract::Sequence::print($l) {
    my $printer = \&xprintln;
    $l->for_each($printer)
}
# Sequence method: print the companies grouped under range headings.
#   $please_sort - when true (the default) the sequence is sorted
#                  first; pass a false value for already sorted input.
sub FP::Abstract::Sequence::print_grouped ($l, $please_sort = 1) {
    my $companies = $please_sort ? $l->sort : $l;
    print_groups(group_companies($companies, $ranges));
}
# ==================================================================
# main
# Short alias for `companies`, meant to be typed at the repl.
sub cs () {
companies()
}
# Entry point: unless perhaps_run_tests reports true, require exactly
# one command line argument (showing the usage text otherwise) and
# start the repl.
unless (perhaps_run_tests("main")) {
    usage unless @ARGV == 1;

    # let the user play with the data
    repl;
}